cache.py
4.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
"""
Cache utilities: a file-based cache for embedding vectors and a JSON-backed
dictionary cache for small key/value data.
"""
import json
import hashlib
import pickle
from pathlib import Path
from typing import Any, Optional
import numpy as np
class EmbeddingCache:
    """
    Simple file-based cache for embeddings.

    Each embedding is stored as ``<md5 of input>.npy`` inside ``cache_dir``.
    The MD5 digest is used only to derive a filename from the input string.
    """

    def __init__(self, cache_dir: str = ".cache/embeddings"):
        """
        Create the cache and make sure its directory exists.

        Args:
            cache_dir: Directory that holds the ``.npy`` cache files
        """
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def _get_cache_key(self, input_str: str) -> str:
        """Generate cache key (hex MD5 digest) from input string."""
        return hashlib.md5(input_str.encode('utf-8')).hexdigest()

    def _cache_path(self, input_str: str) -> Path:
        """Return the ``.npy`` file path used for ``input_str``."""
        return self.cache_dir / f"{self._get_cache_key(input_str)}.npy"

    def get(self, input_str: str) -> Optional[np.ndarray]:
        """
        Get cached embedding.

        Args:
            input_str: Input text or URL

        Returns:
            Cached embedding or None if not found (or if the file is unreadable)
        """
        cache_file = self._cache_path(input_str)
        try:
            # allow_pickle=False is numpy's default; spelled out so it visibly
            # matches the policy enforced in set().
            return np.load(cache_file, allow_pickle=False)
        except FileNotFoundError:
            # A plain cache miss is not an error, so stay silent.
            return None
        except Exception as e:
            print(f"Failed to load cache for {input_str}: {e}")
            return None

    def set(self, input_str: str, embedding: np.ndarray) -> bool:
        """
        Store embedding in cache.

        The array is written to a temporary file and then renamed over the
        final path, so an interrupted write never leaves a truncated ``.npy``
        file that ``exists()`` would report as cached.

        Args:
            input_str: Input text or URL
            embedding: Embedding vector

        Returns:
            True if successful, False if the embedding could not be written
        """
        cache_file = self._cache_path(input_str)
        # The ".tmp" suffix keeps in-progress files out of the "*.npy" globs
        # used by clear() and size().
        tmp_file = cache_file.with_name(cache_file.name + ".tmp")
        try:
            with open(tmp_file, 'wb') as f:
                # Refuse object arrays: get() loads with allow_pickle=False,
                # so a pickled entry could be stored but never read back.
                np.save(f, embedding, allow_pickle=False)
            tmp_file.replace(cache_file)
            return True
        except Exception as e:
            print(f"Failed to cache embedding for {input_str}: {e}")
            try:
                tmp_file.unlink()
            except OSError:
                # Nothing to clean up (or cleanup itself failed); best effort.
                pass
            return False

    def exists(self, input_str: str) -> bool:
        """Check if embedding is cached."""
        return self._cache_path(input_str).exists()

    def clear(self) -> int:
        """
        Clear all cached embeddings.

        Returns:
            Number of files deleted
        """
        count = 0
        for cache_file in self.cache_dir.glob("*.npy"):
            try:
                cache_file.unlink()
            except FileNotFoundError:
                # File vanished between listing and deletion; not ours to count.
                continue
            count += 1
        return count

    def size(self) -> int:
        """Get number of cached embeddings."""
        return sum(1 for _ in self.cache_dir.glob("*.npy"))
class DictCache:
    """
    Simple dictionary-based cache for query rewrite rules, translations, etc.

    Data is kept in memory as ``{category: {key: value}}`` and persisted to a
    single JSON file on every mutation.
    """

    def __init__(self, cache_file: str = ".cache/dict_cache.json"):
        """
        Create the cache, ensure the parent directory exists and load any
        previously saved content.

        Args:
            cache_file: Path of the JSON file backing the cache
        """
        self.cache_file = Path(cache_file)
        self.cache_file.parent.mkdir(parents=True, exist_ok=True)
        self.cache = self._load()

    def _load(self) -> dict:
        """
        Load cache from file.

        Returns:
            The stored dictionary, or an empty dict when the file is missing,
            unreadable, or does not contain a JSON object at the top level
        """
        if self.cache_file.exists():
            try:
                with open(self.cache_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                # Every other method calls dict methods on self.cache, so a
                # top-level list/scalar must be rejected here.
                if not isinstance(data, dict):
                    raise ValueError("top-level JSON value is not an object")
                return data
            except Exception as e:
                print(f"Failed to load cache: {e}")
                return {}
        return {}

    def _save(self) -> bool:
        """
        Save cache to file.

        The cache is serialized in memory first and written through a
        temporary file that is then renamed over the real one, so a
        non-serializable value or an interrupted write cannot truncate the
        existing cache file.

        Returns:
            True if successful, False otherwise
        """
        tmp_file = self.cache_file.with_name(self.cache_file.name + ".tmp")
        try:
            # Serialize before touching the disk: json errors must not cost us
            # the data that is already stored.
            payload = json.dumps(self.cache, ensure_ascii=False, indent=2)
            with open(tmp_file, 'w', encoding='utf-8') as f:
                f.write(payload)
            tmp_file.replace(self.cache_file)
            return True
        except Exception as e:
            print(f"Failed to save cache: {e}")
            try:
                tmp_file.unlink()
            except OSError:
                # No temp file was created (or cleanup failed); best effort.
                pass
            return False

    def get(self, key: str, category: str = "default") -> Optional[Any]:
        """
        Get cached value.

        Args:
            key: Cache key
            category: Cache category (for organizing different types of data)

        Returns:
            Cached value or None
        """
        return self.cache.get(category, {}).get(key)

    def set(self, key: str, value: Any, category: str = "default") -> bool:
        """
        Store value in cache.

        The in-memory cache is updated even when persisting fails; the return
        value reports whether the file on disk was updated.

        Args:
            key: Cache key
            value: Value to cache
            category: Cache category

        Returns:
            True if successful
        """
        self.cache.setdefault(category, {})[key] = value
        return self._save()

    def exists(self, key: str, category: str = "default") -> bool:
        """Check if key exists in cache."""
        return key in self.cache.get(category, {})

    def clear(self, category: Optional[str] = None) -> bool:
        """
        Clear cache.

        Args:
            category: If specified, clear only this category. Otherwise clear all.

        Returns:
            True if successful
        """
        # Compare against None explicitly: "" is a usable category name and
        # must not be mistaken for "clear everything".
        if category is not None:
            self.cache.pop(category, None)
        else:
            self.cache = {}
        return self._save()