Commit 70a318c6391c338bc3a11330c0d931e6d081db5c
1 parent
70dab99f
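fix bug: run the embedding cache lookup before building request data, so that uncached_texts is populated when request_data is built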
Showing 1 changed file with 13 additions and 13 deletions
Show diff stats
embeddings/text_encoder.py
| ... | ... | @@ -113,7 +113,19 @@ class BgeEncoder: |
| 113 | 113 | uncached_indices: List[int] = [] |
| 114 | 114 | uncached_texts: List[str] = [] |
| 115 | 115 | |
| 116 | - # Prepare request data for uncached texts | |
| 116 | + # Process response | |
| 117 | + # Each element can be np.ndarray or None (表示该文本没有可用的向量) | |
| 118 | + embeddings: List[Optional[np.ndarray]] = [None] * len(sentences) | |
| 119 | + | |
| 120 | + for i, text in enumerate(sentences): | |
| 121 | + cached = self._get_cached_embedding(text, 'en') # Use 'en' as default language for title embedding | |
| 122 | + if cached is not None: | |
| 123 | + embeddings[i] = cached | |
| 124 | + else: | |
| 125 | + uncached_indices.append(i) | |
| 126 | + uncached_texts.append(text) | |
| 127 | + | |
| 128 | + # Prepare request data for uncached texts (after cache check) | |
| 117 | 129 | request_data = [] |
| 118 | 130 | for i, text in enumerate(uncached_texts): |
| 119 | 131 | request_item = { |
| ... | ... | @@ -127,18 +139,6 @@ class BgeEncoder: |
| 127 | 139 | request_item["name_ru"] = None |
| 128 | 140 | |
| 129 | 141 | request_data.append(request_item) |
| 130 | - | |
| 131 | - # Process response | |
| 132 | - # Each element can be np.ndarray or None (表示该文本没有可用的向量) | |
| 133 | - embeddings: List[Optional[np.ndarray]] = [None] * len(sentences) | |
| 134 | - | |
| 135 | - for i, text in enumerate(sentences): | |
| 136 | - cached = self._get_cached_embedding(text, 'en') # Use 'en' as default language for title embedding | |
| 137 | - if cached is not None: | |
| 138 | - embeddings[i] = cached | |
| 139 | - else: | |
| 140 | - uncached_indices.append(i) | |
| 141 | - uncached_texts.append(text) | |
| 142 | 142 | |
| 143 | 143 | # If there are uncached texts, call service |
| 144 | 144 | if uncached_texts: | ... | ... |