Commit 70a318c6391c338bc3a11330c0d931e6d081db5c
1 parent
70dab99f
Fix bug: check the embedding cache before building request data for uncached texts
Showing 1 changed file with 13 additions and 13 deletions
Show diff stats
embeddings/text_encoder.py
| @@ -113,7 +113,19 @@ class BgeEncoder: | @@ -113,7 +113,19 @@ class BgeEncoder: | ||
| 113 | uncached_indices: List[int] = [] | 113 | uncached_indices: List[int] = [] |
| 114 | uncached_texts: List[str] = [] | 114 | uncached_texts: List[str] = [] |
| 115 | 115 | ||
| 116 | - # Prepare request data for uncached texts | 116 | + # Process response |
| 117 | + # Each element can be np.ndarray or None (表示该文本没有可用的向量) | ||
| 118 | + embeddings: List[Optional[np.ndarray]] = [None] * len(sentences) | ||
| 119 | + | ||
| 120 | + for i, text in enumerate(sentences): | ||
| 121 | + cached = self._get_cached_embedding(text, 'en') # Use 'en' as default language for title embedding | ||
| 122 | + if cached is not None: | ||
| 123 | + embeddings[i] = cached | ||
| 124 | + else: | ||
| 125 | + uncached_indices.append(i) | ||
| 126 | + uncached_texts.append(text) | ||
| 127 | + | ||
| 128 | + # Prepare request data for uncached texts (after cache check) | ||
| 117 | request_data = [] | 129 | request_data = [] |
| 118 | for i, text in enumerate(uncached_texts): | 130 | for i, text in enumerate(uncached_texts): |
| 119 | request_item = { | 131 | request_item = { |
| @@ -127,18 +139,6 @@ class BgeEncoder: | @@ -127,18 +139,6 @@ class BgeEncoder: | ||
| 127 | request_item["name_ru"] = None | 139 | request_item["name_ru"] = None |
| 128 | 140 | ||
| 129 | request_data.append(request_item) | 141 | request_data.append(request_item) |
| 130 | - | ||
| 131 | - # Process response | ||
| 132 | - # Each element can be np.ndarray or None (表示该文本没有可用的向量) | ||
| 133 | - embeddings: List[Optional[np.ndarray]] = [None] * len(sentences) | ||
| 134 | - | ||
| 135 | - for i, text in enumerate(sentences): | ||
| 136 | - cached = self._get_cached_embedding(text, 'en') # Use 'en' as default language for title embedding | ||
| 137 | - if cached is not None: | ||
| 138 | - embeddings[i] = cached | ||
| 139 | - else: | ||
| 140 | - uncached_indices.append(i) | ||
| 141 | - uncached_texts.append(text) | ||
| 142 | 142 | ||
| 143 | # If there are uncached texts, call service | 143 | # If there are uncached texts, call service |
| 144 | if uncached_texts: | 144 | if uncached_texts: |