Commit 70a318c6391c338bc3a11330c0d931e6d081db5c

Authored by tangwang
1 parent 70dab99f

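Fix bug: check the embedding cache before building request data, so that uncached_texts is populated when the request items are prepared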

Showing 1 changed file with 13 additions and 13 deletions   Show diff stats
embeddings/text_encoder.py
@@ -113,7 +113,19 @@ class BgeEncoder: @@ -113,7 +113,19 @@ class BgeEncoder:
113 uncached_indices: List[int] = [] 113 uncached_indices: List[int] = []
114 uncached_texts: List[str] = [] 114 uncached_texts: List[str] = []
115 115
116 - # Prepare request data for uncached texts 116 + # Process response
  117 + # Each element can be np.ndarray or None (表示该文本没有可用的向量)
  118 + embeddings: List[Optional[np.ndarray]] = [None] * len(sentences)
  119 +
  120 + for i, text in enumerate(sentences):
  121 + cached = self._get_cached_embedding(text, 'en') # Use 'en' as default language for title embedding
  122 + if cached is not None:
  123 + embeddings[i] = cached
  124 + else:
  125 + uncached_indices.append(i)
  126 + uncached_texts.append(text)
  127 +
  128 + # Prepare request data for uncached texts (after cache check)
117 request_data = [] 129 request_data = []
118 for i, text in enumerate(uncached_texts): 130 for i, text in enumerate(uncached_texts):
119 request_item = { 131 request_item = {
@@ -127,18 +139,6 @@ class BgeEncoder: @@ -127,18 +139,6 @@ class BgeEncoder:
127 request_item["name_ru"] = None 139 request_item["name_ru"] = None
128 140
129 request_data.append(request_item) 141 request_data.append(request_item)
130 -  
131 - # Process response  
132 - # Each element can be np.ndarray or None (表示该文本没有可用的向量)  
133 - embeddings: List[Optional[np.ndarray]] = [None] * len(sentences)  
134 -  
135 - for i, text in enumerate(sentences):  
136 - cached = self._get_cached_embedding(text, 'en') # Use 'en' as default language for title embedding  
137 - if cached is not None:  
138 - embeddings[i] = cached  
139 - else:  
140 - uncached_indices.append(i)  
141 - uncached_texts.append(text)  
142 142
143 # If there are uncached texts, call service 143 # If there are uncached texts, call service
144 if uncached_texts: 144 if uncached_texts: