Commit 272aeabee72d2d8ef22a514c5d50cb12c2948aed

Authored by tangwang
1 parent a7cc9078

调参

@@ -281,8 +281,8 @@ class AppConfigLoader: @@ -281,8 +281,8 @@ class AppConfigLoader:
281 ["title", "brief", "vendor", "category_name_text"], 281 ["title", "brief", "vendor", "category_name_text"],
282 ) 282 )
283 ), 283 ),
284 - base_minimum_should_match=str(text_strategy.get("base_minimum_should_match", "75%")),  
285 - translation_minimum_should_match=str(text_strategy.get("translation_minimum_should_match", "75%")), 284 + base_minimum_should_match=str(text_strategy.get("base_minimum_should_match", "70%")),
  285 + translation_minimum_should_match=str(text_strategy.get("translation_minimum_should_match", "70%")),
286 translation_boost=float(text_strategy.get("translation_boost", 0.4)), 286 translation_boost=float(text_strategy.get("translation_boost", 0.4)),
287 translation_boost_when_source_missing=float( 287 translation_boost_when_source_missing=float(
288 text_strategy.get("translation_boost_when_source_missing", 1.0) 288 text_strategy.get("translation_boost_when_source_missing", 1.0)
@@ -51,8 +51,8 @@ class QueryConfig: @@ -51,8 +51,8 @@ class QueryConfig:
51 core_multilingual_fields: List[str] = field( 51 core_multilingual_fields: List[str] = field(
52 default_factory=lambda: ["title", "brief", "vendor", "category_name_text"] 52 default_factory=lambda: ["title", "brief", "vendor", "category_name_text"]
53 ) 53 )
54 - base_minimum_should_match: str = "75%"  
55 - translation_minimum_should_match: str = "75%" 54 + base_minimum_should_match: str = "70%"
  55 + translation_minimum_should_match: str = "70%"
56 translation_boost: float = 0.4 56 translation_boost: float = 0.4
57 translation_boost_when_source_missing: float = 1.0 57 translation_boost_when_source_missing: float = 1.0
58 source_boost_when_missing: float = 0.6 58 source_boost_when_missing: float = 0.6
@@ -236,10 +236,13 @@ config/environments/<env>.yaml @@ -236,10 +236,13 @@ config/environments/<env>.yaml
236 236
237 237
238 238
  239 +筛选SKU: 先只筛选第一个维度,但考虑到用户搜索词可能带了尺码,所以第二、三个维度也要考虑
239 240
240 241
241 -  
242 - 242 +引入图片的相关性:
  243 +图片的向量用 SPU 维度还是 SKU 维度?倾向于做到 SKU 维度,分两种情况考虑:
  244 +1. SKU维度(主款式,option1维度),如果用户搜索“蓝色 T恤”,这种图片相关性会比较有价值。
  245 +2. 我不考虑颜色的差异,其余的款式一般是大小之类的。这些图片的向量细分到 SKU 维度,可能价值不大,性价比偏低
243 246
244 247
245 248
docs/常用查询 - ES.md
@@ -654,3 +654,18 @@ GET /search_products_tenant_170/_search @@ -654,3 +654,18 @@ GET /search_products_tenant_170/_search
654 } 654 }
655 } 655 }
656 } 656 }
  657 +
  658 +
  659 +检查某个字段是否存在(统计该字段有值的文档数量)
  660 +curl -u "$ES_USER:$ES_PASSWORD" -X POST \
  661 + 'http://localhost:9200/search_products_tenant_163/_count' \
  662 + -H 'Content-Type: application/json' \
  663 + -d '{
  664 + "query": {
  665 + "bool": {
  666 + "filter": [
  667 + { "exists": { "field": "title_embedding" } }
  668 + ]
  669 + }
  670 + }
  671 + }'
657 \ No newline at end of file 672 \ No newline at end of file
search/es_query_builder.py
@@ -33,8 +33,8 @@ class ESQueryBuilder: @@ -33,8 +33,8 @@ class ESQueryBuilder:
33 function_score_config: Optional[FunctionScoreConfig] = None, 33 function_score_config: Optional[FunctionScoreConfig] = None,
34 default_language: str = "en", 34 default_language: str = "en",
35 knn_boost: float = 0.25, 35 knn_boost: float = 0.25,
36 - base_minimum_should_match: str = "75%",  
37 - translation_minimum_should_match: str = "75%", 36 + base_minimum_should_match: str = "70%",
  37 + translation_minimum_should_match: str = "70%",
38 translation_boost: float = 0.4, 38 translation_boost: float = 0.4,
39 translation_boost_when_source_missing: float = 1.0, 39 translation_boost_when_source_missing: float = 1.0,
40 source_boost_when_missing: float = 0.6, 40 source_boost_when_missing: float = 0.6,
@@ -261,16 +261,13 @@ class ESQueryBuilder: @@ -261,16 +261,13 @@ class ESQueryBuilder:
261 if parsed_query: 261 if parsed_query:
262 query_tokens = getattr(parsed_query, 'query_tokens', None) or [] 262 query_tokens = getattr(parsed_query, 'query_tokens', None) or []
263 token_count = len(query_tokens) 263 token_count = len(query_tokens)
264 - if token_count <= 2:  
265 - knn_k, knn_num_candidates = 30, 100  
266 - knn_boost = self.knn_boost * 0.6 # Lower weight for short queries  
267 - elif token_count >= 5:  
268 - knn_k, knn_num_candidates = 80, 300 264 + if token_count >= 5:
  265 + knn_k, knn_num_candidates = 160, 500
269 knn_boost = self.knn_boost * 1.4 # Higher weight for long queries 266 knn_boost = self.knn_boost * 1.4 # Higher weight for long queries
270 else: 267 else:
271 - knn_k, knn_num_candidates = 50, 200 268 + knn_k, knn_num_candidates = 120, 400
272 else: 269 else:
273 - knn_k, knn_num_candidates = 50, 200 270 + knn_k, knn_num_candidates = 120, 400
274 knn_clause = { 271 knn_clause = {
275 "field": self.text_embedding_field, 272 "field": self.text_embedding_field,
276 "query_vector": query_vector.tolist(), 273 "query_vector": query_vector.tolist(),