Commit 272aeabee72d2d8ef22a514c5d50cb12c2948aed

Authored by tangwang
1 parent a7cc9078

调参

config/loader.py
... ... @@ -281,8 +281,8 @@ class AppConfigLoader:
281 281 ["title", "brief", "vendor", "category_name_text"],
282 282 )
283 283 ),
284   - base_minimum_should_match=str(text_strategy.get("base_minimum_should_match", "75%")),
285   - translation_minimum_should_match=str(text_strategy.get("translation_minimum_should_match", "75%")),
  284 + base_minimum_should_match=str(text_strategy.get("base_minimum_should_match", "70%")),
  285 + translation_minimum_should_match=str(text_strategy.get("translation_minimum_should_match", "70%")),
286 286 translation_boost=float(text_strategy.get("translation_boost", 0.4)),
287 287 translation_boost_when_source_missing=float(
288 288 text_strategy.get("translation_boost_when_source_missing", 1.0)
... ...
config/schema.py
... ... @@ -51,8 +51,8 @@ class QueryConfig:
51 51 core_multilingual_fields: List[str] = field(
52 52 default_factory=lambda: ["title", "brief", "vendor", "category_name_text"]
53 53 )
54   - base_minimum_should_match: str = "75%"
55   - translation_minimum_should_match: str = "75%"
  54 + base_minimum_should_match: str = "70%"
  55 + translation_minimum_should_match: str = "70%"
56 56 translation_boost: float = 0.4
57 57 translation_boost_when_source_missing: float = 1.0
58 58 source_boost_when_missing: float = 0.6
... ...
docs/TODO.txt
... ... @@ -236,10 +236,13 @@ config/environments/<env>.yaml
236 236  
237 237  
238 238  
  239 +筛选SKU: 先只筛选第一个维度,但考虑到用户搜索词可能带了尺码,所以第二、三个维度也要考虑
239 240  
240 241  
241   -
242   -
  242 +引入图片的相关性:
  243 +图片的向量最好做SKU维度,用 SPU 维度还是 SKU 维度?
  244 +1. SKU维度(主款式,option1维度),如果用户搜索“蓝色 T恤”,这种图片相关性会比较有价值。
  245 +2. 我不考虑颜色的差异,其余的款式一般是大小之类的。这些图片,向量细分到 SKU 维度,可能价值不大,性价比偏低
243 246  
244 247  
245 248  
... ...
docs/常用查询 - ES.md
... ... @@ -654,3 +654,18 @@ GET /search_products_tenant_170/_search
654 654 }
655 655 }
656 656 }
  657 +
  658 +
  659 +检查某个字段是否存在
  660 +curl -u 'saas:4hOaLaf41y2VuI8y' -X POST \
  661 + 'http://localhost:9200/search_products_tenant_163/_count' \
  662 + -H 'Content-Type: application/json' \
  663 + -d '{
  664 + "query": {
  665 + "bool": {
  666 + "filter": [
  667 + { "exists": { "field": "title_embedding" } }
  668 + ]
  669 + }
  670 + }
  671 + }'
657 672 \ No newline at end of file
... ...
search/es_query_builder.py
... ... @@ -33,8 +33,8 @@ class ESQueryBuilder:
33 33 function_score_config: Optional[FunctionScoreConfig] = None,
34 34 default_language: str = "en",
35 35 knn_boost: float = 0.25,
36   - base_minimum_should_match: str = "75%",
37   - translation_minimum_should_match: str = "75%",
  36 + base_minimum_should_match: str = "70%",
  37 + translation_minimum_should_match: str = "70%",
38 38 translation_boost: float = 0.4,
39 39 translation_boost_when_source_missing: float = 1.0,
40 40 source_boost_when_missing: float = 0.6,
... ... @@ -261,16 +261,13 @@ class ESQueryBuilder:
261 261 if parsed_query:
262 262 query_tokens = getattr(parsed_query, 'query_tokens', None) or []
263 263 token_count = len(query_tokens)
264   - if token_count <= 2:
265   - knn_k, knn_num_candidates = 30, 100
266   - knn_boost = self.knn_boost * 0.6 # Lower weight for short queries
267   - elif token_count >= 5:
268   - knn_k, knn_num_candidates = 80, 300
  264 + if token_count >= 5:
  265 + knn_k, knn_num_candidates = 160, 500
269 266 knn_boost = self.knn_boost * 1.4 # Higher weight for long queries
270 267 else:
271   - knn_k, knn_num_candidates = 50, 200
  268 + knn_k, knn_num_candidates = 120, 400
272 269 else:
273   - knn_k, knn_num_candidates = 50, 200
  270 + knn_k, knn_num_candidates = 120, 400
274 271 knn_clause = {
275 272 "field": self.text_embedding_field,
276 273 "query_vector": query_vector.tolist(),
... ...