Commit 272aeabee72d2d8ef22a514c5d50cb12c2948aed
1 parent
a7cc9078
调参
Showing
5 changed files
with
30 additions
and
15 deletions
Show diff stats
config/loader.py
| ... | ... | @@ -281,8 +281,8 @@ class AppConfigLoader: |
| 281 | 281 | ["title", "brief", "vendor", "category_name_text"], |
| 282 | 282 | ) |
| 283 | 283 | ), |
| 284 | - base_minimum_should_match=str(text_strategy.get("base_minimum_should_match", "75%")), | |
| 285 | - translation_minimum_should_match=str(text_strategy.get("translation_minimum_should_match", "75%")), | |
| 284 | + base_minimum_should_match=str(text_strategy.get("base_minimum_should_match", "70%")), | |
| 285 | + translation_minimum_should_match=str(text_strategy.get("translation_minimum_should_match", "70%")), | |
| 286 | 286 | translation_boost=float(text_strategy.get("translation_boost", 0.4)), |
| 287 | 287 | translation_boost_when_source_missing=float( |
| 288 | 288 | text_strategy.get("translation_boost_when_source_missing", 1.0) | ... | ... |
config/schema.py
| ... | ... | @@ -51,8 +51,8 @@ class QueryConfig: |
| 51 | 51 | core_multilingual_fields: List[str] = field( |
| 52 | 52 | default_factory=lambda: ["title", "brief", "vendor", "category_name_text"] |
| 53 | 53 | ) |
| 54 | - base_minimum_should_match: str = "75%" | |
| 55 | - translation_minimum_should_match: str = "75%" | |
| 54 | + base_minimum_should_match: str = "70%" | |
| 55 | + translation_minimum_should_match: str = "70%" | |
| 56 | 56 | translation_boost: float = 0.4 |
| 57 | 57 | translation_boost_when_source_missing: float = 1.0 |
| 58 | 58 | source_boost_when_missing: float = 0.6 | ... | ... |
docs/TODO.txt
| ... | ... | @@ -236,10 +236,13 @@ config/environments/<env>.yaml |
| 236 | 236 | |
| 237 | 237 | |
| 238 | 238 | |
| 239 | +筛选SKU: 先只筛选第一个维度,但考虑到用户搜索词可能带了尺码,所以第二、三个维度也要考虑 | |
| 239 | 240 | |
| 240 | 241 | |
| 241 | - | |
| 242 | - | |
| 242 | +引入图片的相关性: | |
| 243 | +图片的向量最好做SKU维度,用 SPU 维度还是 SKU 维度? | |
| 244 | +1. SKU维度(主款式,option1维度),如果用户搜索“蓝色 T恤”,这种图片相关性会比较有价值。 | |
| 245 | +2. 我不考虑颜色的差异,其余的款式一般是大小之类的。这些图片,项链细粉到 SKU 维度,可能价值不大,性价比偏低 | |
| 243 | 246 | |
| 244 | 247 | |
| 245 | 248 | ... | ... |
docs/常用查询 - ES.md
| ... | ... | @@ -654,3 +654,18 @@ GET /search_products_tenant_170/_search |
| 654 | 654 | } |
| 655 | 655 | } |
| 656 | 656 | } |
| 657 | + | |
| 658 | + | |
| 659 | +检查某个字段是否存在 | |
| 660 | +curl -u "$ES_USER:$ES_PASSWORD" -X POST \ | |
| 661 | + 'http://localhost:9200/search_products_tenant_163/_count' \ | |
| 662 | + -H 'Content-Type: application/json' \ | |
| 663 | + -d '{ | |
| 664 | + "query": { | |
| 665 | + "bool": { | |
| 666 | + "filter": [ | |
| 667 | + { "exists": { "field": "title_embedding" } } | |
| 668 | + ] | |
| 669 | + } | |
| 670 | + } | |
| 671 | + }' | |
| 657 | 672 | \ No newline at end of file | ... | ... |
search/es_query_builder.py
| ... | ... | @@ -33,8 +33,8 @@ class ESQueryBuilder: |
| 33 | 33 | function_score_config: Optional[FunctionScoreConfig] = None, |
| 34 | 34 | default_language: str = "en", |
| 35 | 35 | knn_boost: float = 0.25, |
| 36 | - base_minimum_should_match: str = "75%", | |
| 37 | - translation_minimum_should_match: str = "75%", | |
| 36 | + base_minimum_should_match: str = "70%", | |
| 37 | + translation_minimum_should_match: str = "70%", | |
| 38 | 38 | translation_boost: float = 0.4, |
| 39 | 39 | translation_boost_when_source_missing: float = 1.0, |
| 40 | 40 | source_boost_when_missing: float = 0.6, |
| ... | ... | @@ -261,16 +261,13 @@ class ESQueryBuilder: |
| 261 | 261 | if parsed_query: |
| 262 | 262 | query_tokens = getattr(parsed_query, 'query_tokens', None) or [] |
| 263 | 263 | token_count = len(query_tokens) |
| 264 | - if token_count <= 2: | |
| 265 | - knn_k, knn_num_candidates = 30, 100 | |
| 266 | - knn_boost = self.knn_boost * 0.6 # Lower weight for short queries | |
| 267 | - elif token_count >= 5: | |
| 268 | - knn_k, knn_num_candidates = 80, 300 | |
| 264 | + if token_count >= 5: | |
| 265 | + knn_k, knn_num_candidates = 160, 500 | |
| 269 | 266 | knn_boost = self.knn_boost * 1.4 # Higher weight for long queries |
| 270 | 267 | else: |
| 271 | - knn_k, knn_num_candidates = 50, 200 | |
| 268 | + knn_k, knn_num_candidates = 120, 400 | |
| 272 | 269 | else: |
| 273 | - knn_k, knn_num_candidates = 50, 200 | |
| 270 | + knn_k, knn_num_candidates = 120, 400 | |
| 274 | 271 | knn_clause = { |
| 275 | 272 | "field": self.text_embedding_field, |
| 276 | 273 | "query_vector": query_vector.tolist(), | ... | ... |