diff --git a/config/loader.py b/config/loader.py index c158505..282d471 100644 --- a/config/loader.py +++ b/config/loader.py @@ -281,8 +281,8 @@ class AppConfigLoader: ["title", "brief", "vendor", "category_name_text"], ) ), - base_minimum_should_match=str(text_strategy.get("base_minimum_should_match", "75%")), - translation_minimum_should_match=str(text_strategy.get("translation_minimum_should_match", "75%")), + base_minimum_should_match=str(text_strategy.get("base_minimum_should_match", "70%")), + translation_minimum_should_match=str(text_strategy.get("translation_minimum_should_match", "70%")), translation_boost=float(text_strategy.get("translation_boost", 0.4)), translation_boost_when_source_missing=float( text_strategy.get("translation_boost_when_source_missing", 1.0) diff --git a/config/schema.py b/config/schema.py index 8a03472..f570f58 100644 --- a/config/schema.py +++ b/config/schema.py @@ -51,8 +51,8 @@ class QueryConfig: core_multilingual_fields: List[str] = field( default_factory=lambda: ["title", "brief", "vendor", "category_name_text"] ) - base_minimum_should_match: str = "75%" - translation_minimum_should_match: str = "75%" + base_minimum_should_match: str = "70%" + translation_minimum_should_match: str = "70%" translation_boost: float = 0.4 translation_boost_when_source_missing: float = 1.0 source_boost_when_missing: float = 0.6 diff --git a/docs/TODO.txt b/docs/TODO.txt index 5e6eaea..f628ff9 100644 --- a/docs/TODO.txt +++ b/docs/TODO.txt @@ -236,10 +236,13 @@ config/environments/.yaml +筛选SKU: 先只筛选第一个维度,但考虑到用户搜索词可能带了尺码,所以第二、三个维度也要考虑 - - +引入图片的相关性: +图片的向量最好做SKU维度,用 SPU 维度还是 SKU 维度? +1. SKU维度(主款式,option1维度),如果用户搜索“蓝色 T恤”,这种图片相关性会比较有价值。 +2. 
我不考虑颜色的差异,其余的款式一般是大小之类的。这些图片,向量细分到 SKU 维度,可能价值不大,性价比偏低 diff --git a/docs/常用查询 - ES.md b/docs/常用查询 - ES.md index 5bd7448..c028b8c 100644 --- a/docs/常用查询 - ES.md +++ b/docs/常用查询 - ES.md @@ -654,3 +654,18 @@ GET /search_products_tenant_170/_search } } } + + +检查某个字段是否存在 +curl -u 'saas:4hOaLaf41y2VuI8y' -X POST \ + 'http://localhost:9200/search_products_tenant_163/_count' \ + -H 'Content-Type: application/json' \ + -d '{ + "query": { + "bool": { + "filter": [ + { "exists": { "field": "title_embedding" } } + ] + } + } + }' \ No newline at end of file diff --git a/search/es_query_builder.py b/search/es_query_builder.py index 7266aa4..9dc25ad 100644 --- a/search/es_query_builder.py +++ b/search/es_query_builder.py @@ -33,8 +33,8 @@ class ESQueryBuilder: function_score_config: Optional[FunctionScoreConfig] = None, default_language: str = "en", knn_boost: float = 0.25, - base_minimum_should_match: str = "75%", - translation_minimum_should_match: str = "75%", + base_minimum_should_match: str = "70%", + translation_minimum_should_match: str = "70%", translation_boost: float = 0.4, translation_boost_when_source_missing: float = 1.0, source_boost_when_missing: float = 0.6, @@ -261,16 +261,13 @@ class ESQueryBuilder: if parsed_query: query_tokens = getattr(parsed_query, 'query_tokens', None) or [] token_count = len(query_tokens) - if token_count <= 2: - knn_k, knn_num_candidates = 30, 100 - knn_boost = self.knn_boost * 0.6 # Lower weight for short queries - elif token_count >= 5: - knn_k, knn_num_candidates = 80, 300 + if token_count >= 5: + knn_k, knn_num_candidates = 160, 500 knn_boost = self.knn_boost * 1.4 # Higher weight for long queries else: - knn_k, knn_num_candidates = 50, 200 + knn_k, knn_num_candidates = 120, 400 else: - knn_k, knn_num_candidates = 50, 200 + knn_k, knn_num_candidates = 120, 400 knn_clause = { "field": self.text_embedding_field, "query_vector": query_vector.tolist(), -- libgit2 0.21.2