From f8219b5e190aab5295698dca776574338a79d85f Mon Sep 17 00:00:00 2001 From: tangwang Date: Mon, 30 Mar 2026 21:49:36 +0800 Subject: [PATCH] 1. must里面的两个combined_fields查询,boost分别设置为2和0.6,和其他查询条件一起,都放到should里面,设置minimum_should_match==1 2. 如果keywords_query跟combined_fields主查询的query一样,那么不需要再添加了 --- query/query_parser.py | 13 ++++++++++--- search/es_query_builder.py | 26 +++++++++++++++----------- tests/test_es_query_builder.py | 6 ++++-- tests/test_es_query_builder_text_recall_languages.py | 46 ++++++++++++++++++++++++++++++++++++---------- 4 files changed, 65 insertions(+), 26 deletions(-) diff --git a/query/query_parser.py b/query/query_parser.py index 696495e..89ee0d6 100644 --- a/query/query_parser.py +++ b/query/query_parser.py @@ -310,6 +310,7 @@ class QueryParser: Returns: ParsedQuery object with all processing results """ + parse_t0 = time.perf_counter() # Initialize logger if context provided active_logger = context.logger if context else logger if context and hasattr(context, "logger"): @@ -615,17 +616,23 @@ class QueryParser: product_title_exclusion_profile=product_title_exclusion_profile, ) + parse_total_ms = (time.perf_counter() - parse_t0) * 1000.0 + completion_tail = ( + f"Translation count: {len(translations)} | " + f"Vector: {'yes' if query_vector is not None else 'no'} | " + f"Image vector: {'yes' if image_query_vector is not None else 'no'} | " + f"parse_total_ms={parse_total_ms:.1f}" + ) if context and hasattr(context, 'logger'): context.logger.info( f"Query parsing completed | Original query: '{query}' | Final query: '{rewritten or query_text}' | " - f"Translation count: {len(translations)} | Vector: {'yes' if query_vector is not None else 'no'} | " - f"Image vector: {'yes' if image_query_vector is not None else 'no'}", + f"Language: {detected_lang} | {completion_tail}", extra={'reqid': context.reqid, 'uid': context.uid} ) else: logger.info( f"Query parsing completed | Original query: '{query}' | Final query: '{rewritten or query_text}' | " - f"Language: {detected_lang}" + f"Language: {detected_lang} | {completion_tail}" ) return result diff --git a/search/es_query_builder.py b/search/es_query_builder.py index 2ad5fac..0292d9f 100644 --- a/search/es_query_builder.py +++ b/search/es_query_builder.py @@ -530,42 +530,46 @@ class ESQueryBuilder: minimum_should_match = ( self.base_minimum_should_match if is_source else self.translation_minimum_should_match ) - must_clauses: List[Dict[str, Any]] = [ + kw = (keywords_query or "").strip() + main_query = (lang_query or "").strip() + combined_must: List[Dict[str, Any]] = [ { "combined_fields": { - "query": lang_query, + "query": main_query, "fields": combined_fields, "minimum_should_match": minimum_should_match, + "boost": 2.0, } } ] - kw = (keywords_query or "").strip() - if kw: - must_clauses.append( + if kw and kw != main_query: + combined_must.append( { "combined_fields": { "query": kw, "fields": combined_fields, "minimum_should_match": self.keywords_minimum_should_match, + "boost": 0.6, } } ) - should_clauses = [ + optional_mm = [ clause for clause in ( - self._build_best_fields_clause(lang, lang_query), - self._build_phrase_clause(lang, lang_query), + self._build_best_fields_clause(lang, main_query), + self._build_phrase_clause(lang, main_query), ) if clause ] + should_clauses: List[Dict[str, Any]] = [{"bool": {"must": combined_must}}] + should_clauses.extend(optional_mm) clause: Dict[str, Any] = { "bool": { "_name": clause_name, - "must": must_clauses, + "should": should_clauses, + "minimum_should_match": 1, } } - if should_clauses: - clause["bool"]["should"] = should_clauses if not is_source: clause["bool"]["boost"] = float(self.translation_boost) return clause diff --git a/tests/test_es_query_builder.py b/tests/test_es_query_builder.py index 03c8448..4e35cff 100644 --- a/tests/test_es_query_builder.py +++ b/tests/test_es_query_builder.py @@ -112,7 +112,8 @@ def test_text_query_contains_only_base_and_translation_named_queries(): assert names == ["base_query", "base_query_trans_zh"] base_should = should[0]["bool"]["should"] - assert [clause["multi_match"]["type"] for clause in base_should] == ["best_fields", "phrase"] + mm_types = [c["multi_match"]["type"] for c in base_should if "multi_match" in c] + assert mm_types == ["best_fields", "phrase"] def test_text_query_skips_duplicate_translation_same_as_base(): @@ -134,7 +135,8 @@ def test_text_query_skips_duplicate_translation_same_as_base(): query_root = query_root["function_score"]["query"] base_bool = query_root["bool"] assert base_bool["_name"] == "base_query" - assert [clause["multi_match"]["type"] for clause in base_bool["should"]] == ["best_fields", "phrase"] + mm_types = [c["multi_match"]["type"] for c in base_bool["should"] if "multi_match" in c] + assert mm_types == ["best_fields", "phrase"] def test_product_title_exclusion_filter_is_applied_once_on_outer_query(): diff --git a/tests/test_es_query_builder_text_recall_languages.py b/tests/test_es_query_builder_text_recall_languages.py index 8cfca7c..e5e89a0 100644 --- a/tests/test_es_query_builder_text_recall_languages.py +++ b/tests/test_es_query_builder_text_recall_languages.py @@ -63,8 +63,13 @@ def _clauses_index(es_body: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: return out +def _combined_fields_must(clause: Dict[str, Any]) -> List[Dict[str, Any]]: + first = (clause.get("should") or [])[0] + return list(first["bool"]["must"]) + + def _combined_fields_clause(clause: Dict[str, Any]) -> Dict[str, Any]: - return clause["must"][0]["combined_fields"] + return _combined_fields_must(clause)[0]["combined_fields"] def _should_multi_matches(clause: Dict[str, Any]) -> List[Dict[str, Any]]: @@ -131,7 +136,7 @@ def test_zh_query_index_zh_en_includes_base_zh_and_trans_en(): def test_keywords_combined_fields_second_must_same_fields_and_50pct(): - """When ParsedQuery.keywords_queries is set, must includes a second combined_fields.""" + """When ParsedQuery.keywords_queries is set, inner must has two boosted combined_fields.""" qb = _builder_multilingual_title_only(default_language="en") parsed = SimpleNamespace( rewritten_query="连衣裙", @@ -142,15 +147,36 @@ def test_keywords_combined_fields_second_must_same_fields_and_50pct(): q = qb.build_query(query_text="连衣裙", parsed_query=parsed, enable_knn=False) idx = _clauses_index(q) base = idx["base_query"] - assert len(base["must"]) == 2 - assert base["must"][0]["combined_fields"]["query"] == "连衣裙" - assert base["must"][1]["combined_fields"]["query"] == "连衣 裙" - assert base["must"][1]["combined_fields"]["minimum_should_match"] == "50%" - assert base["must"][1]["combined_fields"]["fields"] == base["must"][0]["combined_fields"]["fields"] + assert base["minimum_should_match"] == 1 + bm = _combined_fields_must(base) + assert len(bm) == 2 + assert bm[0]["combined_fields"]["query"] == "连衣裙" + assert bm[0]["combined_fields"]["boost"] == 2.0 + assert bm[1]["combined_fields"]["query"] == "连衣 裙" + assert bm[1]["combined_fields"]["minimum_should_match"] == "50%" + assert bm[1]["combined_fields"]["boost"] == 0.6 + assert bm[1]["combined_fields"]["fields"] == bm[0]["combined_fields"]["fields"] trans = idx["base_query_trans_en"] - assert len(trans["must"]) == 2 - assert trans["must"][1]["combined_fields"]["query"] == "dress" - assert trans["must"][1]["combined_fields"]["minimum_should_match"] == "50%" + assert trans["minimum_should_match"] == 1 + tm = _combined_fields_must(trans) + assert len(tm) == 2 + assert tm[1]["combined_fields"]["query"] == "dress" + assert tm[1]["combined_fields"]["minimum_should_match"] == "50%" + assert tm[1]["combined_fields"]["boost"] == 0.6 + + +def test_keywords_omitted_when_same_as_main_combined_fields_query(): + """No second combined_fields when keywords query equals the main lexical query.""" + qb = _builder_multilingual_title_only(default_language="en") + parsed = SimpleNamespace( + rewritten_query="连衣裙", + detected_language="zh", + translations={}, + keywords_queries={KEYWORDS_QUERY_BASE_KEY: "连衣裙"}, + ) + q = qb.build_query(query_text="连衣裙", parsed_query=parsed, enable_knn=False) + idx = _clauses_index(q) + assert len(_combined_fields_must(idx["base_query"])) == 1 def test_en_query_index_zh_en_includes_base_en_and_trans_zh(): -- libgit2 0.21.2