1. must里面的两个combined_fields查询，boost分别设置为2和0.6，和其他查询条件一起，都放到should里面，设置minimum_should_match==1
2. 如果keywords_query跟combined_fields主查询的query一样，那么不需要再添加了

1. must里面的两个combined_fields查询，boost分别设置为2和0.6，和其他查询条件一起，都放到should里面，设置minimum_should_match==1
2. 如果keywords_query跟combined_fields主查询的query一样，那么不需要再添加了
tangwang
1 parent db9c469c
Showing 4 changed files with 65 additions and 26 deletions Show diff stats
query/query_parser.py
search/es_query_builder.py
tests/test_es_query_builder.py
tests/test_es_query_builder_text_recall_languages.py
@@ -310,6 +310,7 @@ class QueryParser:
         Returns:
             ParsedQuery object with all processing results
         """
+        parse_t0 = time.perf_counter()
         # Initialize logger if context provided
         active_logger = context.logger if context else logger
         if context and hasattr(context, "logger"):
@@ -615,17 +616,23 @@ class QueryParser:
             product_title_exclusion_profile=product_title_exclusion_profile,
         )
+        parse_total_ms = (time.perf_counter() - parse_t0) * 1000.0
+        completion_tail = (
+            f"Translation count: {len(translations)} | "
+            f"Vector: {'yes' if query_vector is not None else 'no'} | "
+            f"Image vector: {'yes' if image_query_vector is not None else 'no'} | "
+            f"parse_total_ms={parse_total_ms:.1f}"
+        )
         if context and hasattr(context, 'logger'):
             context.logger.info(
                 f"Query parsing completed | Original query: '{query}' | Final query: '{rewritten or query_text}' | "
-                f"Translation count: {len(translations)} | Vector: {'yes' if query_vector is not None else 'no'} | "
-                f"Image vector: {'yes' if image_query_vector is not None else 'no'}",
+                f"Language: {detected_lang} | {completion_tail}",
                 extra={'reqid': context.reqid, 'uid': context.uid}
             )
         else:
             logger.info(
                 f"Query parsing completed | Original query: '{query}' | Final query: '{rewritten or query_text}' | "
-                f"Language: {detected_lang}"
+                f"Language: {detected_lang} | {completion_tail}"
             )
         return result
@@ -530,42 +530,46 @@ class ESQueryBuilder:
         minimum_should_match = (
             self.base_minimum_should_match if is_source else self.translation_minimum_should_match
         )
-        must_clauses: List[Dict[str, Any]] = [
+        kw = (keywords_query or "").strip()
+        main_query = (lang_query or "").strip()
+        combined_must: List[Dict[str, Any]] = [
             {
                 "combined_fields": {
-                    "query": lang_query,
+                    "query": main_query,
                     "fields": combined_fields,
                     "minimum_should_match": minimum_should_match,
+                    "boost": 2.0,
                 }
             }
         ]
-        kw = (keywords_query or "").strip()
-        if kw:
-            must_clauses.append(
+        if kw and kw != main_query:
+            combined_must.append(
                 {
                     "combined_fields": {
                         "query": kw,
                         "fields": combined_fields,
                         "minimum_should_match": self.keywords_minimum_should_match,
+                        "boost": 0.6,
                     }
                 }
             )
-        should_clauses = [
+        optional_mm = [
             clause
             for clause in (
-                self._build_best_fields_clause(lang, lang_query),
-                self._build_phrase_clause(lang, lang_query),
+                self._build_best_fields_clause(lang, main_query),
+                self._build_phrase_clause(lang, main_query),
             )
             if clause
         ]
+        should_clauses: List[Dict[str, Any]] = [{"bool": {"must": combined_must}}]
+        should_clauses.extend(optional_mm)
         clause: Dict[str, Any] = {
             "bool": {
                 "_name": clause_name,
-                "must": must_clauses,
+                "should": should_clauses,
+                "minimum_should_match": 1,
             }
         }
-        if should_clauses:
-            clause["bool"]["should"] = should_clauses
         if not is_source:
             clause["bool"]["boost"] = float(self.translation_boost)
         return clause
@@ -112,7 +112,8 @@ def test_text_query_contains_only_base_and_translation_named_queries():
     assert names == ["base_query", "base_query_trans_zh"]
     base_should = should[0]["bool"]["should"]
-    assert [clause["multi_match"]["type"] for clause in base_should] == ["best_fields", "phrase"]
+    mm_types = [c["multi_match"]["type"] for c in base_should if "multi_match" in c]
+    assert mm_types == ["best_fields", "phrase"]
 def test_text_query_skips_duplicate_translation_same_as_base():
@@ -134,7 +135,8 @@ def test_text_query_skips_duplicate_translation_same_as_base():
         query_root = query_root["function_score"]["query"]
     base_bool = query_root["bool"]
     assert base_bool["_name"] == "base_query"
-    assert [clause["multi_match"]["type"] for clause in base_bool["should"]] == ["best_fields", "phrase"]
+    mm_types = [c["multi_match"]["type"] for c in base_bool["should"] if "multi_match" in c]
+    assert mm_types == ["best_fields", "phrase"]
 def test_product_title_exclusion_filter_is_applied_once_on_outer_query():
@@ -63,8 +63,13 @@ def _clauses_index(es_body: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
     return out
+def _combined_fields_must(clause: Dict[str, Any]) -> List[Dict[str, Any]]:
+    first = (clause.get("should") or [])[0]
+    return list(first["bool"]["must"])
+
+
 def _combined_fields_clause(clause: Dict[str, Any]) -> Dict[str, Any]:
-    return clause["must"][0]["combined_fields"]
+    return _combined_fields_must(clause)[0]["combined_fields"]
 def _should_multi_matches(clause: Dict[str, Any]) -> List[Dict[str, Any]]:
@@ -131,7 +136,7 @@ def test_zh_query_index_zh_en_includes_base_zh_and_trans_en():
 def test_keywords_combined_fields_second_must_same_fields_and_50pct():
-    """When ParsedQuery.keywords_queries is set, must includes a second combined_fields."""
+    """When ParsedQuery.keywords_queries is set, inner must has two boosted combined_fields."""
     qb = _builder_multilingual_title_only(default_language="en")
     parsed = SimpleNamespace(
         rewritten_query="连衣裙",
@@ -142,15 +147,36 @@ def test_keywords_combined_fields_second_must_same_fields_and_50pct():
     q = qb.build_query(query_text="连衣裙", parsed_query=parsed, enable_knn=False)
     idx = _clauses_index(q)
     base = idx["base_query"]
-    assert len(base["must"]) == 2
-    assert base["must"][0]["combined_fields"]["query"] == "连衣裙"
-    assert base["must"][1]["combined_fields"]["query"] == "连衣 裙"
-    assert base["must"][1]["combined_fields"]["minimum_should_match"] == "50%"
-    assert base["must"][1]["combined_fields"]["fields"] == base["must"][0]["combined_fields"]["fields"]
+    assert base["minimum_should_match"] == 1
+    bm = _combined_fields_must(base)
+    assert len(bm) == 2
+    assert bm[0]["combined_fields"]["query"] == "连衣裙"
+    assert bm[0]["combined_fields"]["boost"] == 2.0
+    assert bm[1]["combined_fields"]["query"] == "连衣 裙"
+    assert bm[1]["combined_fields"]["minimum_should_match"] == "50%"
+    assert bm[1]["combined_fields"]["boost"] == 0.6
+    assert bm[1]["combined_fields"]["fields"] == bm[0]["combined_fields"]["fields"]
     trans = idx["base_query_trans_en"]
-    assert len(trans["must"]) == 2
-    assert trans["must"][1]["combined_fields"]["query"] == "dress"
-    assert trans["must"][1]["combined_fields"]["minimum_should_match"] == "50%"
+    assert trans["minimum_should_match"] == 1
+    tm = _combined_fields_must(trans)
+    assert len(tm) == 2
+    assert tm[1]["combined_fields"]["query"] == "dress"
+    assert tm[1]["combined_fields"]["minimum_should_match"] == "50%"
+    assert tm[1]["combined_fields"]["boost"] == 0.6
+
+
+def test_keywords_omitted_when_same_as_main_combined_fields_query():
+    """No second combined_fields when keywords query equals the main lexical query."""
+    qb = _builder_multilingual_title_only(default_language="en")
+    parsed = SimpleNamespace(
+        rewritten_query="连衣裙",
+        detected_language="zh",
+        translations={},
+        keywords_queries={KEYWORDS_QUERY_BASE_KEY: "连衣裙"},
+    )
+    q = qb.build_query(query_text="连衣裙", parsed_query=parsed, enable_knn=False)
+    idx = _clauses_index(q)
+    assert len(_combined_fields_must(idx["base_query"])) == 1
 def test_en_query_index_zh_en_includes_base_en_and_trans_zh():