Commit f8219b5e190aab5295698dca776574338a79d85f

Authored by tangwang
1 parent db9c469c

1.

原先放在must里面的两个combined_fields查询,boost分别设置为2.0和0.6;这两个查询仍作为一个整体(内层bool.must),和其他查询条件(best_fields、phrase)一起,都放到should里面,并设置minimum_should_match=1
2.
如果keywords_query(去除首尾空白后)跟combined_fields主查询的query一样,那么不需要再添加第二个combined_fields查询了
query/query_parser.py
... ... @@ -310,6 +310,7 @@ class QueryParser:
310 310 Returns:
311 311 ParsedQuery object with all processing results
312 312 """
  313 + parse_t0 = time.perf_counter()
313 314 # Initialize logger if context provided
314 315 active_logger = context.logger if context else logger
315 316 if context and hasattr(context, "logger"):
... ... @@ -615,17 +616,23 @@ class QueryParser:
615 616 product_title_exclusion_profile=product_title_exclusion_profile,
616 617 )
617 618  
  619 + parse_total_ms = (time.perf_counter() - parse_t0) * 1000.0
  620 + completion_tail = (
  621 + f"Translation count: {len(translations)} | "
  622 + f"Vector: {'yes' if query_vector is not None else 'no'} | "
  623 + f"Image vector: {'yes' if image_query_vector is not None else 'no'} | "
  624 + f"parse_total_ms={parse_total_ms:.1f}"
  625 + )
618 626 if context and hasattr(context, 'logger'):
619 627 context.logger.info(
620 628 f"Query parsing completed | Original query: '{query}' | Final query: '{rewritten or query_text}' | "
621   - f"Translation count: {len(translations)} | Vector: {'yes' if query_vector is not None else 'no'} | "
622   - f"Image vector: {'yes' if image_query_vector is not None else 'no'}",
  629 + f"Language: {detected_lang} | {completion_tail}",
623 630 extra={'reqid': context.reqid, 'uid': context.uid}
624 631 )
625 632 else:
626 633 logger.info(
627 634 f"Query parsing completed | Original query: '{query}' | Final query: '{rewritten or query_text}' | "
628   - f"Language: {detected_lang}"
  635 + f"Language: {detected_lang} | {completion_tail}"
629 636 )
630 637  
631 638 return result
... ...
search/es_query_builder.py
... ... @@ -530,42 +530,46 @@ class ESQueryBuilder:
530 530 minimum_should_match = (
531 531 self.base_minimum_should_match if is_source else self.translation_minimum_should_match
532 532 )
533   - must_clauses: List[Dict[str, Any]] = [
  533 + kw = (keywords_query or "").strip()
  534 + main_query = (lang_query or "").strip()
  535 + combined_must: List[Dict[str, Any]] = [
534 536 {
535 537 "combined_fields": {
536   - "query": lang_query,
  538 + "query": main_query,
537 539 "fields": combined_fields,
538 540 "minimum_should_match": minimum_should_match,
  541 + "boost": 2.0,
539 542 }
540 543 }
541 544 ]
542   - kw = (keywords_query or "").strip()
543   - if kw:
544   - must_clauses.append(
  545 + if kw and kw != main_query:
  546 + combined_must.append(
545 547 {
546 548 "combined_fields": {
547 549 "query": kw,
548 550 "fields": combined_fields,
549 551 "minimum_should_match": self.keywords_minimum_should_match,
  552 + "boost": 0.6,
550 553 }
551 554 }
552 555 )
553   - should_clauses = [
  556 + optional_mm = [
554 557 clause
555 558 for clause in (
556   - self._build_best_fields_clause(lang, lang_query),
557   - self._build_phrase_clause(lang, lang_query),
  559 + self._build_best_fields_clause(lang, main_query),
  560 + self._build_phrase_clause(lang, main_query),
558 561 )
559 562 if clause
560 563 ]
  564 + should_clauses: List[Dict[str, Any]] = [{"bool": {"must": combined_must}}]
  565 + should_clauses.extend(optional_mm)
561 566 clause: Dict[str, Any] = {
562 567 "bool": {
563 568 "_name": clause_name,
564   - "must": must_clauses,
  569 + "should": should_clauses,
  570 + "minimum_should_match": 1,
565 571 }
566 572 }
567   - if should_clauses:
568   - clause["bool"]["should"] = should_clauses
569 573 if not is_source:
570 574 clause["bool"]["boost"] = float(self.translation_boost)
571 575 return clause
... ...
tests/test_es_query_builder.py
... ... @@ -112,7 +112,8 @@ def test_text_query_contains_only_base_and_translation_named_queries():
112 112  
113 113 assert names == ["base_query", "base_query_trans_zh"]
114 114 base_should = should[0]["bool"]["should"]
115   - assert [clause["multi_match"]["type"] for clause in base_should] == ["best_fields", "phrase"]
  115 + mm_types = [c["multi_match"]["type"] for c in base_should if "multi_match" in c]
  116 + assert mm_types == ["best_fields", "phrase"]
116 117  
117 118  
118 119 def test_text_query_skips_duplicate_translation_same_as_base():
... ... @@ -134,7 +135,8 @@ def test_text_query_skips_duplicate_translation_same_as_base():
134 135 query_root = query_root["function_score"]["query"]
135 136 base_bool = query_root["bool"]
136 137 assert base_bool["_name"] == "base_query"
137   - assert [clause["multi_match"]["type"] for clause in base_bool["should"]] == ["best_fields", "phrase"]
  138 + mm_types = [c["multi_match"]["type"] for c in base_bool["should"] if "multi_match" in c]
  139 + assert mm_types == ["best_fields", "phrase"]
138 140  
139 141  
140 142 def test_product_title_exclusion_filter_is_applied_once_on_outer_query():
... ...
tests/test_es_query_builder_text_recall_languages.py
... ... @@ -63,8 +63,13 @@ def _clauses_index(es_body: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
63 63 return out
64 64  
65 65  
  66 +def _combined_fields_must(clause: Dict[str, Any]) -> List[Dict[str, Any]]:
  67 + first = (clause.get("should") or [])[0]
  68 + return list(first["bool"]["must"])
  69 +
  70 +
66 71 def _combined_fields_clause(clause: Dict[str, Any]) -> Dict[str, Any]:
67   - return clause["must"][0]["combined_fields"]
  72 + return _combined_fields_must(clause)[0]["combined_fields"]
68 73  
69 74  
70 75 def _should_multi_matches(clause: Dict[str, Any]) -> List[Dict[str, Any]]:
... ... @@ -131,7 +136,7 @@ def test_zh_query_index_zh_en_includes_base_zh_and_trans_en():
131 136  
132 137  
133 138 def test_keywords_combined_fields_second_must_same_fields_and_50pct():
134   - """When ParsedQuery.keywords_queries is set, must includes a second combined_fields."""
  139 + """When ParsedQuery.keywords_queries is set, inner must has two boosted combined_fields."""
135 140 qb = _builder_multilingual_title_only(default_language="en")
136 141 parsed = SimpleNamespace(
137 142 rewritten_query="连衣裙",
... ... @@ -142,15 +147,36 @@ def test_keywords_combined_fields_second_must_same_fields_and_50pct():
142 147 q = qb.build_query(query_text="连衣裙", parsed_query=parsed, enable_knn=False)
143 148 idx = _clauses_index(q)
144 149 base = idx["base_query"]
145   - assert len(base["must"]) == 2
146   - assert base["must"][0]["combined_fields"]["query"] == "连衣裙"
147   - assert base["must"][1]["combined_fields"]["query"] == "连衣 裙"
148   - assert base["must"][1]["combined_fields"]["minimum_should_match"] == "50%"
149   - assert base["must"][1]["combined_fields"]["fields"] == base["must"][0]["combined_fields"]["fields"]
  150 + assert base["minimum_should_match"] == 1
  151 + bm = _combined_fields_must(base)
  152 + assert len(bm) == 2
  153 + assert bm[0]["combined_fields"]["query"] == "连衣裙"
  154 + assert bm[0]["combined_fields"]["boost"] == 2.0
  155 + assert bm[1]["combined_fields"]["query"] == "连衣 裙"
  156 + assert bm[1]["combined_fields"]["minimum_should_match"] == "50%"
  157 + assert bm[1]["combined_fields"]["boost"] == 0.6
  158 + assert bm[1]["combined_fields"]["fields"] == bm[0]["combined_fields"]["fields"]
150 159 trans = idx["base_query_trans_en"]
151   - assert len(trans["must"]) == 2
152   - assert trans["must"][1]["combined_fields"]["query"] == "dress"
153   - assert trans["must"][1]["combined_fields"]["minimum_should_match"] == "50%"
  160 + assert trans["minimum_should_match"] == 1
  161 + tm = _combined_fields_must(trans)
  162 + assert len(tm) == 2
  163 + assert tm[1]["combined_fields"]["query"] == "dress"
  164 + assert tm[1]["combined_fields"]["minimum_should_match"] == "50%"
  165 + assert tm[1]["combined_fields"]["boost"] == 0.6
  166 +
  167 +
  168 +def test_keywords_omitted_when_same_as_main_combined_fields_query():
  169 + """No second combined_fields when keywords query equals the main lexical query."""
  170 + qb = _builder_multilingual_title_only(default_language="en")
  171 + parsed = SimpleNamespace(
  172 + rewritten_query="连衣裙",
  173 + detected_language="zh",
  174 + translations={},
  175 + keywords_queries={KEYWORDS_QUERY_BASE_KEY: "连衣裙"},
  176 + )
  177 + q = qb.build_query(query_text="连衣裙", parsed_query=parsed, enable_knn=False)
  178 + idx = _clauses_index(q)
  179 + assert len(_combined_fields_must(idx["base_query"])) == 1
154 180  
155 181  
156 182 def test_en_query_index_zh_en_includes_base_en_and_trans_zh():
... ...