Commit f8219b5e190aab5295698dca776574338a79d85f

Authored by tangwang
1 parent db9c469c

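1. 原 must 里面的两个 combined_fields 查询,boost 分别设置为 2.0 和 0.6;这两个查询仍包在一个内层 bool.must 中,作为一个整体与其他查询条件(best_fields、phrase)一起放到 should 里面,并设置 minimum_should_match=1。
2. 如果 keywords_query 跟 combined_fields 主查询的 query 一样(去除首尾空白后比较),那么不需要再添加第二个 combined_fields 查询。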
query/query_parser.py
@@ -310,6 +310,7 @@ class QueryParser: @@ -310,6 +310,7 @@ class QueryParser:
310 Returns: 310 Returns:
311 ParsedQuery object with all processing results 311 ParsedQuery object with all processing results
312 """ 312 """
  313 + parse_t0 = time.perf_counter()
313 # Initialize logger if context provided 314 # Initialize logger if context provided
314 active_logger = context.logger if context else logger 315 active_logger = context.logger if context else logger
315 if context and hasattr(context, "logger"): 316 if context and hasattr(context, "logger"):
@@ -615,17 +616,23 @@ class QueryParser: @@ -615,17 +616,23 @@ class QueryParser:
615 product_title_exclusion_profile=product_title_exclusion_profile, 616 product_title_exclusion_profile=product_title_exclusion_profile,
616 ) 617 )
617 618
  619 + parse_total_ms = (time.perf_counter() - parse_t0) * 1000.0
  620 + completion_tail = (
  621 + f"Translation count: {len(translations)} | "
  622 + f"Vector: {'yes' if query_vector is not None else 'no'} | "
  623 + f"Image vector: {'yes' if image_query_vector is not None else 'no'} | "
  624 + f"parse_total_ms={parse_total_ms:.1f}"
  625 + )
618 if context and hasattr(context, 'logger'): 626 if context and hasattr(context, 'logger'):
619 context.logger.info( 627 context.logger.info(
620 f"Query parsing completed | Original query: '{query}' | Final query: '{rewritten or query_text}' | " 628 f"Query parsing completed | Original query: '{query}' | Final query: '{rewritten or query_text}' | "
621 - f"Translation count: {len(translations)} | Vector: {'yes' if query_vector is not None else 'no'} | "  
622 - f"Image vector: {'yes' if image_query_vector is not None else 'no'}", 629 + f"Language: {detected_lang} | {completion_tail}",
623 extra={'reqid': context.reqid, 'uid': context.uid} 630 extra={'reqid': context.reqid, 'uid': context.uid}
624 ) 631 )
625 else: 632 else:
626 logger.info( 633 logger.info(
627 f"Query parsing completed | Original query: '{query}' | Final query: '{rewritten or query_text}' | " 634 f"Query parsing completed | Original query: '{query}' | Final query: '{rewritten or query_text}' | "
628 - f"Language: {detected_lang}" 635 + f"Language: {detected_lang} | {completion_tail}"
629 ) 636 )
630 637
631 return result 638 return result
search/es_query_builder.py
@@ -530,42 +530,46 @@ class ESQueryBuilder: @@ -530,42 +530,46 @@ class ESQueryBuilder:
530 minimum_should_match = ( 530 minimum_should_match = (
531 self.base_minimum_should_match if is_source else self.translation_minimum_should_match 531 self.base_minimum_should_match if is_source else self.translation_minimum_should_match
532 ) 532 )
533 - must_clauses: List[Dict[str, Any]] = [ 533 + kw = (keywords_query or "").strip()
  534 + main_query = (lang_query or "").strip()
  535 + combined_must: List[Dict[str, Any]] = [
534 { 536 {
535 "combined_fields": { 537 "combined_fields": {
536 - "query": lang_query, 538 + "query": main_query,
537 "fields": combined_fields, 539 "fields": combined_fields,
538 "minimum_should_match": minimum_should_match, 540 "minimum_should_match": minimum_should_match,
  541 + "boost": 2.0,
539 } 542 }
540 } 543 }
541 ] 544 ]
542 - kw = (keywords_query or "").strip()  
543 - if kw:  
544 - must_clauses.append( 545 + if kw and kw != main_query:
  546 + combined_must.append(
545 { 547 {
546 "combined_fields": { 548 "combined_fields": {
547 "query": kw, 549 "query": kw,
548 "fields": combined_fields, 550 "fields": combined_fields,
549 "minimum_should_match": self.keywords_minimum_should_match, 551 "minimum_should_match": self.keywords_minimum_should_match,
  552 + "boost": 0.6,
550 } 553 }
551 } 554 }
552 ) 555 )
553 - should_clauses = [ 556 + optional_mm = [
554 clause 557 clause
555 for clause in ( 558 for clause in (
556 - self._build_best_fields_clause(lang, lang_query),  
557 - self._build_phrase_clause(lang, lang_query), 559 + self._build_best_fields_clause(lang, main_query),
  560 + self._build_phrase_clause(lang, main_query),
558 ) 561 )
559 if clause 562 if clause
560 ] 563 ]
  564 + should_clauses: List[Dict[str, Any]] = [{"bool": {"must": combined_must}}]
  565 + should_clauses.extend(optional_mm)
561 clause: Dict[str, Any] = { 566 clause: Dict[str, Any] = {
562 "bool": { 567 "bool": {
563 "_name": clause_name, 568 "_name": clause_name,
564 - "must": must_clauses, 569 + "should": should_clauses,
  570 + "minimum_should_match": 1,
565 } 571 }
566 } 572 }
567 - if should_clauses:  
568 - clause["bool"]["should"] = should_clauses  
569 if not is_source: 573 if not is_source:
570 clause["bool"]["boost"] = float(self.translation_boost) 574 clause["bool"]["boost"] = float(self.translation_boost)
571 return clause 575 return clause
tests/test_es_query_builder.py
@@ -112,7 +112,8 @@ def test_text_query_contains_only_base_and_translation_named_queries(): @@ -112,7 +112,8 @@ def test_text_query_contains_only_base_and_translation_named_queries():
112 112
113 assert names == ["base_query", "base_query_trans_zh"] 113 assert names == ["base_query", "base_query_trans_zh"]
114 base_should = should[0]["bool"]["should"] 114 base_should = should[0]["bool"]["should"]
115 - assert [clause["multi_match"]["type"] for clause in base_should] == ["best_fields", "phrase"] 115 + mm_types = [c["multi_match"]["type"] for c in base_should if "multi_match" in c]
  116 + assert mm_types == ["best_fields", "phrase"]
116 117
117 118
118 def test_text_query_skips_duplicate_translation_same_as_base(): 119 def test_text_query_skips_duplicate_translation_same_as_base():
@@ -134,7 +135,8 @@ def test_text_query_skips_duplicate_translation_same_as_base(): @@ -134,7 +135,8 @@ def test_text_query_skips_duplicate_translation_same_as_base():
134 query_root = query_root["function_score"]["query"] 135 query_root = query_root["function_score"]["query"]
135 base_bool = query_root["bool"] 136 base_bool = query_root["bool"]
136 assert base_bool["_name"] == "base_query" 137 assert base_bool["_name"] == "base_query"
137 - assert [clause["multi_match"]["type"] for clause in base_bool["should"]] == ["best_fields", "phrase"] 138 + mm_types = [c["multi_match"]["type"] for c in base_bool["should"] if "multi_match" in c]
  139 + assert mm_types == ["best_fields", "phrase"]
138 140
139 141
140 def test_product_title_exclusion_filter_is_applied_once_on_outer_query(): 142 def test_product_title_exclusion_filter_is_applied_once_on_outer_query():
tests/test_es_query_builder_text_recall_languages.py
@@ -63,8 +63,13 @@ def _clauses_index(es_body: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: @@ -63,8 +63,13 @@ def _clauses_index(es_body: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
63 return out 63 return out
64 64
65 65
  66 +def _combined_fields_must(clause: Dict[str, Any]) -> List[Dict[str, Any]]:
  67 + first = (clause.get("should") or [])[0]
  68 + return list(first["bool"]["must"])
  69 +
  70 +
66 def _combined_fields_clause(clause: Dict[str, Any]) -> Dict[str, Any]: 71 def _combined_fields_clause(clause: Dict[str, Any]) -> Dict[str, Any]:
67 - return clause["must"][0]["combined_fields"] 72 + return _combined_fields_must(clause)[0]["combined_fields"]
68 73
69 74
70 def _should_multi_matches(clause: Dict[str, Any]) -> List[Dict[str, Any]]: 75 def _should_multi_matches(clause: Dict[str, Any]) -> List[Dict[str, Any]]:
@@ -131,7 +136,7 @@ def test_zh_query_index_zh_en_includes_base_zh_and_trans_en(): @@ -131,7 +136,7 @@ def test_zh_query_index_zh_en_includes_base_zh_and_trans_en():
131 136
132 137
133 def test_keywords_combined_fields_second_must_same_fields_and_50pct(): 138 def test_keywords_combined_fields_second_must_same_fields_and_50pct():
134 - """When ParsedQuery.keywords_queries is set, must includes a second combined_fields.""" 139 + """When ParsedQuery.keywords_queries is set, inner must has two boosted combined_fields."""
135 qb = _builder_multilingual_title_only(default_language="en") 140 qb = _builder_multilingual_title_only(default_language="en")
136 parsed = SimpleNamespace( 141 parsed = SimpleNamespace(
137 rewritten_query="连衣裙", 142 rewritten_query="连衣裙",
@@ -142,15 +147,36 @@ def test_keywords_combined_fields_second_must_same_fields_and_50pct(): @@ -142,15 +147,36 @@ def test_keywords_combined_fields_second_must_same_fields_and_50pct():
142 q = qb.build_query(query_text="连衣裙", parsed_query=parsed, enable_knn=False) 147 q = qb.build_query(query_text="连衣裙", parsed_query=parsed, enable_knn=False)
143 idx = _clauses_index(q) 148 idx = _clauses_index(q)
144 base = idx["base_query"] 149 base = idx["base_query"]
145 - assert len(base["must"]) == 2  
146 - assert base["must"][0]["combined_fields"]["query"] == "连衣裙"  
147 - assert base["must"][1]["combined_fields"]["query"] == "连衣 裙"  
148 - assert base["must"][1]["combined_fields"]["minimum_should_match"] == "50%"  
149 - assert base["must"][1]["combined_fields"]["fields"] == base["must"][0]["combined_fields"]["fields"] 150 + assert base["minimum_should_match"] == 1
  151 + bm = _combined_fields_must(base)
  152 + assert len(bm) == 2
  153 + assert bm[0]["combined_fields"]["query"] == "连衣裙"
  154 + assert bm[0]["combined_fields"]["boost"] == 2.0
  155 + assert bm[1]["combined_fields"]["query"] == "连衣 裙"
  156 + assert bm[1]["combined_fields"]["minimum_should_match"] == "50%"
  157 + assert bm[1]["combined_fields"]["boost"] == 0.6
  158 + assert bm[1]["combined_fields"]["fields"] == bm[0]["combined_fields"]["fields"]
150 trans = idx["base_query_trans_en"] 159 trans = idx["base_query_trans_en"]
151 - assert len(trans["must"]) == 2  
152 - assert trans["must"][1]["combined_fields"]["query"] == "dress"  
153 - assert trans["must"][1]["combined_fields"]["minimum_should_match"] == "50%" 160 + assert trans["minimum_should_match"] == 1
  161 + tm = _combined_fields_must(trans)
  162 + assert len(tm) == 2
  163 + assert tm[1]["combined_fields"]["query"] == "dress"
  164 + assert tm[1]["combined_fields"]["minimum_should_match"] == "50%"
  165 + assert tm[1]["combined_fields"]["boost"] == 0.6
  166 +
  167 +
  168 +def test_keywords_omitted_when_same_as_main_combined_fields_query():
  169 + """No second combined_fields when keywords query equals the main lexical query."""
  170 + qb = _builder_multilingual_title_only(default_language="en")
  171 + parsed = SimpleNamespace(
  172 + rewritten_query="连衣裙",
  173 + detected_language="zh",
  174 + translations={},
  175 + keywords_queries={KEYWORDS_QUERY_BASE_KEY: "连衣裙"},
  176 + )
  177 + q = qb.build_query(query_text="连衣裙", parsed_query=parsed, enable_knn=False)
  178 + idx = _clauses_index(q)
  179 + assert len(_combined_fields_must(idx["base_query"])) == 1
154 180
155 181
156 def test_en_query_index_zh_en_includes_base_en_and_trans_zh(): 182 def test_en_query_index_zh_en_includes_base_en_and_trans_zh():