Commit f8219b5e190aab5295698dca776574338a79d85f
1 parent
db9c469c
1. must里面的两个combined_fields查询,boost分别设置为2和0.6,和其他查询条件一起,都放到should里面,设置minimum_should_match==1
2. 如果keywords_query跟combined_fields主查询的query一样,那么不需要再添加了
Showing 4 changed files with 65 additions and 26 deletions
Show diff stats
query/query_parser.py
| ... | ... | @@ -310,6 +310,7 @@ class QueryParser: |
| 310 | 310 | Returns: |
| 311 | 311 | ParsedQuery object with all processing results |
| 312 | 312 | """ |
| 313 | + parse_t0 = time.perf_counter() | |
| 313 | 314 | # Initialize logger if context provided |
| 314 | 315 | active_logger = context.logger if context else logger |
| 315 | 316 | if context and hasattr(context, "logger"): |
| ... | ... | @@ -615,17 +616,23 @@ class QueryParser: |
| 615 | 616 | product_title_exclusion_profile=product_title_exclusion_profile, |
| 616 | 617 | ) |
| 617 | 618 | |
| 619 | + parse_total_ms = (time.perf_counter() - parse_t0) * 1000.0 | |
| 620 | + completion_tail = ( | |
| 621 | + f"Translation count: {len(translations)} | " | |
| 622 | + f"Vector: {'yes' if query_vector is not None else 'no'} | " | |
| 623 | + f"Image vector: {'yes' if image_query_vector is not None else 'no'} | " | |
| 624 | + f"parse_total_ms={parse_total_ms:.1f}" | |
| 625 | + ) | |
| 618 | 626 | if context and hasattr(context, 'logger'): |
| 619 | 627 | context.logger.info( |
| 620 | 628 | f"Query parsing completed | Original query: '{query}' | Final query: '{rewritten or query_text}' | " |
| 621 | - f"Translation count: {len(translations)} | Vector: {'yes' if query_vector is not None else 'no'} | " | |
| 622 | - f"Image vector: {'yes' if image_query_vector is not None else 'no'}", | |
| 629 | + f"Language: {detected_lang} | {completion_tail}", | |
| 623 | 630 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 624 | 631 | ) |
| 625 | 632 | else: |
| 626 | 633 | logger.info( |
| 627 | 634 | f"Query parsing completed | Original query: '{query}' | Final query: '{rewritten or query_text}' | " |
| 628 | - f"Language: {detected_lang}" | |
| 635 | + f"Language: {detected_lang} | {completion_tail}" | |
| 629 | 636 | ) |
| 630 | 637 | |
| 631 | 638 | return result | ... | ... |
search/es_query_builder.py
| ... | ... | @@ -530,42 +530,46 @@ class ESQueryBuilder: |
| 530 | 530 | minimum_should_match = ( |
| 531 | 531 | self.base_minimum_should_match if is_source else self.translation_minimum_should_match |
| 532 | 532 | ) |
| 533 | - must_clauses: List[Dict[str, Any]] = [ | |
| 533 | + kw = (keywords_query or "").strip() | |
| 534 | + main_query = (lang_query or "").strip() | |
| 535 | + combined_must: List[Dict[str, Any]] = [ | |
| 534 | 536 | { |
| 535 | 537 | "combined_fields": { |
| 536 | - "query": lang_query, | |
| 538 | + "query": main_query, | |
| 537 | 539 | "fields": combined_fields, |
| 538 | 540 | "minimum_should_match": minimum_should_match, |
| 541 | + "boost": 2.0, | |
| 539 | 542 | } |
| 540 | 543 | } |
| 541 | 544 | ] |
| 542 | - kw = (keywords_query or "").strip() | |
| 543 | - if kw: | |
| 544 | - must_clauses.append( | |
| 545 | + if kw and kw != main_query: | |
| 546 | + combined_must.append( | |
| 545 | 547 | { |
| 546 | 548 | "combined_fields": { |
| 547 | 549 | "query": kw, |
| 548 | 550 | "fields": combined_fields, |
| 549 | 551 | "minimum_should_match": self.keywords_minimum_should_match, |
| 552 | + "boost": 0.6, | |
| 550 | 553 | } |
| 551 | 554 | } |
| 552 | 555 | ) |
| 553 | - should_clauses = [ | |
| 556 | + optional_mm = [ | |
| 554 | 557 | clause |
| 555 | 558 | for clause in ( |
| 556 | - self._build_best_fields_clause(lang, lang_query), | |
| 557 | - self._build_phrase_clause(lang, lang_query), | |
| 559 | + self._build_best_fields_clause(lang, main_query), | |
| 560 | + self._build_phrase_clause(lang, main_query), | |
| 558 | 561 | ) |
| 559 | 562 | if clause |
| 560 | 563 | ] |
| 564 | + should_clauses: List[Dict[str, Any]] = [{"bool": {"must": combined_must}}] | |
| 565 | + should_clauses.extend(optional_mm) | |
| 561 | 566 | clause: Dict[str, Any] = { |
| 562 | 567 | "bool": { |
| 563 | 568 | "_name": clause_name, |
| 564 | - "must": must_clauses, | |
| 569 | + "should": should_clauses, | |
| 570 | + "minimum_should_match": 1, | |
| 565 | 571 | } |
| 566 | 572 | } |
| 567 | - if should_clauses: | |
| 568 | - clause["bool"]["should"] = should_clauses | |
| 569 | 573 | if not is_source: |
| 570 | 574 | clause["bool"]["boost"] = float(self.translation_boost) |
| 571 | 575 | return clause | ... | ... |
tests/test_es_query_builder.py
| ... | ... | @@ -112,7 +112,8 @@ def test_text_query_contains_only_base_and_translation_named_queries(): |
| 112 | 112 | |
| 113 | 113 | assert names == ["base_query", "base_query_trans_zh"] |
| 114 | 114 | base_should = should[0]["bool"]["should"] |
| 115 | - assert [clause["multi_match"]["type"] for clause in base_should] == ["best_fields", "phrase"] | |
| 115 | + mm_types = [c["multi_match"]["type"] for c in base_should if "multi_match" in c] | |
| 116 | + assert mm_types == ["best_fields", "phrase"] | |
| 116 | 117 | |
| 117 | 118 | |
| 118 | 119 | def test_text_query_skips_duplicate_translation_same_as_base(): |
| ... | ... | @@ -134,7 +135,8 @@ def test_text_query_skips_duplicate_translation_same_as_base(): |
| 134 | 135 | query_root = query_root["function_score"]["query"] |
| 135 | 136 | base_bool = query_root["bool"] |
| 136 | 137 | assert base_bool["_name"] == "base_query" |
| 137 | - assert [clause["multi_match"]["type"] for clause in base_bool["should"]] == ["best_fields", "phrase"] | |
| 138 | + mm_types = [c["multi_match"]["type"] for c in base_bool["should"] if "multi_match" in c] | |
| 139 | + assert mm_types == ["best_fields", "phrase"] | |
| 138 | 140 | |
| 139 | 141 | |
| 140 | 142 | def test_product_title_exclusion_filter_is_applied_once_on_outer_query(): | ... | ... |
tests/test_es_query_builder_text_recall_languages.py
| ... | ... | @@ -63,8 +63,13 @@ def _clauses_index(es_body: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: |
| 63 | 63 | return out |
| 64 | 64 | |
| 65 | 65 | |
| 66 | +def _combined_fields_must(clause: Dict[str, Any]) -> List[Dict[str, Any]]: | |
| 67 | + first = (clause.get("should") or [])[0] | |
| 68 | + return list(first["bool"]["must"]) | |
| 69 | + | |
| 70 | + | |
| 66 | 71 | def _combined_fields_clause(clause: Dict[str, Any]) -> Dict[str, Any]: |
| 67 | - return clause["must"][0]["combined_fields"] | |
| 72 | + return _combined_fields_must(clause)[0]["combined_fields"] | |
| 68 | 73 | |
| 69 | 74 | |
| 70 | 75 | def _should_multi_matches(clause: Dict[str, Any]) -> List[Dict[str, Any]]: |
| ... | ... | @@ -131,7 +136,7 @@ def test_zh_query_index_zh_en_includes_base_zh_and_trans_en(): |
| 131 | 136 | |
| 132 | 137 | |
| 133 | 138 | def test_keywords_combined_fields_second_must_same_fields_and_50pct(): |
| 134 | - """When ParsedQuery.keywords_queries is set, must includes a second combined_fields.""" | |
| 139 | + """When ParsedQuery.keywords_queries is set, inner must has two boosted combined_fields.""" | |
| 135 | 140 | qb = _builder_multilingual_title_only(default_language="en") |
| 136 | 141 | parsed = SimpleNamespace( |
| 137 | 142 | rewritten_query="连衣裙", |
| ... | ... | @@ -142,15 +147,36 @@ def test_keywords_combined_fields_second_must_same_fields_and_50pct(): |
| 142 | 147 | q = qb.build_query(query_text="连衣裙", parsed_query=parsed, enable_knn=False) |
| 143 | 148 | idx = _clauses_index(q) |
| 144 | 149 | base = idx["base_query"] |
| 145 | - assert len(base["must"]) == 2 | |
| 146 | - assert base["must"][0]["combined_fields"]["query"] == "连衣裙" | |
| 147 | - assert base["must"][1]["combined_fields"]["query"] == "连衣 裙" | |
| 148 | - assert base["must"][1]["combined_fields"]["minimum_should_match"] == "50%" | |
| 149 | - assert base["must"][1]["combined_fields"]["fields"] == base["must"][0]["combined_fields"]["fields"] | |
| 150 | + assert base["minimum_should_match"] == 1 | |
| 151 | + bm = _combined_fields_must(base) | |
| 152 | + assert len(bm) == 2 | |
| 153 | + assert bm[0]["combined_fields"]["query"] == "连衣裙" | |
| 154 | + assert bm[0]["combined_fields"]["boost"] == 2.0 | |
| 155 | + assert bm[1]["combined_fields"]["query"] == "连衣 裙" | |
| 156 | + assert bm[1]["combined_fields"]["minimum_should_match"] == "50%" | |
| 157 | + assert bm[1]["combined_fields"]["boost"] == 0.6 | |
| 158 | + assert bm[1]["combined_fields"]["fields"] == bm[0]["combined_fields"]["fields"] | |
| 150 | 159 | trans = idx["base_query_trans_en"] |
| 151 | - assert len(trans["must"]) == 2 | |
| 152 | - assert trans["must"][1]["combined_fields"]["query"] == "dress" | |
| 153 | - assert trans["must"][1]["combined_fields"]["minimum_should_match"] == "50%" | |
| 160 | + assert trans["minimum_should_match"] == 1 | |
| 161 | + tm = _combined_fields_must(trans) | |
| 162 | + assert len(tm) == 2 | |
| 163 | + assert tm[1]["combined_fields"]["query"] == "dress" | |
| 164 | + assert tm[1]["combined_fields"]["minimum_should_match"] == "50%" | |
| 165 | + assert tm[1]["combined_fields"]["boost"] == 0.6 | |
| 166 | + | |
| 167 | + | |
| 168 | +def test_keywords_omitted_when_same_as_main_combined_fields_query(): | |
| 169 | + """No second combined_fields when keywords query equals the main lexical query.""" | |
| 170 | + qb = _builder_multilingual_title_only(default_language="en") | |
| 171 | + parsed = SimpleNamespace( | |
| 172 | + rewritten_query="连衣裙", | |
| 173 | + detected_language="zh", | |
| 174 | + translations={}, | |
| 175 | + keywords_queries={KEYWORDS_QUERY_BASE_KEY: "连衣裙"}, | |
| 176 | + ) | |
| 177 | + q = qb.build_query(query_text="连衣裙", parsed_query=parsed, enable_knn=False) | |
| 178 | + idx = _clauses_index(q) | |
| 179 | + assert len(_combined_fields_must(idx["base_query"])) == 1 | |
| 154 | 180 | |
| 155 | 181 | |
| 156 | 182 | def test_en_query_index_zh_en_includes_base_en_and_trans_zh(): | ... | ... |