Commit f8219b5e190aab5295698dca776574338a79d85f
1 parent db9c469c
1. must 里面的两个 combined_fields 查询,boost 分别设置为 2 和 0.6,和其他查询条件一起,都放到 should 里面,设置 minimum_should_match=1。
2. 如果 keywords_query 跟 combined_fields 主查询的 query 一样,那么不需要再添加了。
Showing 4 changed files with 65 additions and 26 deletions
Show diff stats
query/query_parser.py
| @@ -310,6 +310,7 @@ class QueryParser: | @@ -310,6 +310,7 @@ class QueryParser: | ||
| 310 | Returns: | 310 | Returns: |
| 311 | ParsedQuery object with all processing results | 311 | ParsedQuery object with all processing results |
| 312 | """ | 312 | """ |
| 313 | + parse_t0 = time.perf_counter() | ||
| 313 | # Initialize logger if context provided | 314 | # Initialize logger if context provided |
| 314 | active_logger = context.logger if context else logger | 315 | active_logger = context.logger if context else logger |
| 315 | if context and hasattr(context, "logger"): | 316 | if context and hasattr(context, "logger"): |
| @@ -615,17 +616,23 @@ class QueryParser: | @@ -615,17 +616,23 @@ class QueryParser: | ||
| 615 | product_title_exclusion_profile=product_title_exclusion_profile, | 616 | product_title_exclusion_profile=product_title_exclusion_profile, |
| 616 | ) | 617 | ) |
| 617 | 618 | ||
| 619 | + parse_total_ms = (time.perf_counter() - parse_t0) * 1000.0 | ||
| 620 | + completion_tail = ( | ||
| 621 | + f"Translation count: {len(translations)} | " | ||
| 622 | + f"Vector: {'yes' if query_vector is not None else 'no'} | " | ||
| 623 | + f"Image vector: {'yes' if image_query_vector is not None else 'no'} | " | ||
| 624 | + f"parse_total_ms={parse_total_ms:.1f}" | ||
| 625 | + ) | ||
| 618 | if context and hasattr(context, 'logger'): | 626 | if context and hasattr(context, 'logger'): |
| 619 | context.logger.info( | 627 | context.logger.info( |
| 620 | f"Query parsing completed | Original query: '{query}' | Final query: '{rewritten or query_text}' | " | 628 | f"Query parsing completed | Original query: '{query}' | Final query: '{rewritten or query_text}' | " |
| 621 | - f"Translation count: {len(translations)} | Vector: {'yes' if query_vector is not None else 'no'} | " | ||
| 622 | - f"Image vector: {'yes' if image_query_vector is not None else 'no'}", | 629 | + f"Language: {detected_lang} | {completion_tail}", |
| 623 | extra={'reqid': context.reqid, 'uid': context.uid} | 630 | extra={'reqid': context.reqid, 'uid': context.uid} |
| 624 | ) | 631 | ) |
| 625 | else: | 632 | else: |
| 626 | logger.info( | 633 | logger.info( |
| 627 | f"Query parsing completed | Original query: '{query}' | Final query: '{rewritten or query_text}' | " | 634 | f"Query parsing completed | Original query: '{query}' | Final query: '{rewritten or query_text}' | " |
| 628 | - f"Language: {detected_lang}" | 635 | + f"Language: {detected_lang} | {completion_tail}" |
| 629 | ) | 636 | ) |
| 630 | 637 | ||
| 631 | return result | 638 | return result |
search/es_query_builder.py
| @@ -530,42 +530,46 @@ class ESQueryBuilder: | @@ -530,42 +530,46 @@ class ESQueryBuilder: | ||
| 530 | minimum_should_match = ( | 530 | minimum_should_match = ( |
| 531 | self.base_minimum_should_match if is_source else self.translation_minimum_should_match | 531 | self.base_minimum_should_match if is_source else self.translation_minimum_should_match |
| 532 | ) | 532 | ) |
| 533 | - must_clauses: List[Dict[str, Any]] = [ | 533 | + kw = (keywords_query or "").strip() |
| 534 | + main_query = (lang_query or "").strip() | ||
| 535 | + combined_must: List[Dict[str, Any]] = [ | ||
| 534 | { | 536 | { |
| 535 | "combined_fields": { | 537 | "combined_fields": { |
| 536 | - "query": lang_query, | 538 | + "query": main_query, |
| 537 | "fields": combined_fields, | 539 | "fields": combined_fields, |
| 538 | "minimum_should_match": minimum_should_match, | 540 | "minimum_should_match": minimum_should_match, |
| 541 | + "boost": 2.0, | ||
| 539 | } | 542 | } |
| 540 | } | 543 | } |
| 541 | ] | 544 | ] |
| 542 | - kw = (keywords_query or "").strip() | ||
| 543 | - if kw: | ||
| 544 | - must_clauses.append( | 545 | + if kw and kw != main_query: |
| 546 | + combined_must.append( | ||
| 545 | { | 547 | { |
| 546 | "combined_fields": { | 548 | "combined_fields": { |
| 547 | "query": kw, | 549 | "query": kw, |
| 548 | "fields": combined_fields, | 550 | "fields": combined_fields, |
| 549 | "minimum_should_match": self.keywords_minimum_should_match, | 551 | "minimum_should_match": self.keywords_minimum_should_match, |
| 552 | + "boost": 0.6, | ||
| 550 | } | 553 | } |
| 551 | } | 554 | } |
| 552 | ) | 555 | ) |
| 553 | - should_clauses = [ | 556 | + optional_mm = [ |
| 554 | clause | 557 | clause |
| 555 | for clause in ( | 558 | for clause in ( |
| 556 | - self._build_best_fields_clause(lang, lang_query), | ||
| 557 | - self._build_phrase_clause(lang, lang_query), | 559 | + self._build_best_fields_clause(lang, main_query), |
| 560 | + self._build_phrase_clause(lang, main_query), | ||
| 558 | ) | 561 | ) |
| 559 | if clause | 562 | if clause |
| 560 | ] | 563 | ] |
| 564 | + should_clauses: List[Dict[str, Any]] = [{"bool": {"must": combined_must}}] | ||
| 565 | + should_clauses.extend(optional_mm) | ||
| 561 | clause: Dict[str, Any] = { | 566 | clause: Dict[str, Any] = { |
| 562 | "bool": { | 567 | "bool": { |
| 563 | "_name": clause_name, | 568 | "_name": clause_name, |
| 564 | - "must": must_clauses, | 569 | + "should": should_clauses, |
| 570 | + "minimum_should_match": 1, | ||
| 565 | } | 571 | } |
| 566 | } | 572 | } |
| 567 | - if should_clauses: | ||
| 568 | - clause["bool"]["should"] = should_clauses | ||
| 569 | if not is_source: | 573 | if not is_source: |
| 570 | clause["bool"]["boost"] = float(self.translation_boost) | 574 | clause["bool"]["boost"] = float(self.translation_boost) |
| 571 | return clause | 575 | return clause |
tests/test_es_query_builder.py
| @@ -112,7 +112,8 @@ def test_text_query_contains_only_base_and_translation_named_queries(): | @@ -112,7 +112,8 @@ def test_text_query_contains_only_base_and_translation_named_queries(): | ||
| 112 | 112 | ||
| 113 | assert names == ["base_query", "base_query_trans_zh"] | 113 | assert names == ["base_query", "base_query_trans_zh"] |
| 114 | base_should = should[0]["bool"]["should"] | 114 | base_should = should[0]["bool"]["should"] |
| 115 | - assert [clause["multi_match"]["type"] for clause in base_should] == ["best_fields", "phrase"] | 115 | + mm_types = [c["multi_match"]["type"] for c in base_should if "multi_match" in c] |
| 116 | + assert mm_types == ["best_fields", "phrase"] | ||
| 116 | 117 | ||
| 117 | 118 | ||
| 118 | def test_text_query_skips_duplicate_translation_same_as_base(): | 119 | def test_text_query_skips_duplicate_translation_same_as_base(): |
| @@ -134,7 +135,8 @@ def test_text_query_skips_duplicate_translation_same_as_base(): | @@ -134,7 +135,8 @@ def test_text_query_skips_duplicate_translation_same_as_base(): | ||
| 134 | query_root = query_root["function_score"]["query"] | 135 | query_root = query_root["function_score"]["query"] |
| 135 | base_bool = query_root["bool"] | 136 | base_bool = query_root["bool"] |
| 136 | assert base_bool["_name"] == "base_query" | 137 | assert base_bool["_name"] == "base_query" |
| 137 | - assert [clause["multi_match"]["type"] for clause in base_bool["should"]] == ["best_fields", "phrase"] | 138 | + mm_types = [c["multi_match"]["type"] for c in base_bool["should"] if "multi_match" in c] |
| 139 | + assert mm_types == ["best_fields", "phrase"] | ||
| 138 | 140 | ||
| 139 | 141 | ||
| 140 | def test_product_title_exclusion_filter_is_applied_once_on_outer_query(): | 142 | def test_product_title_exclusion_filter_is_applied_once_on_outer_query(): |
tests/test_es_query_builder_text_recall_languages.py
| @@ -63,8 +63,13 @@ def _clauses_index(es_body: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: | @@ -63,8 +63,13 @@ def _clauses_index(es_body: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: | ||
| 63 | return out | 63 | return out |
| 64 | 64 | ||
| 65 | 65 | ||
| 66 | +def _combined_fields_must(clause: Dict[str, Any]) -> List[Dict[str, Any]]: | ||
| 67 | + first = (clause.get("should") or [])[0] | ||
| 68 | + return list(first["bool"]["must"]) | ||
| 69 | + | ||
| 70 | + | ||
| 66 | def _combined_fields_clause(clause: Dict[str, Any]) -> Dict[str, Any]: | 71 | def _combined_fields_clause(clause: Dict[str, Any]) -> Dict[str, Any]: |
| 67 | - return clause["must"][0]["combined_fields"] | 72 | + return _combined_fields_must(clause)[0]["combined_fields"] |
| 68 | 73 | ||
| 69 | 74 | ||
| 70 | def _should_multi_matches(clause: Dict[str, Any]) -> List[Dict[str, Any]]: | 75 | def _should_multi_matches(clause: Dict[str, Any]) -> List[Dict[str, Any]]: |
| @@ -131,7 +136,7 @@ def test_zh_query_index_zh_en_includes_base_zh_and_trans_en(): | @@ -131,7 +136,7 @@ def test_zh_query_index_zh_en_includes_base_zh_and_trans_en(): | ||
| 131 | 136 | ||
| 132 | 137 | ||
| 133 | def test_keywords_combined_fields_second_must_same_fields_and_50pct(): | 138 | def test_keywords_combined_fields_second_must_same_fields_and_50pct(): |
| 134 | - """When ParsedQuery.keywords_queries is set, must includes a second combined_fields.""" | 139 | + """When ParsedQuery.keywords_queries is set, inner must has two boosted combined_fields.""" |
| 135 | qb = _builder_multilingual_title_only(default_language="en") | 140 | qb = _builder_multilingual_title_only(default_language="en") |
| 136 | parsed = SimpleNamespace( | 141 | parsed = SimpleNamespace( |
| 137 | rewritten_query="连衣裙", | 142 | rewritten_query="连衣裙", |
| @@ -142,15 +147,36 @@ def test_keywords_combined_fields_second_must_same_fields_and_50pct(): | @@ -142,15 +147,36 @@ def test_keywords_combined_fields_second_must_same_fields_and_50pct(): | ||
| 142 | q = qb.build_query(query_text="连衣裙", parsed_query=parsed, enable_knn=False) | 147 | q = qb.build_query(query_text="连衣裙", parsed_query=parsed, enable_knn=False) |
| 143 | idx = _clauses_index(q) | 148 | idx = _clauses_index(q) |
| 144 | base = idx["base_query"] | 149 | base = idx["base_query"] |
| 145 | - assert len(base["must"]) == 2 | ||
| 146 | - assert base["must"][0]["combined_fields"]["query"] == "连衣裙" | ||
| 147 | - assert base["must"][1]["combined_fields"]["query"] == "连衣 裙" | ||
| 148 | - assert base["must"][1]["combined_fields"]["minimum_should_match"] == "50%" | ||
| 149 | - assert base["must"][1]["combined_fields"]["fields"] == base["must"][0]["combined_fields"]["fields"] | 150 | + assert base["minimum_should_match"] == 1 |
| 151 | + bm = _combined_fields_must(base) | ||
| 152 | + assert len(bm) == 2 | ||
| 153 | + assert bm[0]["combined_fields"]["query"] == "连衣裙" | ||
| 154 | + assert bm[0]["combined_fields"]["boost"] == 2.0 | ||
| 155 | + assert bm[1]["combined_fields"]["query"] == "连衣 裙" | ||
| 156 | + assert bm[1]["combined_fields"]["minimum_should_match"] == "50%" | ||
| 157 | + assert bm[1]["combined_fields"]["boost"] == 0.6 | ||
| 158 | + assert bm[1]["combined_fields"]["fields"] == bm[0]["combined_fields"]["fields"] | ||
| 150 | trans = idx["base_query_trans_en"] | 159 | trans = idx["base_query_trans_en"] |
| 151 | - assert len(trans["must"]) == 2 | ||
| 152 | - assert trans["must"][1]["combined_fields"]["query"] == "dress" | ||
| 153 | - assert trans["must"][1]["combined_fields"]["minimum_should_match"] == "50%" | 160 | + assert trans["minimum_should_match"] == 1 |
| 161 | + tm = _combined_fields_must(trans) | ||
| 162 | + assert len(tm) == 2 | ||
| 163 | + assert tm[1]["combined_fields"]["query"] == "dress" | ||
| 164 | + assert tm[1]["combined_fields"]["minimum_should_match"] == "50%" | ||
| 165 | + assert tm[1]["combined_fields"]["boost"] == 0.6 | ||
| 166 | + | ||
| 167 | + | ||
| 168 | +def test_keywords_omitted_when_same_as_main_combined_fields_query(): | ||
| 169 | + """No second combined_fields when keywords query equals the main lexical query.""" | ||
| 170 | + qb = _builder_multilingual_title_only(default_language="en") | ||
| 171 | + parsed = SimpleNamespace( | ||
| 172 | + rewritten_query="连衣裙", | ||
| 173 | + detected_language="zh", | ||
| 174 | + translations={}, | ||
| 175 | + keywords_queries={KEYWORDS_QUERY_BASE_KEY: "连衣裙"}, | ||
| 176 | + ) | ||
| 177 | + q = qb.build_query(query_text="连衣裙", parsed_query=parsed, enable_knn=False) | ||
| 178 | + idx = _clauses_index(q) | ||
| 179 | + assert len(_combined_fields_must(idx["base_query"])) == 1 | ||
| 154 | 180 | ||
| 155 | 181 | ||
| 156 | def test_en_query_index_zh_en_includes_base_en_and_trans_zh(): | 182 | def test_en_query_index_zh_en_includes_base_en_and_trans_zh(): |