Commit a3d3fb118ea8d4b24032d66f02021407daefa900

Authored by tangwang
1 parent 69881ecb

加phrase提权

@@ -57,6 +57,11 @@ image_embedding改为,一个spu有多个sku向量,每个向量内部properti @@ -57,6 +57,11 @@ image_embedding改为,一个spu有多个sku向量,每个向量内部properti
57 57
58 58
59 59
  60 +tags字段使用的优化:
  61 +tags目前是keyword类型(不分词,只能整值精确匹配),在搜索召回中不便使用(目前主要用于suggest)。
  62 +可以考虑和qanchors一样,按语言拆分为多语言子字段,并配合对应的analyzer使用。
  63 +
  64 +
60 65
61 外部需求: 66 外部需求:
62 1. 对推理能力要求很低、对耗时要求很高的大模型API(或者本地部署一个7b Q4量化的大模型),prompt大概30-50个token,首token响应要求500ms以内 67 1. 对推理能力要求很低、对耗时要求很高的大模型API(或者本地部署一个7b Q4量化的大模型),prompt大概30-50个token,首token响应要求500ms以内
search/es_query_builder.py
@@ -478,8 +478,7 @@ class ESQueryBuilder: @@ -478,8 +478,7 @@ class ESQueryBuilder:
478 should_clauses: List[Dict[str, Any]], 478 should_clauses: List[Dict[str, Any]],
479 lang: str, 479 lang: str,
480 lang_query: str, 480 lang_query: str,
481 - clause_name: str,  
482 - is_source: bool, 481 + clause_name: str
483 ) -> None: 482 ) -> None:
484 text = (lang_query or "").strip() 483 text = (lang_query or "").strip()
485 if not text: 484 if not text:
@@ -487,11 +486,7 @@ class ESQueryBuilder: @@ -487,11 +486,7 @@ class ESQueryBuilder:
487 phrase_fields = self._build_phrase_match_fields(lang) 486 phrase_fields = self._build_phrase_match_fields(lang)
488 if not phrase_fields: 487 if not phrase_fields:
489 return 488 return
490 - boost = (  
491 - self.phrase_match_boost  
492 - if is_source  
493 - else self.phrase_match_boost * float(self.translation_boost)  
494 - ) 489 + boost = self.phrase_match_boost
495 should_clauses.append({ 490 should_clauses.append({
496 "multi_match": { 491 "multi_match": {
497 "_name": f"{clause_name}_phrase", 492 "_name": f"{clause_name}_phrase",
@@ -642,7 +637,7 @@ class ESQueryBuilder: @@ -642,7 +637,7 @@ class ESQueryBuilder:
642 "multi_match": clause["multi_match"] 637 "multi_match": clause["multi_match"]
643 }) 638 })
644 self._append_phrase_should_clause( 639 self._append_phrase_should_clause(
645 - should_clauses, lang, lang_query, clause_name, is_source 640 + should_clauses, lang, lang_query, clause_name
646 ) 641 )
647 642
648 if base_query_text: 643 if base_query_text:
@@ -674,8 +669,7 @@ class ESQueryBuilder: @@ -674,8 +669,7 @@ class ESQueryBuilder:
674 fb_should, 669 fb_should,
675 self.default_language, 670 self.default_language,
676 query_text, 671 query_text,
677 - "base_query_fallback",  
678 - True, 672 + "base_query_fallback"
679 ) 673 )
680 if len(fb_should) == 1: 674 if len(fb_should) == 1:
681 return fallback_lexical 675 return fallback_lexical
tests/test_es_query_builder.py
1 from types import SimpleNamespace 1 from types import SimpleNamespace
  2 +from typing import Any, Dict
2 3
3 import numpy as np 4 import numpy as np
4 5
@@ -13,6 +14,21 @@ def _builder() -> ESQueryBuilder: @@ -13,6 +14,21 @@ def _builder() -> ESQueryBuilder:
13 ) 14 )
14 15
15 16
  17 +def _lexical_multi_match_fields(query_root: Dict[str, Any]) -> list:
  18 + """Fields from the non-phrase multi_match (bool.should or single clause)."""
  19 + if "multi_match" in query_root:
  20 + mm = query_root["multi_match"]
  21 + if mm.get("type") == "phrase":
  22 + raise AssertionError("root multi_match is phrase-only")
  23 + return mm["fields"]
  24 + for clause in query_root.get("bool", {}).get("should", []):
  25 + mm = clause.get("multi_match") or {}
  26 + if mm.get("type") == "phrase":
  27 + continue
  28 + return mm["fields"]
  29 + raise AssertionError("no lexical multi_match in query_root")
  30 +
  31 +
16 def test_knn_prefilter_includes_range_filters(): 32 def test_knn_prefilter_includes_range_filters():
17 qb = _builder() 33 qb = _builder()
18 q = qb.build_query( 34 q = qb.build_query(
@@ -82,7 +98,12 @@ def test_text_query_contains_only_base_and_translation_named_queries(): @@ -82,7 +98,12 @@ def test_text_query_contains_only_base_and_translation_named_queries():
82 should = q["query"]["bool"]["should"] 98 should = q["query"]["bool"]["should"]
83 names = [clause["multi_match"]["_name"] for clause in should] 99 names = [clause["multi_match"]["_name"] for clause in should]
84 100
85 - assert names == ["base_query", "base_query_trans_zh"] 101 + assert names == [
  102 + "base_query",
  103 + "base_query_phrase",
  104 + "base_query_trans_zh",
  105 + "base_query_trans_zh_phrase",
  106 + ]
86 107
87 108
88 def test_text_query_skips_duplicate_translation_same_as_base(): 109 def test_text_query_skips_duplicate_translation_same_as_base():
@@ -100,7 +121,9 @@ def test_text_query_skips_duplicate_translation_same_as_base(): @@ -100,7 +121,9 @@ def test_text_query_skips_duplicate_translation_same_as_base():
100 index_languages=["en", "zh"], 121 index_languages=["en", "zh"],
101 ) 122 )
102 123
103 - assert q["query"]["multi_match"]["_name"] == "base_query" 124 + root = q["query"]
  125 + assert root["bool"]["should"][0]["multi_match"]["_name"] == "base_query"
  126 + assert root["bool"]["should"][1]["multi_match"]["_name"] == "base_query_phrase"
104 127
105 128
106 def test_mixed_script_merges_en_fields_into_zh_clause(): 129 def test_mixed_script_merges_en_fields_into_zh_clause():
@@ -124,7 +147,7 @@ def test_mixed_script_merges_en_fields_into_zh_clause(): @@ -124,7 +147,7 @@ def test_mixed_script_merges_en_fields_into_zh_clause():
124 enable_knn=False, 147 enable_knn=False,
125 index_languages=["zh", "en"], 148 index_languages=["zh", "en"],
126 ) 149 )
127 - fields = q["query"]["multi_match"]["fields"] 150 + fields = _lexical_multi_match_fields(q["query"])
128 bases = {f.split("^", 1)[0] for f in fields} 151 bases = {f.split("^", 1)[0] for f in fields}
129 assert "title.zh" in bases and "title.en" in bases 152 assert "title.zh" in bases and "title.en" in bases
130 assert "brief.zh" in bases and "brief.en" in bases 153 assert "brief.zh" in bases and "brief.en" in bases
@@ -154,7 +177,7 @@ def test_mixed_script_merges_zh_fields_into_en_clause(): @@ -154,7 +177,7 @@ def test_mixed_script_merges_zh_fields_into_en_clause():
154 enable_knn=False, 177 enable_knn=False,
155 index_languages=["zh", "en"], 178 index_languages=["zh", "en"],
156 ) 179 )
157 - fields = q["query"]["multi_match"]["fields"] 180 + fields = _lexical_multi_match_fields(q["query"])
158 bases = {f.split("^", 1)[0] for f in fields} 181 bases = {f.split("^", 1)[0] for f in fields}
159 assert "title.en" in bases and "title.zh" in bases 182 assert "title.en" in bases and "title.zh" in bases
160 assert "title.zh^0.6" in fields 183 assert "title.zh^0.6" in fields
@@ -182,7 +205,7 @@ def test_mixed_script_merged_fields_scale_configured_boosts(): @@ -182,7 +205,7 @@ def test_mixed_script_merged_fields_scale_configured_boosts():
182 enable_knn=False, 205 enable_knn=False,
183 index_languages=["zh", "en"], 206 index_languages=["zh", "en"],
184 ) 207 )
185 - fields = q["query"]["multi_match"]["fields"] 208 + fields = _lexical_multi_match_fields(q["query"])
186 assert "title.zh^5.0" in fields 209 assert "title.zh^5.0" in fields
187 assert "title.en^6.0" in fields # 10.0 * 0.6 210 assert "title.en^6.0" in fields # 10.0 * 0.6
188 211
@@ -208,7 +231,7 @@ def test_mixed_script_does_not_merge_en_when_not_in_index_languages(): @@ -208,7 +231,7 @@ def test_mixed_script_does_not_merge_en_when_not_in_index_languages():
208 enable_knn=False, 231 enable_knn=False,
209 index_languages=["zh"], 232 index_languages=["zh"],
210 ) 233 )
211 - fields = q["query"]["multi_match"]["fields"] 234 + fields = _lexical_multi_match_fields(q["query"])
212 bases = {f.split("^", 1)[0] for f in fields} 235 bases = {f.split("^", 1)[0] for f in fields}
213 assert "title.zh" in bases 236 assert "title.zh" in bases
214 assert "title.en" not in bases 237 assert "title.en" not in bases
tests/test_es_query_builder_text_recall_languages.py
@@ -59,6 +59,11 @@ def _clauses_index(es_body: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: @@ -59,6 +59,11 @@ def _clauses_index(es_body: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
59 return out 59 return out
60 60
61 61
  62 +def _with_phrase(lexical_names: set[str]) -> set[str]:
  63 + """Each lexical recall clause has a companion ``*_phrase`` multi_match."""
  64 + return lexical_names | {f"{n}_phrase" for n in lexical_names}
  65 +
  66 +
62 def _title_fields(mm: Dict[str, Any]) -> List[str]: 67 def _title_fields(mm: Dict[str, Any]) -> List[str]:
63 fields = mm.get("fields") or [] 68 fields = mm.get("fields") or []
64 return [f for f in fields if str(f).startswith("title.")] 69 return [f for f in fields if str(f).startswith("title.")]
@@ -114,7 +119,7 @@ def test_zh_query_index_zh_en_includes_base_zh_and_trans_en(): @@ -114,7 +119,7 @@ def test_zh_query_index_zh_en_includes_base_zh_and_trans_en():
114 index_languages=["zh", "en"], 119 index_languages=["zh", "en"],
115 ) 120 )
116 idx = _clauses_index(q) 121 idx = _clauses_index(q)
117 - assert set(idx) == {"base_query", "base_query_trans_en"} 122 + assert set(idx) == _with_phrase({"base_query", "base_query_trans_en"})
118 assert idx["base_query"]["query"] == "连衣裙" 123 assert idx["base_query"]["query"] == "连衣裙"
119 assert "title.zh" in _title_fields(idx["base_query"]) 124 assert "title.zh" in _title_fields(idx["base_query"])
120 assert idx["base_query_trans_en"]["query"] == "dress" 125 assert idx["base_query_trans_en"]["query"] == "dress"
@@ -132,7 +137,7 @@ def test_en_query_index_zh_en_includes_base_en_and_trans_zh(): @@ -132,7 +137,7 @@ def test_en_query_index_zh_en_includes_base_en_and_trans_zh():
132 index_languages=["en", "zh"], 137 index_languages=["en", "zh"],
133 ) 138 )
134 idx = _clauses_index(q) 139 idx = _clauses_index(q)
135 - assert set(idx) == {"base_query", "base_query_trans_zh"} 140 + assert set(idx) == _with_phrase({"base_query", "base_query_trans_zh"})
136 assert idx["base_query"]["query"] == "dress" 141 assert idx["base_query"]["query"] == "dress"
137 assert "title.en" in _title_fields(idx["base_query"]) 142 assert "title.en" in _title_fields(idx["base_query"])
138 assert idx["base_query_trans_zh"]["query"] == "连衣裙" 143 assert idx["base_query_trans_zh"]["query"] == "连衣裙"
@@ -150,7 +155,9 @@ def test_de_query_index_de_en_fr_includes_base_and_two_translations(): @@ -150,7 +155,9 @@ def test_de_query_index_de_en_fr_includes_base_and_two_translations():
150 index_languages=["de", "en", "fr"], 155 index_languages=["de", "en", "fr"],
151 ) 156 )
152 idx = _clauses_index(q) 157 idx = _clauses_index(q)
153 - assert set(idx) == {"base_query", "base_query_trans_en", "base_query_trans_fr"} 158 + assert set(idx) == _with_phrase(
  159 + {"base_query", "base_query_trans_en", "base_query_trans_fr"}
  160 + )
154 assert idx["base_query"]["query"] == "kleid" 161 assert idx["base_query"]["query"] == "kleid"
155 assert "title.de" in _title_fields(idx["base_query"]) 162 assert "title.de" in _title_fields(idx["base_query"])
156 assert idx["base_query_trans_en"]["query"] == "dress" 163 assert idx["base_query_trans_en"]["query"] == "dress"
@@ -171,7 +178,9 @@ def test_de_query_index_only_en_zh_base_on_de_translations_on_target_fields(): @@ -171,7 +178,9 @@ def test_de_query_index_only_en_zh_base_on_de_translations_on_target_fields():
171 index_languages=["en", "zh"], 178 index_languages=["en", "zh"],
172 ) 179 )
173 idx = _clauses_index(q) 180 idx = _clauses_index(q)
174 - assert set(idx) == {"base_query", "base_query_trans_en", "base_query_trans_zh"} 181 + assert set(idx) == _with_phrase(
  182 + {"base_query", "base_query_trans_en", "base_query_trans_zh"}
  183 + )
175 assert idx["base_query"]["query"] == "schuh" 184 assert idx["base_query"]["query"] == "schuh"
176 assert "title.de" in _title_fields(idx["base_query"]) 185 assert "title.de" in _title_fields(idx["base_query"])
177 assert "boost" not in idx["base_query"] 186 assert "boost" not in idx["base_query"]
@@ -197,7 +206,7 @@ def test_mixed_zh_primary_with_en_translation_merges_en_into_zh_base_clause(): @@ -197,7 +206,7 @@ def test_mixed_zh_primary_with_en_translation_merges_en_into_zh_base_clause():
197 contains_english=True, 206 contains_english=True,
198 ) 207 )
199 idx = _clauses_index(q) 208 idx = _clauses_index(q)
200 - assert set(idx) == {"base_query", "base_query_trans_en"} 209 + assert set(idx) == _with_phrase({"base_query", "base_query_trans_en"})
201 assert idx["base_query"]["query"] == "红色 dress" 210 assert idx["base_query"]["query"] == "红色 dress"
202 assert _has_title_lang(idx["base_query"], "zh") and _has_title_lang(idx["base_query"], "en") 211 assert _has_title_lang(idx["base_query"], "zh") and _has_title_lang(idx["base_query"], "en")
203 assert idx["base_query_trans_en"]["query"] == "red dress" 212 assert idx["base_query_trans_en"]["query"] == "red dress"
@@ -217,7 +226,7 @@ def test_mixed_en_primary_with_zh_translation_merges_zh_into_en_base_clause(): @@ -217,7 +226,7 @@ def test_mixed_en_primary_with_zh_translation_merges_zh_into_en_base_clause():
217 contains_english=True, 226 contains_english=True,
218 ) 227 )
219 idx = _clauses_index(q) 228 idx = _clauses_index(q)
220 - assert set(idx) == {"base_query", "base_query_trans_zh"} 229 + assert set(idx) == _with_phrase({"base_query", "base_query_trans_zh"})
221 assert idx["base_query"]["query"] == "nike 运动鞋" 230 assert idx["base_query"]["query"] == "nike 运动鞋"
222 assert _has_title_lang(idx["base_query"], "en") and _has_title_lang(idx["base_query"], "zh") 231 assert _has_title_lang(idx["base_query"], "en") and _has_title_lang(idx["base_query"], "zh")
223 assert idx["base_query_trans_zh"]["query"] == "耐克运动鞋" 232 assert idx["base_query_trans_zh"]["query"] == "耐克运动鞋"
@@ -236,7 +245,7 @@ def test_mixed_zh_query_index_zh_only_no_en_merge_in_base(): @@ -236,7 +245,7 @@ def test_mixed_zh_query_index_zh_only_no_en_merge_in_base():
236 contains_english=True, 245 contains_english=True,
237 ) 246 )
238 idx = _clauses_index(q) 247 idx = _clauses_index(q)
239 - assert set(idx) == {"base_query"} 248 + assert set(idx) == _with_phrase({"base_query"})
240 bases = {f.split("^", 1)[0] for f in _title_fields(idx["base_query"])} 249 bases = {f.split("^", 1)[0] for f in _title_fields(idx["base_query"])}
241 assert bases == {"title.zh"} 250 assert bases == {"title.zh"}
242 251
@@ -255,7 +264,7 @@ def test_skips_translation_when_same_lang_and_same_text_as_base(): @@ -255,7 +264,7 @@ def test_skips_translation_when_same_lang_and_same_text_as_base():
255 index_languages=["en", "zh"], 264 index_languages=["en", "zh"],
256 ) 265 )
257 idx = _clauses_index(q) 266 idx = _clauses_index(q)
258 - assert set(idx) == {"base_query", "base_query_trans_zh"} 267 + assert set(idx) == _with_phrase({"base_query", "base_query_trans_zh"})
259 268
260 269
261 def test_keeps_translation_when_same_text_but_different_lang_than_base(): 270 def test_keeps_translation_when_same_text_but_different_lang_than_base():
@@ -269,7 +278,7 @@ def test_keeps_translation_when_same_text_but_different_lang_than_base(): @@ -269,7 +278,7 @@ def test_keeps_translation_when_same_text_but_different_lang_than_base():
269 index_languages=["en", "zh"], 278 index_languages=["en", "zh"],
270 ) 279 )
271 idx = _clauses_index(q) 280 idx = _clauses_index(q)
272 - assert set(idx) == {"base_query", "base_query_trans_zh"} 281 + assert set(idx) == _with_phrase({"base_query", "base_query_trans_zh"})
273 assert idx["base_query_trans_zh"]["query"] == "NIKE" 282 assert idx["base_query_trans_zh"]["query"] == "NIKE"
274 283
275 284
@@ -322,6 +331,8 @@ def test_empty_index_languages_treats_source_as_in_index_boosts(): @@ -322,6 +331,8 @@ def test_empty_index_languages_treats_source_as_in_index_boosts():
322 idx = _clauses_index(q) 331 idx = _clauses_index(q)
323 assert "boost" not in idx["base_query"] 332 assert "boost" not in idx["base_query"]
324 assert idx["base_query_trans_en"]["boost"] == qb.translation_boost 333 assert idx["base_query_trans_en"]["boost"] == qb.translation_boost
  334 + assert idx["base_query_phrase"]["boost"] == qb.phrase_match_boost
  335 + assert idx["base_query_trans_en_phrase"]["boost"] == qb.phrase_match_boost
325 336
326 337
327 # --- 无翻译:仅 base_query --- 338 # --- 无翻译:仅 base_query ---
@@ -338,7 +349,7 @@ def test_no_translations_only_base_query(): @@ -338,7 +349,7 @@ def test_no_translations_only_base_query():
338 index_languages=["en", "zh"], 349 index_languages=["en", "zh"],
339 ) 350 )
340 idx = _clauses_index(q) 351 idx = _clauses_index(q)
341 - assert set(idx) == {"base_query"} 352 + assert set(idx) == _with_phrase({"base_query"})
342 353
343 354
344 # --- 与 KNN 同存时仍能解析文本子句(顶层 knn 不影响 query 内结构) --- 355 # --- 与 KNN 同存时仍能解析文本子句(顶层 knn 不影响 query 内结构) ---
@@ -362,84 +373,7 @@ def test_text_clauses_present_alongside_knn(): @@ -362,84 +373,7 @@ def test_text_clauses_present_alongside_knn():
362 ) 373 )
363 assert "knn" in q 374 assert "knn" in q
364 idx = _clauses_index(q) 375 idx = _clauses_index(q)
365 - assert set(idx) == {"base_query", "base_query_trans_zh"}  
366 -  
367 -  
368 -def test_detected_language_unknown_falls_back_to_default_language():  
369 - """与 LanguageDetector 失败时 QueryConfig.default_language 行为对齐。"""  
370 - qb = _builder_multilingual_title_only(default_language="en")  
371 - parsed = SimpleNamespace(  
372 - rewritten_query="shirt",  
373 - detected_language="unknown",  
374 - translations={"zh": "衬衫"},  
375 - contains_chinese=False,  
376 - contains_english=True,  
377 - )  
378 - q = qb.build_query(  
379 - query_text="shirt",  
380 - parsed_query=parsed,  
381 - enable_knn=False,  
382 - index_languages=["en", "zh"],  
383 - )  
384 - idx = _clauses_index(q)  
385 - assert set(idx) == {"base_query", "base_query_trans_zh"}  
386 - assert idx["base_query"]["query"] == "shirt"  
387 - assert _has_title_lang(idx["base_query"], "en")  
388 -  
389 -  
390 -def test_ru_query_index_ru_en_includes_base_ru_and_trans_en():  
391 - qb = _builder_multilingual_title_only(default_language="en")  
392 - q = _build(  
393 - qb,  
394 - query_text="платье",  
395 - rewritten="платье",  
396 - detected_language="ru",  
397 - translations={"en": "dress"},  
398 - index_languages=["ru", "en"],  
399 - )  
400 - idx = _clauses_index(q)  
401 - assert set(idx) == {"base_query", "base_query_trans_en"}  
402 - assert idx["base_query"]["query"] == "платье"  
403 - assert _has_title_lang(idx["base_query"], "ru")  
404 - assert idx["base_query_trans_en"]["query"] == "dress"  
405 -  
406 -  
407 -def test_translation_for_lang_not_listed_in_index_languages_still_generates_clause():  
408 - """  
409 - 当前实现:凡是 translations 里非空的条目都会生成子句;  
410 - index_languages 只约束混写扩列,不用于过滤翻译子句。  
411 - """  
412 - qb = _builder_multilingual_title_only(default_language="en")  
413 - q = _build(  
414 - qb,  
415 - query_text="dress",  
416 - rewritten="dress",  
417 - detected_language="en",  
418 - translations={"zh": "连衣裙", "de": "Kleid"},  
419 - index_languages=["en", "zh"],  
420 - )  
421 - idx = _clauses_index(q)  
422 - assert "base_query_trans_de" in idx  
423 - assert idx["base_query_trans_de"]["query"] == "Kleid"  
424 - assert _has_title_lang(idx["base_query_trans_de"], "de")  
425 -  
426 -  
427 -def test_mixed_detected_zh_rewrite_differs_from_query_text_uses_rewritten_in_base():  
428 - """base_query 始终用 rewritten_query,而非仅 query_text。"""  
429 - qb = _builder_multilingual_title_only(default_language="en")  
430 - q = _build(  
431 - qb,  
432 - query_text=" 红色 ",  
433 - rewritten="红色连衣裙",  
434 - detected_language="zh",  
435 - translations={"en": "red dress"},  
436 - index_languages=["zh", "en"],  
437 - contains_chinese=True,  
438 - contains_english=False,  
439 - )  
440 - idx = _clauses_index(q)  
441 - assert idx["base_query"]["query"] == "红色连衣裙"  
442 - assert idx["base_query_trans_en"]["query"] == "red dress" 376 + assert set(idx) == _with_phrase({"base_query", "base_query_trans_zh"})
443 377
444 378
445 def test_detected_language_unknown_falls_back_to_default_language(): 379 def test_detected_language_unknown_falls_back_to_default_language():
@@ -459,7 +393,7 @@ def test_detected_language_unknown_falls_back_to_default_language(): @@ -459,7 +393,7 @@ def test_detected_language_unknown_falls_back_to_default_language():
459 index_languages=["en", "zh"], 393 index_languages=["en", "zh"],
460 ) 394 )
461 idx = _clauses_index(q) 395 idx = _clauses_index(q)
462 - assert set(idx) == {"base_query", "base_query_trans_zh"} 396 + assert set(idx) == _with_phrase({"base_query", "base_query_trans_zh"})
463 assert idx["base_query"]["query"] == "shirt" 397 assert idx["base_query"]["query"] == "shirt"
464 assert _has_title_lang(idx["base_query"], "en") 398 assert _has_title_lang(idx["base_query"], "en")
465 399
@@ -475,7 +409,7 @@ def test_ru_query_index_ru_en_includes_base_ru_and_trans_en(): @@ -475,7 +409,7 @@ def test_ru_query_index_ru_en_includes_base_ru_and_trans_en():
475 index_languages=["ru", "en"], 409 index_languages=["ru", "en"],
476 ) 410 )
477 idx = _clauses_index(q) 411 idx = _clauses_index(q)
478 - assert set(idx) == {"base_query", "base_query_trans_en"} 412 + assert set(idx) == _with_phrase({"base_query", "base_query_trans_en"})
479 assert idx["base_query"]["query"] == "платье" 413 assert idx["base_query"]["query"] == "платье"
480 assert _has_title_lang(idx["base_query"], "ru") 414 assert _has_title_lang(idx["base_query"], "ru")
481 assert idx["base_query_trans_en"]["query"] == "dress" 415 assert idx["base_query_trans_en"]["query"] == "dress"