0536222c
tangwang
query parser优化
|
1
2
3
|
"""
ES text recall: base_query (rewritten @ detected_language) + base_query_trans_*.

Covers translation routing, mixed-script queries (per-clause language fields only),
and clause naming. Asserts named lexical clause boundaries, combined_fields payloads,
and per-language target fields (title.{lang}).
"""
from types import SimpleNamespace
from typing import Any, Dict, List
import numpy as np
|
ceaf6d03
tangwang
召回限定:must条件补充主干词命...
|
14
|
from query.keyword_extractor import KEYWORDS_QUERY_BASE_KEY
|
0536222c
tangwang
query parser优化
|
15
16
17
|
from search.es_query_builder import ESQueryBuilder
|
35da3813
tangwang
中英混写query的优化逻辑,不适...
|
18
|
def _builder_multilingual_title_only(*, default_language: str = "en") -> ESQueryBuilder:
    """Minimal builder: only title.{lang} for easy field assertions.

    Args:
        default_language: Forwarded unchanged to ``ESQueryBuilder``.

    Returns:
        An ``ESQueryBuilder`` configured with ``title`` as the only
        multilingual field, no shared fields and no function_score config.
    """
    return ESQueryBuilder(
        match_fields=["title.en^1.0"],
        multilingual_fields=["title"],
        shared_fields=[],
        text_embedding_field="title_embedding",
        default_language=default_language,
        function_score_config=None,
    )
def _unwrap_inner_query(es_body: Dict[str, Any]) -> Dict[str, Any]:
    """Navigate bool.must / function_score wrappers to the text recall root.

    Args:
        es_body: Full ES request body; only its ``"query"`` entry is read.

    Returns:
        The query node left after peeling, in this order, at most one unnamed
        ``bool`` with a non-empty ``must`` (its first ``must`` entry is taken)
        and then at most one ``function_score`` wrapper. A missing or falsy
        ``"query"`` yields ``{}``.
    """
    q = es_body.get("query") or {}
    # A bool carrying ``_name`` is itself a lexical clause, so it is never unwrapped.
    if "bool" in q and not q["bool"].get("_name") and "must" in q["bool"] and q["bool"]["must"]:
        q = q["bool"]["must"][0]
    if "function_score" in q:
        q = q["function_score"]["query"]
    return q
def _extract_multi_match_clauses(es_body: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Collect ``multi_match`` bodies found at the text recall root.

    Returns a single-element list when the unwrapped root is itself a
    ``multi_match``; otherwise the ``multi_match`` bodies found directly in the
    root's ``bool.should`` list (empty list when there are none).
    """
    inner = _unwrap_inner_query(es_body)
    if "multi_match" in inner:
        return [inner["multi_match"]]
    should = (inner.get("bool") or {}).get("should") or []
    return [c["multi_match"] for c in should if "multi_match" in c]
|
e756b18e
tangwang
重构了文本召回构建器,现在每个 b...
|
48
49
50
51
52
53
54
55
|
def _extract_named_lexical_clauses(es_body: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Collect ``bool`` bodies that carry a truthy ``_name``.

    Returns a single-element list when the unwrapped root is itself a named
    ``bool``; otherwise the named ``bool`` bodies found directly in the root's
    ``bool.should`` list (empty list when there are none).
    """
    inner = _unwrap_inner_query(es_body)
    if "bool" in inner and inner["bool"].get("_name"):
        return [inner["bool"]]
    should = (inner.get("bool") or {}).get("should") or []
    return [c["bool"] for c in should if "bool" in c and c["bool"].get("_name")]
|
0536222c
tangwang
query parser优化
|
56
|
def _clauses_index(es_body: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
    """Map lexical clause _name -> bool query body.

    Clauses come from ``_extract_named_lexical_clauses``; when two clauses
    share a name the later one wins.
    """
    out: Dict[str, Dict[str, Any]] = {}
    for clause in _extract_named_lexical_clauses(es_body):
        name = clause.get("_name")
        if name:
            out[str(name)] = clause
    return out
|
f8219b5e
tangwang
1.
|
66
67
68
69
70
|
def _combined_fields_must(clause: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Return a copy of the ``must`` list of the first ``should`` entry.

    Args:
        clause: A lexical clause body whose first ``should`` entry is expected
            to be ``{"bool": {"must": [...]}}``.

    Raises:
        IndexError: If ``should`` is missing or empty.
        KeyError: If the first entry has no ``bool.must``.
    """
    first = (clause.get("should") or [])[0]
    # Copy so callers cannot mutate the query body under inspection.
    return list(first["bool"]["must"])
|
e756b18e
tangwang
重构了文本召回构建器,现在每个 b...
|
71
|
def _combined_fields_clause(clause: Dict[str, Any]) -> Dict[str, Any]:
    """Return the ``combined_fields`` body of the first inner ``must`` entry."""
    return _combined_fields_must(clause)[0]["combined_fields"]
|
e756b18e
tangwang
重构了文本召回构建器,现在每个 b...
|
73
|
|
a3d3fb11
tangwang
加phrase提权
|
74
|
|
e756b18e
tangwang
重构了文本召回构建器,现在每个 b...
|
75
76
|
def _should_multi_matches(clause: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Return the ``multi_match`` bodies listed under ``clause["should"]``.

    Entries without a ``multi_match`` key are skipped; a missing or falsy
    ``should`` yields an empty list.
    """
    return [item["multi_match"] for item in clause.get("should") or [] if "multi_match" in item]
|
a3d3fb11
tangwang
加phrase提权
|
77
|
|
e756b18e
tangwang
重构了文本召回构建器,现在每个 b...
|
78
79
80
81
82
83
84
|
def _should_multi_matches_by_type(clause: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
    """Index the clause's ``should`` multi_match bodies by their ``type``.

    A missing or falsy ``type`` is keyed as ``"best_fields"``; when several
    bodies share a type the last one wins.
    """
    return {str(mm.get("type") or "best_fields"): mm for mm in _should_multi_matches(clause)}
def _title_fields(clause: Dict[str, Any]) -> List[str]:
    """Return the combined_fields ``fields`` entries that start with ``title.``.

    Entries are returned as stored, so any ``^boost`` suffix is kept.
    """
    fields = _combined_fields_clause(clause).get("fields") or []
    return [f for f in fields if str(f).startswith("title.")]
|
e756b18e
tangwang
重构了文本召回构建器,现在每个 b...
|
88
|
def _has_title_lang(clause: Dict[str, Any], lang: str) -> bool:
    """True if any field is title.{lang} with optional ^boost suffix."""
    prefix = f"title.{lang}"
    for f in _combined_fields_clause(clause).get("fields") or []:
        s = str(f)
        # Exact match or "<prefix>^<boost>"; "title.en" must not match "title.en_us".
        if s == prefix or s.startswith(prefix + "^"):
            return True
    return False
def _build(
    qb: ESQueryBuilder,
    *,
    query_text: str,
    rewritten: str,
    detected_language: str,
    translations: Dict[str, str],
) -> Dict[str, Any]:
    """Build an ES body through ``qb.build_query`` with KNN disabled.

    The parsed query is a ``SimpleNamespace`` stand-in exposing only
    ``rewritten_query``, ``detected_language`` and a copy of ``translations``.
    """
    parsed = SimpleNamespace(
        rewritten_query=rewritten,
        detected_language=detected_language,
        translations=dict(translations),
    )
    return qb.build_query(
        query_text=query_text,
        parsed_query=parsed,
        enable_knn=False,
    )
# --- 检测语言在 index_languages 内:主召回 + 翻译补召回 ---
def test_zh_query_index_zh_en_includes_base_zh_and_trans_en():
    """A zh query with an en translation yields base_query on title.zh and base_query_trans_en on title.en."""
    qb = _builder_multilingual_title_only(default_language="en")
    q = _build(
        qb,
        query_text="连衣裙",
        rewritten="连衣裙",
        detected_language="zh",
        translations={"en": "dress"},
    )
    idx = _clauses_index(q)
    assert set(idx) == {"base_query", "base_query_trans_en"}
    assert _combined_fields_clause(idx["base_query"])["query"] == "连衣裙"
    assert "title.zh" in _title_fields(idx["base_query"])
    assert _combined_fields_clause(idx["base_query_trans_en"])["query"] == "dress"
    assert "title.en" in _title_fields(idx["base_query_trans_en"])
|
ceaf6d03
tangwang
召回限定:must条件补充主干词命...
|
138
|
def test_keywords_combined_fields_second_must_same_fields_and_50pct():
    """When ParsedQuery.keywords_queries is set, inner must has two boosted combined_fields."""
    qb = _builder_multilingual_title_only(default_language="en")
    parsed = SimpleNamespace(
        rewritten_query="连衣裙",
        detected_language="zh",
        translations={"en": "red dress"},
        keywords_queries={KEYWORDS_QUERY_BASE_KEY: "连衣 裙", "en": "dress"},
    )
    q = qb.build_query(query_text="连衣裙", parsed_query=parsed, enable_knn=False)
    idx = _clauses_index(q)

    # Base clause: main query first, keywords query second on the same fields.
    base = idx["base_query"]
    assert base["minimum_should_match"] == 1
    bm = _combined_fields_must(base)
    assert len(bm) == 2
    assert bm[0]["combined_fields"]["query"] == "连衣裙"
    assert bm[0]["combined_fields"]["boost"] == 2.0
    assert bm[1]["combined_fields"]["query"] == "连衣 裙"
    assert bm[1]["combined_fields"]["minimum_should_match"] == "50%"
    assert bm[1]["combined_fields"]["boost"] == 0.6
    assert bm[1]["combined_fields"]["fields"] == bm[0]["combined_fields"]["fields"]

    # Translation clause: keywords query keyed by the target language.
    trans = idx["base_query_trans_en"]
    assert trans["minimum_should_match"] == 1
    tm = _combined_fields_must(trans)
    assert len(tm) == 2
    assert tm[1]["combined_fields"]["query"] == "dress"
    assert tm[1]["combined_fields"]["minimum_should_match"] == "50%"
    assert tm[1]["combined_fields"]["boost"] == 0.6
def test_keywords_omitted_when_same_as_main_combined_fields_query():
    """No second combined_fields when keywords query equals the main lexical query."""
    qb = _builder_multilingual_title_only(default_language="en")
    parsed = SimpleNamespace(
        rewritten_query="连衣裙",
        detected_language="zh",
        translations={},
        keywords_queries={KEYWORDS_QUERY_BASE_KEY: "连衣裙"},
    )
    q = qb.build_query(query_text="连衣裙", parsed_query=parsed, enable_knn=False)
    idx = _clauses_index(q)
    assert len(_combined_fields_must(idx["base_query"])) == 1
|
ceaf6d03
tangwang
召回限定:must条件补充主干词命...
|
180
181
|
|
0536222c
tangwang
query parser优化
|
182
183
184
185
186
187
188
189
|
def test_en_query_index_zh_en_includes_base_en_and_trans_zh():
    """An en query with a zh translation yields base_query on title.en and base_query_trans_zh on title.zh."""
    qb = _builder_multilingual_title_only(default_language="en")
    q = _build(
        qb,
        query_text="dress",
        rewritten="dress",
        detected_language="en",
        translations={"zh": "连衣裙"},
    )
    idx = _clauses_index(q)
    assert set(idx) == {"base_query", "base_query_trans_zh"}
    assert _combined_fields_clause(idx["base_query"])["query"] == "dress"
    assert "title.en" in _title_fields(idx["base_query"])
    assert _combined_fields_clause(idx["base_query_trans_zh"])["query"] == "连衣裙"
    assert "title.zh" in _title_fields(idx["base_query_trans_zh"])
def test_de_query_index_de_en_fr_includes_base_and_two_translations():
    """A de query with en and fr translations yields one base clause plus one clause per translation."""
    qb = _builder_multilingual_title_only(default_language="en")
    q = _build(
        qb,
        query_text="kleid",
        rewritten="kleid",
        detected_language="de",
        translations={"en": "dress", "fr": "robe"},
    )
    idx = _clauses_index(q)
    assert set(idx) == {"base_query", "base_query_trans_en", "base_query_trans_fr"}
    assert _combined_fields_clause(idx["base_query"])["query"] == "kleid"
    assert "title.de" in _title_fields(idx["base_query"])
    assert _combined_fields_clause(idx["base_query_trans_en"])["query"] == "dress"
    assert _combined_fields_clause(idx["base_query_trans_fr"])["query"] == "robe"
|
0536222c
tangwang
query parser优化
|
214
215
216
217
218
219
220
221
222
223
224
225
226
|
# --- 检测语言不在 index_languages:仍有 base(弱)+ 翻译(强) ---
def test_de_query_index_only_en_zh_base_on_de_translations_on_target_fields():
    """The de base clause targets title.de with no bool boost; en/zh translation clauses carry translation_boost."""
    qb = _builder_multilingual_title_only(default_language="en")
    q = _build(
        qb,
        query_text="schuh",
        rewritten="schuh",
        detected_language="de",
        translations={"en": "shoe", "zh": "鞋"},
    )
    idx = _clauses_index(q)
    assert set(idx) == {"base_query", "base_query_trans_en", "base_query_trans_zh"}
    assert _combined_fields_clause(idx["base_query"])["query"] == "schuh"
    assert "title.de" in _title_fields(idx["base_query"])
    assert "boost" not in idx["base_query"]
    assert _combined_fields_clause(idx["base_query_trans_en"])["query"] == "shoe"
    assert idx["base_query_trans_en"]["boost"] == qb.translation_boost
    assert _combined_fields_clause(idx["base_query_trans_zh"])["query"] == "鞋"
    assert idx["base_query_trans_zh"]["boost"] == qb.translation_boost
|
35da3813
tangwang
中英混写query的优化逻辑,不适...
|
239
|
# --- 中英混写:base 打在检测语种字段;翻译子句打在译文语种字段 ---
|
0536222c
tangwang
query parser优化
|
240
241
|
|
35da3813
tangwang
中英混写query的优化逻辑,不适...
|
242
|
def test_mixed_zh_detected_base_clause_zh_fields_only_with_en_translation():
    """Mixed-script query detected as zh: base clause hits title.zh only; the en translation hits title.en."""
    qb = _builder_multilingual_title_only(default_language="en")
    q = _build(
        qb,
        query_text="红色 dress",
        rewritten="红色 dress",
        detected_language="zh",
        translations={"en": "red dress"},
    )
    idx = _clauses_index(q)
    assert set(idx) == {"base_query", "base_query_trans_en"}
    assert _combined_fields_clause(idx["base_query"])["query"] == "红色 dress"
    assert _has_title_lang(idx["base_query"], "zh") and not _has_title_lang(idx["base_query"], "en")
    assert _combined_fields_clause(idx["base_query_trans_en"])["query"] == "red dress"
    assert _has_title_lang(idx["base_query_trans_en"], "en")
|
35da3813
tangwang
中英混写query的优化逻辑,不适...
|
259
|
def test_mixed_en_detected_base_clause_en_fields_only_with_zh_translation():
    """Mixed-script query detected as en: base clause hits title.en only; the zh translation gets its own clause."""
    qb = _builder_multilingual_title_only(default_language="en")
    q = _build(
        qb,
        query_text="nike 运动鞋",
        rewritten="nike 运动鞋",
        detected_language="en",
        translations={"zh": "耐克运动鞋"},
    )
    idx = _clauses_index(q)
    assert set(idx) == {"base_query", "base_query_trans_zh"}
    assert _combined_fields_clause(idx["base_query"])["query"] == "nike 运动鞋"
    assert _has_title_lang(idx["base_query"], "en") and not _has_title_lang(idx["base_query"], "zh")
    assert _combined_fields_clause(idx["base_query_trans_zh"])["query"] == "耐克运动鞋"
|
0536222c
tangwang
query parser优化
|
273
274
|
|
35da3813
tangwang
中英混写query的优化逻辑,不适...
|
275
|
def test_zh_query_no_translations_only_zh_fields():
    """Without translations only base_query exists and its title fields reduce to title.zh."""
    qb = _builder_multilingual_title_only(default_language="en")
    q = _build(
        qb,
        query_text="法式 dress",
        rewritten="法式 dress",
        detected_language="zh",
        translations={},
    )
    idx = _clauses_index(q)
    assert set(idx) == {"base_query"}
    # Strip any "^boost" suffix before comparing field names.
    bases = {f.split("^", 1)[0] for f in _title_fields(idx["base_query"])}
    assert bases == {"title.zh"}
# --- 去重:与 base 同语言同文本的翻译项跳过 ---
def test_skips_translation_when_same_lang_and_same_text_as_base():
    """A translation with the same language and text as the base query produces no extra clause."""
    qb = _builder_multilingual_title_only(default_language="en")
    q = _build(
        qb,
        query_text="NIKE",
        rewritten="NIKE",
        detected_language="en",
        translations={"en": "NIKE", "zh": "耐克"},
    )
    idx = _clauses_index(q)
    assert set(idx) == {"base_query", "base_query_trans_zh"}
|
0536222c
tangwang
query parser优化
|
304
305
306
307
308
309
310
311
312
313
|
def test_keeps_translation_when_same_text_but_different_lang_than_base():
    """A translation with identical text but a different language still gets its own clause."""
    qb = _builder_multilingual_title_only(default_language="en")
    q = _build(
        qb,
        query_text="NIKE",
        rewritten="NIKE",
        detected_language="en",
        translations={"zh": "NIKE"},
    )
    idx = _clauses_index(q)
    assert set(idx) == {"base_query", "base_query_trans_zh"}
    assert _combined_fields_clause(idx["base_query_trans_zh"])["query"] == "NIKE"
|
0536222c
tangwang
query parser优化
|
318
319
320
321
322
323
324
325
326
327
328
329
330
|
# --- 翻译 key 规范化、空翻译跳过 ---
def test_translation_language_key_is_normalized_case_insensitive():
    """An upper-case translation key ("ZH") ends up as the lower-case clause name base_query_trans_zh."""
    qb = _builder_multilingual_title_only(default_language="en")
    q = _build(
        qb,
        query_text="dress",
        rewritten="dress",
        detected_language="en",
        translations={"ZH": "连衣裙"},
    )
    idx = _clauses_index(q)
    assert "base_query_trans_zh" in idx
    assert _combined_fields_clause(idx["base_query_trans_zh"])["query"] == "连衣裙"
|
0536222c
tangwang
query parser优化
|
335
336
337
338
339
340
341
342
343
344
|
def test_empty_translation_value_is_skipped():
    """A whitespace-only translation produces no clause; other translations are kept."""
    qb = _builder_multilingual_title_only(default_language="en")
    q = _build(
        qb,
        query_text="dress",
        rewritten="dress",
        detected_language="en",
        translations={"zh": " ", "fr": "robe"},
    )
    idx = _clauses_index(q)
    assert "base_query_trans_zh" not in idx
    assert "base_query_trans_fr" in idx
|
35da3813
tangwang
中英混写query的优化逻辑,不适...
|
351
|
# --- base 子句无 bool.boost;翻译子句带 translation_boost;phrase should 继承 phrase_match_boost ---
|
0536222c
tangwang
query parser优化
|
352
353
|
|
35da3813
tangwang
中英混写query的优化逻辑,不适...
|
354
|
def test_de_base_and_en_translation_phrase_boosts():
    """Base clause has no boost, the translation clause has translation_boost, and both phrase entries use phrase_match_boost."""
    qb = _builder_multilingual_title_only(default_language="en")
    q = _build(
        qb,
        query_text="x",
        rewritten="x",
        detected_language="de",
        translations={"en": "y"},
    )
    idx = _clauses_index(q)
    assert "boost" not in idx["base_query"]
    assert idx["base_query_trans_en"]["boost"] == qb.translation_boost
    base_should = _should_multi_matches_by_type(idx["base_query"])
    trans_should = _should_multi_matches_by_type(idx["base_query_trans_en"])
    assert base_should["phrase"]["boost"] == qb.phrase_match_boost
    assert trans_should["phrase"]["boost"] == qb.phrase_match_boost
|
0536222c
tangwang
query parser优化
|
370
371
372
373
374
375
376
377
378
379
380
381
382
|
# --- 无翻译:仅 base_query ---
def test_no_translations_only_base_query():
    """With an empty translations mapping the only named clause is base_query."""
    qb = _builder_multilingual_title_only(default_language="en")
    q = _build(
        qb,
        query_text="hello",
        rewritten="hello",
        detected_language="en",
        translations={},
    )
    idx = _clauses_index(q)
    assert set(idx) == {"base_query"}
|
0536222c
tangwang
query parser优化
|
386
387
388
389
390
391
392
393
394
395
396
|
# --- 与 KNN 同存时仍能解析文本子句(顶层 knn 不影响 query 内结构) ---
def test_text_clauses_present_alongside_knn():
    """With KNN enabled the should list holds a knn entry and the named text clauses are still found."""
    qb = _builder_multilingual_title_only(default_language="en")
    parsed = SimpleNamespace(
        rewritten_query="dress",
        detected_language="en",
        translations={"zh": "连衣裙"},
    )
    q = qb.build_query(
        query_text="dress",
        query_vector=np.array([0.1, 0.2, 0.3], dtype=np.float32),
        parsed_query=parsed,
        enable_knn=True,
    )
    # Peel an optional function_score wrapper before inspecting bool.should.
    qr = q["query"]
    if "function_score" in qr:
        qr = qr["function_score"]["query"]
    assert any("knn" in c for c in qr["bool"]["should"])
    idx = _clauses_index(q)
    assert set(idx) == {"base_query", "base_query_trans_zh"}
|
0536222c
tangwang
query parser优化
|
410
411
412
413
414
415
416
417
418
|
def test_detected_language_unknown_falls_back_to_default_language():
    """Mirrors the QueryConfig.default_language behaviour used when LanguageDetector fails."""
    qb = _builder_multilingual_title_only(default_language="en")
    parsed = SimpleNamespace(
        rewritten_query="shirt",
        detected_language="unknown",
        translations={"zh": "衬衫"},
    )
    q = qb.build_query(
        query_text="shirt",
        parsed_query=parsed,
        enable_knn=False,
    )
    idx = _clauses_index(q)
    assert set(idx) == {"base_query", "base_query_trans_zh"}
    assert _combined_fields_clause(idx["base_query"])["query"] == "shirt"
    # "unknown" is not a usable language, so the base clause lands on the builder default ("en").
    assert _has_title_lang(idx["base_query"], "en")
def test_ru_query_index_ru_en_includes_base_ru_and_trans_en():
    """A ru query with an en translation yields base_query on title.ru plus base_query_trans_en."""
    qb = _builder_multilingual_title_only(default_language="en")
    q = _build(
        qb,
        query_text="платье",
        rewritten="платье",
        detected_language="ru",
        translations={"en": "dress"},
    )
    idx = _clauses_index(q)
    assert set(idx) == {"base_query", "base_query_trans_en"}
    assert _combined_fields_clause(idx["base_query"])["query"] == "платье"
    assert _has_title_lang(idx["base_query"], "ru")
    assert _combined_fields_clause(idx["base_query_trans_en"])["query"] == "dress"
|
0536222c
tangwang
query parser优化
|
445
446
|
|
35da3813
tangwang
中英混写query的优化逻辑,不适...
|
447
448
|
def test_translation_generates_clause_for_any_target_lang_key():
    """Every non-empty language key in translations produces its own base_query_trans_* clause."""
    qb = _builder_multilingual_title_only(default_language="en")
    q = _build(
        qb,
        query_text="dress",
        rewritten="dress",
        detected_language="en",
        translations={"zh": "连衣裙", "de": "Kleid"},
    )
    idx = _clauses_index(q)
    assert "base_query_trans_de" in idx
    assert _combined_fields_clause(idx["base_query_trans_de"])["query"] == "Kleid"
    assert _has_title_lang(idx["base_query_trans_de"], "de")
def test_mixed_detected_zh_rewrite_differs_from_query_text_uses_rewritten_in_base():
    """base_query always uses rewritten_query rather than the raw query_text."""
    qb = _builder_multilingual_title_only(default_language="en")
    q = _build(
        qb,
        query_text=" 红色 ",
        rewritten="红色连衣裙",
        detected_language="zh",
        translations={"en": "red dress"},
    )
    idx = _clauses_index(q)
    assert _combined_fields_clause(idx["base_query"])["query"] == "红色连衣裙"
    assert _combined_fields_clause(idx["base_query_trans_en"])["query"] == "red dress"
|