7fbca0d7
tangwang
启动脚本优化
|
1
|
from types import SimpleNamespace
|
a3d3fb11
tangwang
加phrase提权
|
2
|
from typing import Any, Dict
|
7fbca0d7
tangwang
启动脚本优化
|
3
4
5
6
7
8
9
10
11
|
import numpy as np
from search.es_query_builder import ESQueryBuilder
def _builder() -> ESQueryBuilder:
    """Create the ESQueryBuilder instance shared by the tests in this module.

    The builder is configured with English as the default language,
    ``title``/``brief`` as both the multilingual and core multilingual
    fields, no shared fields, and ``title_embedding`` as the text
    embedding field.
    """
    settings = dict(
        match_fields=["title.en^3.0", "brief.en^1.0"],
        multilingual_fields=["title", "brief"],
        core_multilingual_fields=["title", "brief"],
        shared_fields=[],
        text_embedding_field="title_embedding",
        default_language="en",
    )
    return ESQueryBuilder(**settings)
|
e756b18e
tangwang
重构了文本召回构建器,现在每个 b...
|
20
21
22
23
|
def _lexical_clause(query_root: Dict[str, Any]) -> Dict[str, Any]:
"""Return the first named lexical bool clause from query_root."""
if "bool" in query_root and query_root["bool"].get("_name"):
return query_root["bool"]
|
a3d3fb11
tangwang
加phrase提权
|
24
|
for clause in query_root.get("bool", {}).get("should", []):
|
e756b18e
tangwang
重构了文本召回构建器,现在每个 b...
|
25
26
27
28
29
30
|
clause_bool = clause.get("bool") or {}
if clause_bool.get("_name"):
return clause_bool
raise AssertionError("no lexical bool clause in query_root")
|
7fbca0d7
tangwang
启动脚本优化
|
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
def test_knn_prefilter_includes_range_filters():
    """A lone range filter must appear as the ``filter`` of the kNN clause."""
    expected_filter = {"range": {"min_price": {"gte": 50, "lt": 100}}}
    built = _builder().build_query(
        query_text="bags",
        query_vector=np.array([0.1, 0.2, 0.3]),
        range_filters={"min_price": {"gte": 50, "lt": 100}},
        enable_knn=True,
    )
    assert "knn" in built
    knn_clause = built["knn"]
    assert knn_clause["filter"] == expected_filter
def test_knn_prefilter_uses_only_conjunctive_filters_when_disjunctive_present():
    """Check how filters are split when one facet is marked disjunctive.

    The ``vendor`` term filter and the price range filter are expected in the
    kNN ``filter``; the ``category_name`` filter (configured as a disjunctive
    facet) is expected in ``post_filter`` instead.
    """
    expected_knn_filter = {
        "bool": {
            "filter": [
                {"term": {"vendor": "Nike"}},
                {"range": {"min_price": {"gte": 50, "lt": 100}}},
            ]
        }
    }
    expected_post_filter = {"terms": {"category_name": ["A", "B"]}}

    builder = _builder()
    disjunctive_facets = [SimpleNamespace(field="category_name", disjunctive=True)]
    built = builder.build_query(
        query_text="bags",
        query_vector=np.array([0.1, 0.2, 0.3]),
        filters={"category_name": ["A", "B"], "vendor": "Nike"},
        range_filters={"min_price": {"gte": 50, "lt": 100}},
        facet_configs=disjunctive_facets,
        enable_knn=True,
    )

    assert "knn" in built
    assert "filter" in built["knn"]
    assert built["knn"]["filter"] == expected_knn_filter
    assert built["post_filter"] == expected_post_filter
def test_knn_prefilter_not_added_without_filters():
    """With no filters supplied, the kNN clause has no ``filter`` key."""
    built = _builder().build_query(
        query_text="bags",
        query_vector=np.array([0.1, 0.2, 0.3]),
        enable_knn=True,
    )
    assert "knn" in built
    knn_clause = built["knn"]
    assert "filter" not in knn_clause
    # The kNN clause is still expected to be tagged with its query name.
    assert knn_clause["_name"] == "knn_query"
|
c90f80ed
tangwang
相关性优化
|
81
82
|
|
ef5baa86
tangwang
混杂语言处理
|
83
|
def test_text_query_contains_only_base_and_translation_named_queries():
    """Check the named clauses built for an English query with a zh translation.

    The root ``should`` list is expected to hold exactly ``base_query`` and
    ``base_query_trans_zh``, and the base clause is expected to combine a
    ``best_fields`` and a ``phrase`` multi_match.
    """
    builder = _builder()
    parsed = SimpleNamespace(
        rewritten_query="dress",
        detected_language="en",
        translations={"en": "dress", "zh": "连衣裙"},
    )
    built = builder.build_query(
        query_text="dress",
        parsed_query=parsed,
        enable_knn=False,
    )

    top_level = built["query"]["bool"]["should"]
    clause_names = []
    for entry in top_level:
        clause_names.append(entry["bool"]["_name"])
    assert clause_names == ["base_query", "base_query_trans_zh"]

    base_clauses = built["query"]["bool"]["should"][0]["bool"]["should"]
    match_types = [entry["multi_match"]["type"] for entry in base_clauses]
    assert match_types == ["best_fields", "phrase"]
|
ef5baa86
tangwang
混杂语言处理
|
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
def test_text_query_skips_duplicate_translation_same_as_base():
    """Check the query built when the only translation equals the base text.

    The root ``bool`` itself is expected to be the ``base_query`` clause,
    holding a ``best_fields`` and a ``phrase`` multi_match.
    """
    qb = _builder()
    parsed_query = SimpleNamespace(
        rewritten_query="dress",
        detected_language="en",
        translations={"en": "dress"},
    )
    build_kwargs = dict(
        query_text="dress",
        parsed_query=parsed_query,
        enable_knn=False,
    )
    q = qb.build_query(**build_kwargs)

    root = q["query"]
    assert root["bool"]["_name"] == "base_query"
    match_types = []
    for clause in root["bool"]["should"]:
        match_types.append(clause["multi_match"]["type"])
    assert match_types == ["best_fields", "phrase"]
|