Blame view

tests/test_es_query_builder.py 3.61 KB
7fbca0d7   tangwang   启动脚本优化
1
  from types import SimpleNamespace
a3d3fb11   tangwang   加phrase提权
2
  from typing import Any, Dict
7fbca0d7   tangwang   启动脚本优化
3
4
5
6
7
8
9
10
11
  
  import numpy as np
  
  from search.es_query_builder import ESQueryBuilder
  
  
  def _builder() -> ESQueryBuilder:
      """Create the ESQueryBuilder configuration shared by the tests in this module.

      Returns:
          An ``ESQueryBuilder`` constructed with ``title.en``/``brief.en`` match
          fields, ``title`` and ``brief`` as both the multilingual and the core
          multilingual fields, no shared fields, ``title_embedding`` as the text
          embedding field and ``en`` as the default language.
      """
      # Each list is its own literal so the builder never receives aliased
      # objects for two different parameters.
      settings = {
          "match_fields": ["title.en^3.0", "brief.en^1.0"],
          "multilingual_fields": ["title", "brief"],
          "core_multilingual_fields": ["title", "brief"],
          "shared_fields": [],
          "text_embedding_field": "title_embedding",
          "default_language": "en",
      }
      return ESQueryBuilder(**settings)
  
  
e756b18e   tangwang   重构了文本召回构建器,现在每个 b...
20
21
22
23
  def _lexical_clause(query_root: Dict[str, Any]) -> Dict[str, Any]:
      """Return the first named lexical bool clause from query_root."""
      if "bool" in query_root and query_root["bool"].get("_name"):
          return query_root["bool"]
a3d3fb11   tangwang   加phrase提权
24
      for clause in query_root.get("bool", {}).get("should", []):
e756b18e   tangwang   重构了文本召回构建器,现在每个 b...
25
26
27
28
29
30
          clause_bool = clause.get("bool") or {}
          if clause_bool.get("_name"):
              return clause_bool
      raise AssertionError("no lexical bool clause in query_root")
  
  
7fbca0d7   tangwang   启动脚本优化
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
  def test_knn_prefilter_includes_range_filters():
      """A lone range filter must appear as the kNN clause's ``filter``."""
      expected_filter = {"range": {"min_price": {"gte": 50, "lt": 100}}}

      built = _builder().build_query(
          query_text="bags",
          query_vector=np.array([0.1, 0.2, 0.3]),
          range_filters={"min_price": {"gte": 50, "lt": 100}},
          enable_knn=True,
      )

      assert "knn" in built
      assert built["knn"]["filter"] == expected_filter
  
  
  def test_knn_prefilter_uses_only_conjunctive_filters_when_disjunctive_present():
      """Disjunctive facet filters land in ``post_filter``; the rest pre-filter kNN."""
      disjunctive_facet = SimpleNamespace(field="category_name", disjunctive=True)

      built = _builder().build_query(
          query_text="bags",
          query_vector=np.array([0.1, 0.2, 0.3]),
          filters={"category_name": ["A", "B"], "vendor": "Nike"},
          range_filters={"min_price": {"gte": 50, "lt": 100}},
          facet_configs=[disjunctive_facet],
          enable_knn=True,
      )

      assert "knn" in built
      knn_clause = built["knn"]
      assert "filter" in knn_clause

      # Only the non-disjunctive term filter and the range filter are expected
      # inside the kNN pre-filter, in this order.
      conjunctive_only = [
          {"term": {"vendor": "Nike"}},
          {"range": {"min_price": {"gte": 50, "lt": 100}}},
      ]
      assert knn_clause["filter"] == {"bool": {"filter": conjunctive_only}}

      # The disjunctive facet's selection is expected as the post filter.
      assert built["post_filter"] == {"terms": {"category_name": ["A", "B"]}}
  
  
  def test_knn_prefilter_not_added_without_filters():
      """Without any filters the kNN clause has no ``filter`` key and is named ``knn_query``."""
      built = _builder().build_query(
          query_text="bags",
          query_vector=np.array([0.1, 0.2, 0.3]),
          enable_knn=True,
      )

      assert "knn" in built
      knn_clause = built["knn"]
      assert "filter" not in knn_clause
      assert knn_clause["_name"] == "knn_query"
c90f80ed   tangwang   相关性优化
81
82
  
  
ef5baa86   tangwang   混杂语言处理
83
  def test_text_query_contains_only_base_and_translation_named_queries():
      """The text query holds exactly the base clause and the ``zh`` translation clause.

      The ``en`` translation equals the base query text, so only ``base_query``
      and ``base_query_trans_zh`` are expected among the named ``should``
      clauses, and the base clause is expected to combine a ``best_fields`` and
      a ``phrase`` multi_match.
      """
      parsed = SimpleNamespace(
          rewritten_query="dress",
          detected_language="en",
          translations={"en": "dress", "zh": "连衣裙"},
      )

      built = _builder().build_query(
          query_text="dress",
          parsed_query=parsed,
          enable_knn=False,
      )

      named_clauses = built["query"]["bool"]["should"]
      clause_names = []
      for named_clause in named_clauses:
          clause_names.append(named_clause["bool"]["_name"])
      assert clause_names == ["base_query", "base_query_trans_zh"]

      base_match_types = [
          sub_clause["multi_match"]["type"]
          for sub_clause in named_clauses[0]["bool"]["should"]
      ]
      assert base_match_types == ["best_fields", "phrase"]
ef5baa86   tangwang   混杂语言处理
102
103
104
105
106
107
108
109
110
111
112
113
114
115
  
  
  def test_text_query_skips_duplicate_translation_same_as_base():
      """A translation identical to the base query adds no extra named clause.

      With only an ``en`` translation equal to the query text, the query root
      itself is expected to be the ``base_query`` bool with a ``best_fields``
      and a ``phrase`` multi_match.
      """
      parsed = SimpleNamespace(
          rewritten_query="dress",
          detected_language="en",
          translations={"en": "dress"},
      )

      built = _builder().build_query(
          query_text="dress",
          parsed_query=parsed,
          enable_knn=False,
      )

      root_bool = built["query"]["bool"]
      assert root_bool["_name"] == "base_query"

      match_types = [sub_clause["multi_match"]["type"] for sub_clause in root_bool["should"]]
      assert match_types == ["best_fields", "phrase"]