Blame view

tests/test_query_parser_mixed_language.py 2.37 KB
a8261ece   tangwang   检索效果优化
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
  from types import SimpleNamespace
  
  from config import FunctionScoreConfig, IndexConfig, QueryConfig, RerankConfig, SPUConfig, SearchConfig
  from query.query_parser import QueryParser
  
  
  class _DummyTranslator:
      def translate(self, text, target_lang, source_lang, scene, model_name):
          return f"{text}-{target_lang}"
  
  
  def _build_config() -> SearchConfig:
      """Assemble the ``SearchConfig`` shared by the tests in this module.

      The config declares a single ``default`` index over ``title.en`` and
      ``title.zh`` (both boosted 3.0), supports ``en`` and ``zh`` with ``zh``
      as the default language, and turns off text embedding, query rewrite
      and SPU handling.
      """
      title_fields = ["title.en", "title.zh"]

      # Sub-configs are created in the same order the SearchConfig keyword
      # arguments consume them.
      default_index = IndexConfig(name="default", label="default", fields=title_fields)
      query_settings = QueryConfig(
          enable_text_embedding=False,
          enable_query_rewrite=False,
          supported_languages=["en", "zh"],
          default_language="zh",
      )
      function_score_settings = FunctionScoreConfig()
      rerank_settings = RerankConfig()
      spu_settings = SPUConfig(enabled=False)

      return SearchConfig(
          es_index_name="test_products",
          field_boosts={"title.en": 3.0, "title.zh": 3.0},
          indexes=[default_index],
          query_config=query_settings,
          function_score=function_score_settings,
          rerank=rerank_settings,
          spu_config=spu_settings,
      )
  
  
  def test_parse_adds_en_fields_for_mixed_chinese_query_with_meaningful_english(monkeypatch):
      """A query detected as ``zh`` that contains an English word also searches ``en``.

      The language detector is stubbed to report ``zh`` and the tenant config
      loader is stubbed to return ``index_languages`` of ``["zh", "en"]``. The
      parse result must list ``en`` in ``search_langs`` and map both languages
      to the raw query text, not the dummy translator's suffixed output.
      """
      query = "法式 dress 连衣裙"

      def _detect_zh(text):
          return "zh"

      def _tenant_config(tenant_id):
          return {"index_languages": ["zh", "en"]}

      def _loader_factory():
          return SimpleNamespace(get_tenant_config=_tenant_config)

      query_parser = QueryParser(_build_config(), translator=_DummyTranslator())
      monkeypatch.setattr(query_parser.language_detector, "detect", _detect_zh)
      # raising=False: do not fail if the attribute is absent on the target module.
      monkeypatch.setattr(
          "query.query_parser.get_tenant_config_loader",
          _loader_factory,
          raising=False,
      )

      parsed = query_parser.parse(query, tenant_id="162", generate_vector=False)

      assert parsed.detected_language == "zh"
      assert "en" in parsed.search_langs
      for lang in ("en", "zh"):
          assert parsed.query_text_by_lang[lang] == query
  
  
  def test_parse_adds_zh_fields_for_english_query_when_cjk_present(monkeypatch):
      """A query detected as ``en`` that contains CJK characters also searches ``zh``.

      The language detector is stubbed to report ``en`` and the tenant config
      loader is stubbed to return ``index_languages`` of ``["en", "zh"]``. The
      parse result must list ``zh`` in ``search_langs`` and map both languages
      to the raw query text, not the dummy translator's suffixed output.
      """
      query = "red 连衣裙"

      def _detect_en(text):
          return "en"

      def _tenant_config(tenant_id):
          return {"index_languages": ["en", "zh"]}

      def _loader_factory():
          return SimpleNamespace(get_tenant_config=_tenant_config)

      query_parser = QueryParser(_build_config(), translator=_DummyTranslator())
      monkeypatch.setattr(query_parser.language_detector, "detect", _detect_en)
      # raising=False: do not fail if the attribute is absent on the target module.
      monkeypatch.setattr(
          "query.query_parser.get_tenant_config_loader",
          _loader_factory,
          raising=False,
      )

      parsed = query_parser.parse(query, tenant_id="0", generate_vector=False)

      assert parsed.detected_language == "en"
      assert "zh" in parsed.search_langs
      for lang in ("zh", "en"):
          assert parsed.query_text_by_lang[lang] == query