"""Tests for QueryParser mixed-language query parsing (test_query_parser_mixed_language.py)."""
from types import SimpleNamespace

from config import FunctionScoreConfig, IndexConfig, QueryConfig, RerankConfig, SPUConfig, SearchConfig
from query.query_parser import QueryParser


class _DummyTranslator:
    def translate(self, text, target_lang, source_lang, scene, model_name):
        return f"{text}-{target_lang}"


def _build_config() -> SearchConfig:
    """Build a minimal two-language (en/zh) search configuration for the tests.

    Text embedding, query rewrite and SPU handling are all switched off, and
    the default language is ``zh``.
    """
    title_fields = ["title.en", "title.zh"]
    default_index = IndexConfig(name="default", label="default", fields=title_fields)
    query_settings = QueryConfig(
        enable_text_embedding=False,
        enable_query_rewrite=False,
        supported_languages=["en", "zh"],
        default_language="zh",
    )
    function_score = FunctionScoreConfig()
    rerank = RerankConfig()
    spu_settings = SPUConfig(enabled=False)

    return SearchConfig(
        es_index_name="test_products",
        field_boosts={"title.en": 3.0, "title.zh": 3.0},
        indexes=[default_index],
        query_config=query_settings,
        function_score=function_score,
        rerank=rerank,
        spu_config=spu_settings,
    )


def test_parse_adds_en_fields_for_mixed_chinese_query_with_meaningful_english(monkeypatch):
    """A query detected as zh that contains English words is also searched in en.

    The language detector is forced to return ``"zh"`` and the tenant config
    is stubbed to index both ``zh`` and ``en``.
    """

    def _tenant_config(tenant_id):
        return {"index_languages": ["zh", "en"]}

    def _tenant_loader():
        return SimpleNamespace(get_tenant_config=_tenant_config)

    query_parser = QueryParser(_build_config(), translator=_DummyTranslator())
    monkeypatch.setattr(query_parser.language_detector, "detect", lambda text: "zh")
    monkeypatch.setattr(
        "query.query_parser.get_tenant_config_loader",
        _tenant_loader,
        raising=False,
    )

    query = "法式 dress 连衣裙"
    parsed = query_parser.parse(query, tenant_id="162", generate_vector=False)

    assert parsed.detected_language == "zh"
    assert "en" in parsed.search_langs
    # When the translation completes within the budget it is written to the
    # target-language field (preferred over using only the original text for
    # supplemental probing).
    expected_by_lang = (
        ("en", "法式 dress 连衣裙-en"),
        ("zh", "法式 dress 连衣裙"),
    )
    for lang, expected_text in expected_by_lang:
        assert parsed.query_text_by_lang[lang] == expected_text


def test_parse_adds_zh_fields_for_english_query_when_cjk_present(monkeypatch):
    """A query detected as en that contains CJK characters is also searched in zh.

    The language detector is forced to return ``"en"`` and the tenant config
    is stubbed to index both ``en`` and ``zh``.
    """

    def _tenant_config(tenant_id):
        return {"index_languages": ["en", "zh"]}

    def _tenant_loader():
        return SimpleNamespace(get_tenant_config=_tenant_config)

    query_parser = QueryParser(_build_config(), translator=_DummyTranslator())
    monkeypatch.setattr(query_parser.language_detector, "detect", lambda text: "en")
    monkeypatch.setattr(
        "query.query_parser.get_tenant_config_loader",
        _tenant_loader,
        raising=False,
    )

    query = "red 连衣裙"
    parsed = query_parser.parse(query, tenant_id="0", generate_vector=False)

    assert parsed.detected_language == "en"
    assert "zh" in parsed.search_langs
    # The zh entry holds the stub translation; the en entry keeps the raw query.
    expected_by_lang = (
        ("zh", "red 连衣裙-zh"),
        ("en", "red 连衣裙"),
    )
    for lang, expected_text in expected_by_lang:
        assert parsed.query_text_by_lang[lang] == expected_text


def test_parse_waits_for_translation_when_source_in_index_languages(monkeypatch):
    """Even when en is in index_languages, the en->zh translation should still
    be awaited and adopted (it shares its budget with vector generation)."""

    def _tenant_config(tenant_id):
        return {"index_languages": ["en", "zh"]}

    def _tenant_loader():
        return SimpleNamespace(get_tenant_config=_tenant_config)

    query_parser = QueryParser(_build_config(), translator=_DummyTranslator())
    monkeypatch.setattr(query_parser.language_detector, "detect", lambda text: "en")
    monkeypatch.setattr(
        "query.query_parser.get_tenant_config_loader",
        _tenant_loader,
        raising=False,
    )

    parsed = query_parser.parse("off shoulder top", tenant_id="0", generate_vector=False)
    translated_zh = "off shoulder top-zh"

    assert parsed.detected_language == "en"
    # The stub translation must appear both in the raw translations and in the
    # per-language query text.
    assert parsed.translations.get("zh") == translated_zh
    assert parsed.query_text_by_lang.get("zh") == translated_zh
    assert parsed.source_in_index_languages is True