"""Tests for QueryParser handling of mixed-language and translated queries."""

from types import SimpleNamespace

from config import (
    FunctionScoreConfig,
    IndexConfig,
    QueryConfig,
    RerankConfig,
    SPUConfig,
    SearchConfig,
)
from query.query_parser import QueryParser


class _DummyTranslator:
    """Translator stub that tags the input text with the target language."""

    def translate(self, text, target_lang, source_lang, scene, model_name):
        """Return ``text`` suffixed with ``-<target_lang>``; other arguments are ignored."""
        return f"{text}-{target_lang}"


def _build_config() -> SearchConfig:
    """Build a minimal SearchConfig with en/zh title fields and zh as default language.

    Text embedding, query rewrite and SPU handling are switched off.
    """
    boosts = {"title.en": 3.0, "title.zh": 3.0}
    default_index = IndexConfig(
        name="default",
        label="default",
        fields=["title.en", "title.zh"],
    )
    query_settings = QueryConfig(
        enable_text_embedding=False,
        enable_query_rewrite=False,
        supported_languages=["en", "zh"],
        default_language="zh",
    )
    return SearchConfig(
        es_index_name="test_products",
        field_boosts=boosts,
        indexes=[default_index],
        query_config=query_settings,
        function_score=FunctionScoreConfig(),
        rerank=RerankConfig(),
        spu_config=SPUConfig(enabled=False),
    )


def _parser_for(monkeypatch, detected_lang, index_languages):
    """Create a QueryParser with language detection and tenant config stubbed.

    Args:
        monkeypatch: pytest's monkeypatch fixture.
        detected_lang: language code the patched detector always returns.
        index_languages: languages reported under the tenant config's
            ``index_languages`` key (a fresh list is handed out per lookup).
    """
    parser = QueryParser(_build_config(), translator=_DummyTranslator())

    def _detect(text):
        return detected_lang

    def _tenant_loader():
        return SimpleNamespace(
            get_tenant_config=lambda tenant_id: {
                "index_languages": list(index_languages)
            }
        )

    monkeypatch.setattr(parser.language_detector, "detect", _detect)
    # raising=False: do not fail if the attribute is absent from the module.
    monkeypatch.setattr(
        "query.query_parser.get_tenant_config_loader",
        _tenant_loader,
        raising=False,
    )
    return parser


def test_parse_adds_en_fields_for_mixed_chinese_query_with_meaningful_english(monkeypatch):
    """A zh-detected query containing English also gets an en search language and text."""
    parser = _parser_for(monkeypatch, "zh", ["zh", "en"])

    parsed = parser.parse("法式 dress 连衣裙", tenant_id="162", generate_vector=False)

    assert parsed.detected_language == "zh"
    assert "en" in parsed.search_langs
    # When translation completes within the budget, its result is written to
    # the target-language field (preferable to only using the original text
    # for supplemental probing).
    assert parsed.query_text_by_lang["en"] == "法式 dress 连衣裙-en"
    assert parsed.query_text_by_lang["zh"] == "法式 dress 连衣裙"


def test_parse_adds_zh_fields_for_english_query_when_cjk_present(monkeypatch):
    """An en-detected query containing CJK text also gets a zh search language and text."""
    parser = _parser_for(monkeypatch, "en", ["en", "zh"])

    parsed = parser.parse("red 连衣裙", tenant_id="0", generate_vector=False)

    assert parsed.detected_language == "en"
    assert "zh" in parsed.search_langs
    assert parsed.query_text_by_lang["zh"] == "red 连衣裙-zh"
    assert parsed.query_text_by_lang["en"] == "red 连衣裙"


def test_parse_waits_for_translation_when_source_in_index_languages(monkeypatch):
    """Even when en is in index_languages, the en->zh translation should still
    be awaited and adopted (sharing the budget with vector generation)."""
    parser = _parser_for(monkeypatch, "en", ["en", "zh"])

    parsed = parser.parse("off shoulder top", tenant_id="0", generate_vector=False)

    assert parsed.detected_language == "en"
    assert parsed.translations.get("zh") == "off shoulder top-zh"
    assert parsed.query_text_by_lang.get("zh") == "off shoulder top-zh"
    assert parsed.source_in_index_languages is True