"""Tests for QueryParser language detection, translation, and tokenization.

Each test builds a minimal SearchConfig (embedding/rewrite disabled, en/zh
only), injects a stub translator that echoes ``"{text}-{target_lang}"`` so the
translation path is observable without a real backend, and pins the detected
language via monkeypatching so detection heuristics don't affect assertions.
"""

from config import (
    FunctionScoreConfig,
    IndexConfig,
    QueryConfig,
    RerankConfig,
    SPUConfig,
    SearchConfig,
)
from query.query_parser import QueryParser


class _DummyTranslator:
    """Stub translator: returns the input text suffixed with the target language."""

    def translate(self, text, target_lang, source_lang, scene, model_name):
        return f"{text}-{target_lang}"


def _tokenizer(text):
    """Whitespace tokenizer used in place of the production tokenizer."""
    return str(text).split()


def _build_config() -> SearchConfig:
    """Build a minimal en/zh SearchConfig with embedding, rewrite and SPU disabled."""
    return SearchConfig(
        es_index_name="test_products",
        field_boosts={"title.en": 3.0, "title.zh": 3.0},
        indexes=[
            IndexConfig(name="default", label="default", fields=["title.en", "title.zh"])
        ],
        query_config=QueryConfig(
            enable_text_embedding=False,
            enable_query_rewrite=False,
            supported_languages=["en", "zh"],
            default_language="zh",
        ),
        function_score=FunctionScoreConfig(),
        rerank=RerankConfig(),
        spu_config=SPUConfig(enabled=False),
    )


def test_parse_mixed_zh_query_translates_to_en(monkeypatch):
    """A zh-detected mixed query is translated only to en and tokenized on whitespace."""
    parser = QueryParser(_build_config(), translator=_DummyTranslator(), tokenizer=_tokenizer)
    # Pin detection to "zh" so the test exercises translation, not detection.
    monkeypatch.setattr(parser.language_detector, "detect", lambda text: "zh")

    result = parser.parse(
        "法式 dress 连衣裙",
        tenant_id="162",
        generate_vector=False,
        target_languages=["zh", "en"],
    )

    assert result.detected_language == "zh"
    # Only the non-source target language gets a translation entry.
    assert result.translations == {"en": "法式 dress 连衣裙-en"}
    assert result.query_tokens == ["法式", "dress", "连衣裙"]
    # Legacy attributes must not reappear on the parse result.
    assert not hasattr(result, "query_text_by_lang")
    assert not hasattr(result, "search_langs")


def test_parse_mixed_en_query_translates_to_zh(monkeypatch):
    """An en-detected mixed query is translated only to zh and tokenized on whitespace."""
    parser = QueryParser(_build_config(), translator=_DummyTranslator(), tokenizer=_tokenizer)
    # Pin detection to "en" so the test exercises translation, not detection.
    monkeypatch.setattr(parser.language_detector, "detect", lambda text: "en")

    result = parser.parse(
        "red 连衣裙",
        tenant_id="0",
        generate_vector=False,
        target_languages=["en", "zh"],
    )

    assert result.detected_language == "en"
    assert result.translations == {"zh": "red 连衣裙-zh"}
    assert result.query_tokens == ["red", "连衣裙"]


def test_parse_waits_for_translation_when_source_in_index_languages(monkeypatch):
    """Even when en is among the index languages, parse should still wait for and
    adopt the en->zh translation result (sharing the time budget with the vector)."""
    parser = QueryParser(_build_config(), translator=_DummyTranslator(), tokenizer=_tokenizer)
    monkeypatch.setattr(parser.language_detector, "detect", lambda text: "en")

    result = parser.parse(
        "off shoulder top",
        tenant_id="0",
        generate_vector=False,
        target_languages=["en", "zh"],
    )

    assert result.detected_language == "en"
    assert result.translations.get("zh") == "off shoulder top-zh"
    # Legacy attribute must not reappear on the parse result.
    assert not hasattr(result, "source_in_index_languages")