diff --git a/config/config.yaml b/config/config.yaml index 83507f2..0078b30 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -241,10 +241,10 @@ services: infer_batch_size: 64 sort_by_doc_length: true length_sort_mode: "char" # char | token - instruction: "Given a shopping query, rank product titles by relevance" + instruction: "rank products by given query" qwen3_transformers: model_name: "Qwen/Qwen3-Reranker-0.6B" - instruction: "Given a shopping query, rank product titles by relevance" + instruction: "rank products by given query" max_length: 8192 batch_size: 64 use_fp16: true diff --git a/config/config_loader.py b/config/config_loader.py index c9ea4f8..e99fbf7 100644 --- a/config/config_loader.py +++ b/config/config_loader.py @@ -67,6 +67,14 @@ class QueryConfig: original_query_fallback_boost_when_translation_missing: float = 0.2 tie_breaker_base_query: float = 0.9 + # Query-time translation model selection (configurable) + # - zh_to_en_model: model for zh -> en + # - en_to_zh_model: model for en -> zh + # - default_translation_model: fallback model for all other language pairs + zh_to_en_model: str = "opus-mt-zh-en" + en_to_zh_model: str = "opus-mt-en-zh" + default_translation_model: str = "nllb-200-distilled-600m" + @dataclass class SPUConfig: @@ -262,6 +270,11 @@ class ConfigLoader: text_strategy_cfg.get("original_query_fallback_boost_when_translation_missing", 0.2) ), tie_breaker_base_query=float(text_strategy_cfg.get("tie_breaker_base_query", 0.9)), + zh_to_en_model=str(query_config_data.get("zh_to_en_model") or "opus-mt-zh-en"), + en_to_zh_model=str(query_config_data.get("en_to_zh_model") or "opus-mt-en-zh"), + default_translation_model=str( + query_config_data.get("default_translation_model") or "nllb-200-distilled-600m" + ), ) # Parse Function Score configuration diff --git a/docs/TODO.txt b/docs/TODO.txt index efa8479..bf75928 100644 --- a/docs/TODO.txt +++ b/docs/TODO.txt @@ -114,6 +114,10 @@ https://modelscope.cn/models/dengcao/Qwen3-Reranker-4B-GGUF/summary reranker 补充:nvidia/llama-nemotron-rerank-1b-v2 +https://huggingface.co/nvidia/llama-nemotron-rerank-1b-v2 +后端推理也建议使用vLLM +注意搜索相关资料,挖掘我的特斯拉 T4 GPU 的性能,充分挖掘性能 +你有充足的自由度进行实验 encoder架构。 比较新。 性能更好。 diff --git a/query/query_parser.py b/query/query_parser.py index cc73f9f..fef9d75 100644 --- a/query/query_parser.py +++ b/query/query_parser.py @@ -152,15 +152,20 @@ class QueryParser: return self._translator @staticmethod - def _pick_query_translation_model(source_lang: str, target_lang: str) -> str: - """Pick the translation capability for query-time translation.""" + def _pick_query_translation_model(source_lang: str, target_lang: str, config: SearchConfig) -> str: + """Pick the translation capability for query-time translation (configurable).""" src = str(source_lang or "").strip().lower() tgt = str(target_lang or "").strip().lower() + + # Use dedicated models for zh<->en if configured if src == "zh" and tgt == "en": - return "opus-mt-zh-en" + return config.query_config.zh_to_en_model if src == "en" and tgt == "zh": - return "opus-mt-en-zh" - return "deepl" + return config.query_config.en_to_zh_model + + # For any other language pairs, fall back to the configurable default model. + # By default this is `nllb-200-distilled-600m` (multi-lingual local model). + return config.query_config.default_translation_model def _simple_tokenize(self, text: str) -> List[str]: """ @@ -363,7 +368,7 @@ class QueryParser: thread_name_prefix="query-translation-wait", ) for lang in target_langs: - model_name = self._pick_query_translation_model(detected_lang, lang) + model_name = self._pick_query_translation_model(detected_lang, lang, self.config) log_debug( f"Submitting query translation | source={detected_lang} target={lang} model={model_name}" ) @@ -377,7 +382,7 @@ class QueryParser: ) else: for lang in target_langs: - model_name = self._pick_query_translation_model(detected_lang, lang) + model_name = self._pick_query_translation_model(detected_lang, lang, self.config) log_debug( f"Submitting query translation | source={detected_lang} target={lang} model={model_name}" ) -- libgit2 0.21.2