diff --git a/config/env_config.py b/config/env_config.py index 936c388..63774ff 100644 --- a/config/env_config.py +++ b/config/env_config.py @@ -44,6 +44,8 @@ REDIS_CONFIG = { 'cache_expire_days': int(os.getenv('REDIS_CACHE_EXPIRE_DAYS', 360*2)), # 6 months 'translation_cache_expire_days': int(os.getenv('REDIS_TRANSLATION_CACHE_EXPIRE_DAYS', 360*2)), 'translation_cache_prefix': os.getenv('REDIS_TRANSLATION_CACHE_PREFIX', 'trans'), + # Embedding 缓存 key 前缀,例如 "embedding" + 'embedding_cache_prefix': os.getenv('REDIS_EMBEDDING_CACHE_PREFIX', 'embedding'), } # DeepL API Key diff --git a/docs/缓存与Redis使用说明.md b/docs/缓存与Redis使用说明.md index b2c1e2e..8563978 100644 --- a/docs/缓存与Redis使用说明.md +++ b/docs/缓存与Redis使用说明.md @@ -20,7 +20,7 @@ | 模块 / 场景 | Key 模板 | Value 内容示例 | 过期策略 | 备注 | |------------|----------|----------------|----------|------| -| 文本向量缓存(embedding) | `embedding:{language}:{norm_flag}:{query}` | `pickle.dumps(np.ndarray)`,如 1024 维 BGE 向量 | TTL=`REDIS_CONFIG["cache_expire_days"]` 天;访问时滑动过期 | 见 `embeddings/text_encoder.py` | +| 文本向量缓存(embedding) | `{EMBEDDING_CACHE_PREFIX}:{query}` | `pickle.dumps(np.ndarray)`,如 1024 维 BGE 向量 | TTL=`REDIS_CONFIG["cache_expire_days"]` 天;访问时滑动过期 | 见 `embeddings/text_encoder.py`,前缀由 `REDIS_CONFIG["embedding_cache_prefix"]` 控制 | | 翻译结果缓存(Qwen-MT 翻译) | `{cache_prefix}:{model}:{src}:{tgt}:{sha256(payload)}` | 机翻后的单条字符串 | TTL=`services.translation.cache.ttl_seconds` 秒;可配置滑动过期 | 见 `query/qwen_mt_translate.py` + `config/config.yaml` | | 商品内容理解缓存(anchors / 语义属性 / tags) | `{ANCHOR_CACHE_PREFIX}:{tenant_or_global}:{target_lang}:{md5(title)}` | `json.dumps(dict)`,包含 id/title/category/tags/anchor_text 等 | TTL=`ANCHOR_CACHE_EXPIRE_DAYS` 天 | 见 `indexer/product_enrich.py` | @@ -35,16 +35,16 @@ ### 2.1 Key 设计 -- 函数:`_get_cache_key(query: str, language: str, normalize_embeddings: bool) -> str` +- 函数:`_get_cache_key(query: str, normalize_embeddings: bool) -> str` - 模板: ```text -embedding:{language}:{norm_flag}:{query} +{EMBEDDING_CACHE_PREFIX}:{query} ``` - 字段说明: - - `language`:当前实现中统一传入 `"generic"`; - - `norm_flag`:`"norm1"` 表示归一化向量,`"norm0"` 表示未归一化; + - `EMBEDDING_CACHE_PREFIX`:来自 `REDIS_CONFIG["embedding_cache_prefix"]`,默认值为 `"embedding"`,可通过环境变量 `REDIS_EMBEDDING_CACHE_PREFIX` 覆盖; + - 当前实现**不再区分 language 与 normalize flag**,即无论是否归一化,key 结构都相同; - `query`:原始文本(未做哈希),注意长度特别长的 query 会直接出现在 key 中。 ### 2.2 Value 与类型 diff --git a/embeddings/clip_as_service_encoder.py b/embeddings/clip_as_service_encoder.py index 2837067..16f12fd 100644 --- a/embeddings/clip_as_service_encoder.py +++ b/embeddings/clip_as_service_encoder.py @@ -54,13 +54,8 @@ class ClipAsServiceImageEncoder: show_progress: whether to show progress bar when encoding. """ _ensure_clip_client_path() - try: - from clip_client import Client - except ImportError as e: - raise ImportError( - "clip_client not found. Add third-party/clip-as-service/client to PYTHONPATH " - "or run: pip install -e third-party/clip-as-service/client" - ) from e + + from clip_client import Client self._server = server self._batch_size = batch_size diff --git a/embeddings/text_encoder.py b/embeddings/text_encoder.py index acf54fc..04004c9 100644 --- a/embeddings/text_encoder.py +++ b/embeddings/text_encoder.py @@ -15,11 +15,7 @@ logger = logging.getLogger(__name__) from config.services_config import get_embedding_base_url # Try to import REDIS_CONFIG, but allow import to fail -try: - from config.env_config import REDIS_CONFIG -except ImportError: - REDIS_CONFIG = {} - +from config.env_config import REDIS_CONFIG class TextEmbeddingEncoder: """ @@ -31,6 +27,7 @@ class TextEmbeddingEncoder: self.service_url = str(resolved_url).rstrip("/") self.endpoint = f"{self.service_url}/embed/text" self.expire_time = timedelta(days=REDIS_CONFIG.get("cache_expire_days", 180)) + self.cache_prefix = str(REDIS_CONFIG.get("embedding_cache_prefix", "embedding")).strip() or "embedding" logger.info("Creating TextEmbeddingEncoder instance with service URL: %s", self.service_url) try: @@ -104,7 +101,7 @@ class TextEmbeddingEncoder: embeddings: List[Optional[np.ndarray]] = [None] * len(sentences) for i, text in enumerate(sentences): - cached = self._get_cached_embedding(text, "generic", normalize_embeddings) + cached = self._get_cached_embedding(text) if cached is not None: embeddings[i] = cached else: @@ -130,7 +127,7 @@ class TextEmbeddingEncoder: embedding_array = np.array(embedding, dtype=np.float32) if self._is_valid_embedding(embedding_array): embeddings[original_idx] = embedding_array - self._set_cached_embedding(text, "generic", embedding_array, normalize_embeddings) + self._set_cached_embedding(text, embedding_array, normalize_embeddings) else: raise ValueError( f"Invalid embedding returned from service for text index {original_idx}" @@ -165,12 +162,7 @@ class TextEmbeddingEncoder: device=device, normalize_embeddings=normalize_embeddings, ) - - def _get_cache_key(self, query: str, language: str, normalize_embeddings: bool = True) -> str: - """Generate a cache key for the query""" - norm_flag = "norm1" if normalize_embeddings else "norm0" - return f"embedding:{language}:{norm_flag}:{query}" - + def _is_valid_embedding(self, embedding: np.ndarray) -> bool: """ Check if embedding is valid (not None, correct shape, no NaN/Inf). @@ -194,16 +186,14 @@ class TextEmbeddingEncoder: def _get_cached_embedding( self, - query: str, - language: str, - normalize_embeddings: bool = True, + query: str ) -> Optional[np.ndarray]: """Get embedding from cache if exists (with sliding expiration)""" if not self.redis_client: return None try: - cache_key = self._get_cache_key(query, language, normalize_embeddings) + cache_key = f"{self.cache_prefix}:{query}" cached_data = self.redis_client.get(cache_key) if cached_data: embedding = pickle.loads(cached_data) @@ -232,7 +222,6 @@ class TextEmbeddingEncoder: def _set_cached_embedding( self, query: str, - language: str, embedding: np.ndarray, normalize_embeddings: bool = True, ) -> bool: @@ -241,7 +230,7 @@ class TextEmbeddingEncoder: return False try: - cache_key = self._get_cache_key(query, language, normalize_embeddings) + cache_key = f"{self.cache_prefix}:{query}" serialized_data = pickle.dumps(embedding) self.redis_client.setex( cache_key, diff --git a/indexer/document_transformer.py b/indexer/document_transformer.py index 993fc96..1dfea72 100644 --- a/indexer/document_transformer.py +++ b/indexer/document_transformer.py @@ -18,13 +18,7 @@ from indexer.product_enrich import analyze_products logger = logging.getLogger(__name__) -# Try to import translator (optional dependency) -try: - from query.qwen_mt_translate import Translator - TRANSLATOR_AVAILABLE = True -except ImportError: - TRANSLATOR_AVAILABLE = False - Translator = None +from query.qwen_mt_translate import Translator class SPUDocumentTransformer: diff --git a/reranker/backends/qwen3_transformers.py b/reranker/backends/qwen3_transformers.py index 7955167..b452020 100644 --- a/reranker/backends/qwen3_transformers.py +++ b/reranker/backends/qwen3_transformers.py @@ -13,15 +13,8 @@ from typing import Any, Dict, List, Optional, Tuple logger = logging.getLogger("reranker.backends.qwen3_transformers") -try: - import torch - from transformers import AutoModelForCausalLM, AutoTokenizer -except ImportError as e: - raise ImportError( - "Qwen3-Transformers reranker backend requires transformers>=4.51.0 and torch. " - "Install with: pip install transformers>=4.51.0 torch" - ) from e - +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer def _format_instruction(instruction: str, query: str, doc: str) -> str: """Format (query, doc) pair per official Qwen3-Reranker spec.""" diff --git a/reranker/backends/qwen3_vllm.py b/reranker/backends/qwen3_vllm.py index d953107..36b3e75 100644 --- a/reranker/backends/qwen3_vllm.py +++ b/reranker/backends/qwen3_vllm.py @@ -16,16 +16,10 @@ from typing import Any, Dict, List, Tuple logger = logging.getLogger("reranker.backends.qwen3_vllm") -try: - import torch - from transformers import AutoTokenizer - from vllm import LLM, SamplingParams - from vllm.inputs.data import TokensPrompt -except ImportError as e: - raise ImportError( - "Qwen3-vLLM reranker backend requires vllm>=0.8.5 and transformers. " - "Install with: pip install vllm transformers" - ) from e +import torch +from transformers import AutoTokenizer +from vllm import LLM, SamplingParams +from vllm.inputs.data import TokensPrompt def deduplicate_with_positions(texts: List[str]) -> Tuple[List[str], List[int]]: diff --git a/tests/test_embedding_pipeline.py b/tests/test_embedding_pipeline.py index 482a525..b08d4aa 100644 --- a/tests/test_embedding_pipeline.py +++ b/tests/test_embedding_pipeline.py @@ -128,7 +128,7 @@ def test_text_embedding_encoder_raises_on_missing_vector(monkeypatch): def test_text_embedding_encoder_cache_hit(monkeypatch): fake_redis = _FakeRedis() cached = np.array([0.9, 0.8], dtype=np.float32) - fake_redis.store["embedding:generic:cached-text"] = pickle.dumps(cached) + fake_redis.store["embedding:cached-text"] = pickle.dumps(cached) monkeypatch.setattr("embeddings.text_encoder.redis.Redis", lambda **kwargs: fake_redis) calls = {"count": 0} diff --git a/utils/es_client.py b/utils/es_client.py index 8896e32..248fc36 100644 --- a/utils/es_client.py +++ b/utils/es_client.py @@ -8,11 +8,7 @@ from typing import Dict, Any, List, Optional import os import logging -# Try to import ES_CONFIG, but allow import to fail -try: - from config.env_config import ES_CONFIG -except ImportError: - ES_CONFIG = None +from config.env_config import ES_CONFIG logger = logging.getLogger(__name__) -- libgit2 0.21.2