Commit 501066e10913edc8e5b5e11f7dd045b22424d1bb
1 parent
beef466f
Redis 缓存 LLM 结果
Showing
2 changed files
with
104 additions
and
0 deletions
Show diff stats
indexer/document_transformer.py
| ... | ... | @@ -512,12 +512,15 @@ class SPUDocumentTransformer: |
| 512 | 512 | "features", |
| 513 | 513 | ] |
| 514 | 514 | |
| 515 | + tenant_id = doc.get("tenant_id") | |
| 516 | + | |
| 515 | 517 | for lang in llm_langs: |
| 516 | 518 | try: |
| 517 | 519 | rows = analyze_products( |
| 518 | 520 | products=[{"id": spu_id, "title": title}], |
| 519 | 521 | target_lang=lang, |
| 520 | 522 | batch_size=1, |
| 523 | + tenant_id=str(tenant_id), | |
| 521 | 524 | ) |
| 522 | 525 | except Exception as e: |
| 523 | 526 | logger.warning( | ... | ... |
indexer/process_products.py
| ... | ... | @@ -9,13 +9,18 @@ import os |
| 9 | 9 | import json |
| 10 | 10 | import logging |
| 11 | 11 | import time |
| 12 | +import hashlib | |
| 12 | 13 | from datetime import datetime |
| 13 | 14 | from typing import List, Dict, Tuple, Any, Optional |
| 15 | + | |
| 16 | +import redis | |
| 14 | 17 | import requests |
| 15 | 18 | from pathlib import Path |
| 16 | 19 | from requests.adapters import HTTPAdapter |
| 17 | 20 | from urllib3.util.retry import Retry |
| 18 | 21 | |
| 22 | +from config.env_config import REDIS_CONFIG | |
| 23 | + | |
| 19 | 24 | # 配置 |
| 20 | 25 | BATCH_SIZE = 20 |
| 21 | 26 | API_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1" |
| ... | ... | @@ -51,6 +56,29 @@ logging.basicConfig( |
| 51 | 56 | logger = logging.getLogger(__name__) |
| 52 | 57 | |
| 53 | 58 | |
# Redis cache for LLM-generated product anchors / semantic attributes.
#
# The cache is strictly best-effort: any problem while configuring or reaching
# Redis leaves `_anchor_redis` as None, which the cache helpers treat as
# "caching disabled".
ANCHOR_CACHE_PREFIX = REDIS_CONFIG.get("anchor_cache_prefix", "product_anchors")
try:
    ANCHOR_CACHE_EXPIRE_DAYS = int(REDIS_CONFIG.get("anchor_cache_expire_days", 30))
except (TypeError, ValueError):
    # A malformed config value must not abort module import; fall back to the
    # same default that is used when the setting is absent.
    logger.warning("Invalid anchor_cache_expire_days in REDIS_CONFIG; falling back to 30")
    ANCHOR_CACHE_EXPIRE_DAYS = 30
_anchor_redis: Optional[redis.Redis] = None

try:
    _anchor_redis = redis.Redis(
        host=REDIS_CONFIG.get("host", "localhost"),
        # NOTE(review): the fallback port is 6479, while a stock Redis server
        # listens on 6379 - confirm this default is intentional.
        port=REDIS_CONFIG.get("port", 6479),
        password=REDIS_CONFIG.get("password"),
        decode_responses=True,
        # Short timeouts so an unreachable Redis cannot stall callers for long.
        socket_timeout=REDIS_CONFIG.get("socket_timeout", 1),
        socket_connect_timeout=REDIS_CONFIG.get("socket_connect_timeout", 1),
        retry_on_timeout=REDIS_CONFIG.get("retry_on_timeout", False),
        health_check_interval=10,
    )
    # Verify connectivity once up front; a failure disables the cache below.
    _anchor_redis.ping()
    logger.info("Redis cache initialized for product anchors and semantic attributes")
except Exception as e:
    logger.warning("Failed to initialize Redis for anchors cache: %s", e)
    # The client object was already bound before ping() failed; reset it so
    # the helpers see the cache as disabled.
    _anchor_redis = None
| 80 | + | |
| 81 | + | |
| 54 | 82 | LANG_LABELS: Dict[str, str] = { |
| 55 | 83 | "zh": "中文", |
| 56 | 84 | "en": "英文", |
| ... | ... | @@ -93,6 +121,51 @@ SYSTEM_MESSAGES: Dict[str, str] = { |
| 93 | 121 | } |
| 94 | 122 | |
| 95 | 123 | |
def _make_anchor_cache_key(
    title: str,
    target_lang: str,
    tenant_id: Optional[str] = None,
) -> str:
    """Build the cache key for one title's anchor / semantic-attribute result.

    The key has the form
    ``<ANCHOR_CACHE_PREFIX>:<tenant>:<target_lang>:<md5 hex of title>``.

    Args:
        title: Product title. It is hashed (MD5 over its UTF-8 bytes) so the
            key has a fixed length regardless of title size.
        target_lang: Language code of the analysis result.
        tenant_id: Tenant scope for the entry. ``None``, an empty string and a
            whitespace-only string all map to the ``"global"`` scope.

    Returns:
        The cache key string.
    """
    # Strip before applying the fallback so a blank tenant id cannot produce
    # an empty key segment; str() keeps a non-str id from raising on .strip().
    tenant = str(tenant_id).strip() if tenant_id else ""
    if not tenant:
        tenant = "global"
    # MD5 is used purely as a compact fingerprint here, not for security.
    digest = hashlib.md5(title.encode("utf-8")).hexdigest()
    return f"{ANCHOR_CACHE_PREFIX}:{tenant}:{target_lang}:{digest}"
| 133 | + | |
| 134 | + | |
def _get_cached_anchor_result(
    title: str,
    target_lang: str,
    tenant_id: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """Look up a previously cached analysis result for a title.

    Args:
        title: Product title used to derive the cache key.
        target_lang: Language code used to derive the cache key.
        tenant_id: Optional tenant scope used to derive the cache key.

    Returns:
        The cached result dict, or ``None`` when the cache is disabled, the
        key is absent or empty, the stored payload is not a JSON object, or
        any error occurs while reading. Errors are logged as warnings and
        never propagated.
    """
    if _anchor_redis is None:
        return None
    try:
        key = _make_anchor_cache_key(title, target_lang, tenant_id)
        raw = _anchor_redis.get(key)
        if not raw:
            return None
        cached = json.loads(raw)
    except Exception as e:
        # Best-effort cache: a read failure is treated as a cache miss.
        logger.warning("Failed to get anchor cache: %s", e)
        return None
    if not isinstance(cached, dict):
        # Guard against a foreign or corrupted payload under this key;
        # callers expect a result dict, so treat anything else as a miss.
        logger.warning("Ignoring non-dict anchor cache payload for key %s", key)
        return None
    return cached
| 151 | + | |
| 152 | + | |
def _set_cached_anchor_result(
    title: str,
    target_lang: str,
    result: Dict[str, Any],
    tenant_id: Optional[str] = None,
) -> None:
    """Store one analysis result in the cache with the configured expiry.

    The result is serialized as JSON (non-ASCII characters kept as-is) and
    written with a TTL of ``ANCHOR_CACHE_EXPIRE_DAYS`` days. Failures are
    logged as warnings and never propagated.

    Args:
        title: Product title used to derive the cache key.
        target_lang: Language code used to derive the cache key.
        result: Result dict to store; it must be JSON-serializable.
        tenant_id: Optional tenant scope used to derive the cache key.
    """
    if _anchor_redis is None:
        return
    ttl = ANCHOR_CACHE_EXPIRE_DAYS * 24 * 3600
    if ttl <= 0:
        # SETEX rejects a non-positive expiry, so every write would fail and
        # log a warning; skip the write instead.
        return
    try:
        key = _make_anchor_cache_key(title, target_lang, tenant_id)
        # NOTE(review): the whole result dict is stored while the key depends
        # only on tenant, language and title - confirm that readers do not
        # rely on per-product fields inside a cached entry.
        _anchor_redis.setex(key, ttl, json.dumps(result, ensure_ascii=False))
    except Exception as e:
        # Best-effort cache: a write failure must not affect the caller.
        logger.warning("Failed to set anchor cache: %s", e)
| 167 | + | |
| 168 | + | |
| 96 | 169 | def create_prompt(products: List[Dict[str, str]], target_lang: str = "zh") -> str: |
| 97 | 170 | """根据目标语言创建 LLM 提示词和表头说明。""" |
| 98 | 171 | if target_lang == "en": |
| ... | ... | @@ -537,6 +610,7 @@ def analyze_products( |
| 537 | 610 | products: List[Dict[str, str]], |
| 538 | 611 | target_lang: str = "zh", |
| 539 | 612 | batch_size: Optional[int] = None, |
| 613 | + tenant_id: Optional[str] = None, | |
| 540 | 614 | ) -> List[Dict[str, Any]]: |
| 541 | 615 | """ |
| 542 | 616 | 库调用入口:根据输入+语言,返回锚文本及各维度信息。 |
| ... | ... | @@ -555,6 +629,19 @@ def analyze_products( |
| 555 | 629 | if not products: |
| 556 | 630 | return [] |
| 557 | 631 | |
| 632 | + # 简单路径:索引阶段通常 batch_size=1,这里优先做单条缓存命中 | |
| 633 | + if len(products) == 1: | |
| 634 | + p = products[0] | |
| 635 | + title = str(p.get("title") or "").strip() | |
| 636 | + if title: | |
| 637 | + cached = _get_cached_anchor_result(title, target_lang, tenant_id=tenant_id) | |
| 638 | + if cached: | |
| 639 | + logger.info( | |
| 640 | + f"[analyze_products] Cache hit for title='{title[:50]}...', " | |
| 641 | + f"lang={target_lang}, tenant_id={tenant_id or 'global'}" | |
| 642 | + ) | |
| 643 | + return [cached] | |
| 644 | + | |
| 558 | 645 | bs = batch_size or BATCH_SIZE |
| 559 | 646 | all_results: List[Dict[str, Any]] = [] |
| 560 | 647 | total_batches = (len(products) + bs - 1) // bs |
| ... | ... | @@ -569,4 +656,18 @@ def analyze_products( |
| 569 | 656 | batch_results = process_batch(batch, batch_num=batch_num, target_lang=target_lang) |
| 570 | 657 | all_results.extend(batch_results) |
| 571 | 658 | |
| 659 | + # 写入缓存 | |
| 660 | + for item in batch_results: | |
| 661 | + title_input = str(item.get("title_input") or "").strip() | |
| 662 | + if not title_input: | |
| 663 | + continue | |
| 664 | + if item.get("error"): | |
| 665 | + # 不缓存错误结果,避免放大临时故障 | |
| 666 | + continue | |
| 667 | + try: | |
| 668 | + _set_cached_anchor_result(title_input, target_lang, item, tenant_id=tenant_id) | |
| 669 | + except Exception: | |
| 670 | + # 已在内部记录 warning | |
| 671 | + pass | |
| 672 | + | |
| 572 | 673 | return all_results | ... | ... |