Commit 89638140e284ada7049861a3f971f51678a29e09
1 parent
2e48a32d
重构 indexer 文档构建接口与测试示例
- 新增 /indexer/build-docs 与 /indexer/build-docs-from-db 接口:前者接收上游传入的 SPU/SKU/Option 原始行数据构建 ES doc(不写 ES),后者在测试场景下基于 tenant_id+spu_ids 内部查库并复用同一套文档构建逻辑
- 调整增量与全量索引 SQL 与聚合逻辑:移除 shoplazza_product_spu.compare_at_price 读取,统一从 SKU 表聚合最大 compare_at_price,修复 1054 列不存在错误,保证 ES 字段 compare_at_price 来源与索引字段说明v2 保持一致
- 更新 SPUDocumentTransformer:完善价格区间计算、compare_at_price 聚合以及多语言字段输出,确保输出结构与 mappings/search_products.json、Java 侧 ProductIndexDocument 完全对齐
- 为 indexer 模块补充 README 与 prompts:系统化说明 Java 调度 + Python 富化的职责划分、翻译缓存方案(Redis translation:{tenant_id}:{target_lang}:{md5(text)})以及 HTTP 接口使用方式
- 更新顶层 README、搜索API对接指南与测试Pipeline说明:增加关于 indexer 专用服务(serve-indexer, 端口6004)、正式文档构建接口以及手动链路验证(MySQL → build-docs → ES 查询对比)的说明
- 清理并修正 ES 诊断脚本 docs/常用查询 - ES.md:统一改为 per-tenant 索引 search_products_tenant_{tenant_id},修正过期字段名(keywords 等)和分面聚合字段(去掉 .keyword,使用当前 mapping 中的字段)
Made-with: Cursor
Showing
10 changed files
with
1143 additions
and
116 deletions
Show diff stats
README.md
| ... | ... | @@ -212,6 +212,17 @@ curl -X POST http://localhost:6002/search/ \ |
| 212 | 212 | - `scripts/ingest.sh <tenant_id> [recreate]`:驱动 `indexer/` 模块写入 `search_products` |
| 213 | 213 | - 详解:`测试数据指南.md` |
| 214 | 214 | |
| 215 | +- **索引富化 & Java 对接** | |
| 216 | + - Java 索引程序负责:全量/增量调度 + 从 MySQL 查询 `shoplazza_product_spu/sku/option/...` | |
| 217 | + - Python `indexer` 模块负责:**MySQL 行 → ES doc** 的全部逻辑(多语言、翻译、向量、规格聚合等) | |
| 218 | + - 正式对接接口(推荐): | |
| 219 | + - `POST http://<indexer_host>:6004/indexer/build-docs` | |
| 220 | + - 入参:`tenant_id + items[{spu, skus, options}]` | |
| 221 | + - 出参:与 `mappings/search_products.json` 完全一致的 `docs` 列表,上游自行写入 ES | |
| 222 | + - 调试/自测接口(内部使用): | |
| 223 | + - `POST http://127.0.0.1:6004/indexer/build-docs-from-db`,只需要 `tenant_id + spu_ids`,由服务内部查库并返回 ES doc | |
| 224 | + - 详解:`indexer/README.md`、`docs/索引字段说明v2.md` | |
| 225 | + | |
| 215 | 226 | - **搜索服务 & API** |
| 216 | 227 | - `api/`(FastAPI)承载 REST API,`search/` + `query/` 负责查询解析与下发 |
| 217 | 228 | - API、分页、过滤、Facet、KNN 等:`搜索API对接指南.md` | ... | ... |
api/routes/indexer.py
| ... | ... | @@ -6,8 +6,8 @@ |
| 6 | 6 | |
| 7 | 7 | import asyncio |
| 8 | 8 | from fastapi import APIRouter, HTTPException |
| 9 | -from typing import List | |
| 10 | -from pydantic import BaseModel | |
| 9 | +from typing import Any, Dict, List | |
| 10 | +from pydantic import BaseModel, Field | |
| 11 | 11 | import logging |
| 12 | 12 | from sqlalchemy import text |
| 13 | 13 | |
| ... | ... | @@ -38,6 +38,44 @@ class GetDocumentsRequest(BaseModel): |
| 38 | 38 | spu_ids: List[str] |
| 39 | 39 | |
| 40 | 40 | |
| 41 | +class BuildDocItem(BaseModel): | |
| 42 | + """ | |
| 43 | + 单个 SPU 的原始数据包(由上游从 MySQL 查询得到)。 | |
| 44 | + | |
| 45 | + - spu: 一行 SPU 记录,对应 shoplazza_product_spu 表 | |
| 46 | + - skus: 该 SPU 下的所有 SKU 记录,对应 shoplazza_product_sku 表 | |
| 47 | + - options: 该 SPU 的所有 Option 记录,对应 shoplazza_product_option 表 | |
| 48 | + """ | |
| 49 | + spu: Dict[str, Any] = Field(..., description="单个 SPU 的原始字段(MySQL 行数据)") | |
| 50 | + skus: List[Dict[str, Any]] = Field(default_factory=list, description="该 SPU 关联的 SKU 列表") | |
| 51 | + options: List[Dict[str, Any]] = Field(default_factory=list, description="该 SPU 关联的 Option 列表") | |
| 52 | + | |
| 53 | + | |
| 54 | +class BuildDocsRequest(BaseModel): | |
| 55 | + """ | |
| 56 | + 基于上游已查询出的 MySQL 原始数据,构建 ES 索引文档(不访问数据库、不写入 ES)。 | |
| 57 | + | |
| 58 | + 该接口是 Java 等外部索引程序正式使用的“doc 生成接口”: | |
| 59 | + - 上游负责:全量 / 增量调度 + 从 MySQL 查询出各表数据 | |
| 60 | + - 本模块负责:根据配置和算法,将原始行数据转换为与 mappings/search_products.json 一致的 ES 文档 | |
| 61 | + """ | |
| 62 | + tenant_id: str = Field(..., description="租户 ID,用于加载租户配置、语言策略等") | |
| 63 | + items: List[BuildDocItem] = Field(..., description="需要构建 doc 的 SPU 列表(含其 SKUs 和 Options)") | |
| 64 | + | |
| 65 | + | |
| 66 | +class BuildDocsFromDbRequest(BaseModel): | |
| 67 | + """ | |
| 68 | + 便捷测试请求:只提供 tenant_id 和 spu_ids,由本服务从 MySQL 查询原始数据, | |
| 69 | + 然后内部调用 /indexer/build-docs 的同一套逻辑构建 ES doc。 | |
| 70 | + | |
| 71 | + 用途: | |
| 72 | + - 本地/联调时快速验证 doc 结构,无需手工构造庞大的 BuildDocsRequest JSON | |
| 73 | + - 生产正式使用建议直接走 BuildDocsRequest,由外层(Java)控制 MySQL 查询 | |
| 74 | + """ | |
| 75 | + tenant_id: str = Field(..., description="租户 ID") | |
| 76 | + spu_ids: List[str] = Field(..., description="需要构建 doc 的 SPU ID 列表") | |
| 77 | + | |
| 78 | + | |
| 41 | 79 | @router.post("/reindex") |
| 42 | 80 | async def reindex_all(request: ReindexRequest): |
| 43 | 81 | """ |
| ... | ... | @@ -139,6 +177,238 @@ async def index_spus(request: IndexSpusRequest): |
| 139 | 177 | raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") |
| 140 | 178 | |
| 141 | 179 | |
| 180 | +@router.post("/build-docs") | |
| 181 | +async def build_docs(request: BuildDocsRequest): | |
| 182 | + """ | |
| 183 | + 构建 ES 文档(不访问数据库、不写入 ES)。 | |
| 184 | + | |
| 185 | + 使用场景: | |
| 186 | + - 上游(例如 Java 索引程序)已经从 MySQL 查询出了 SPU / SKU / Option 等原始行数据 | |
| 187 | + - 希望复用本项目的全部“索引富化”能力(多语言、翻译、向量、规格聚合等) | |
| 188 | + - 只需要拿到与 `mappings/search_products.json` 一致的 doc 列表,由上游自行写入 ES | |
| 189 | + """ | |
| 190 | + try: | |
| 191 | + if not request.items: | |
| 192 | + raise HTTPException(status_code=400, detail="items cannot be empty") | |
| 193 | + if len(request.items) > 200: | |
| 194 | + raise HTTPException(status_code=400, detail="Maximum 200 items allowed per request") | |
| 195 | + | |
| 196 | + incremental_service = get_incremental_service() | |
| 197 | + if incremental_service is None: | |
| 198 | + raise HTTPException(status_code=503, detail="Incremental indexer service is not initialized") | |
| 199 | + | |
| 200 | + # 复用增量索引服务中的 transformer 缓存与配置 / 语言 / embedding 初始化逻辑 | |
| 201 | + transformer, encoder, enable_embedding = incremental_service._get_transformer_bundle( | |
| 202 | + tenant_id=request.tenant_id | |
| 203 | + ) | |
| 204 | + | |
| 205 | + import pandas as pd | |
| 206 | + | |
| 207 | + docs: List[Dict[str, Any]] = [] | |
| 208 | + failed: List[Dict[str, Any]] = [] | |
| 209 | + | |
| 210 | + for item in request.items: | |
| 211 | + try: | |
| 212 | + # 将上游传入的 MySQL 行数据转换为 Pandas 结构,复用 SPUDocumentTransformer | |
| 213 | + spu_df = pd.DataFrame([item.spu]) | |
| 214 | + spu_row = spu_df.iloc[0] | |
| 215 | + skus_df = pd.DataFrame(item.skus) if item.skus else pd.DataFrame() | |
| 216 | + options_df = pd.DataFrame(item.options) if item.options else pd.DataFrame() | |
| 217 | + | |
| 218 | + doc = transformer.transform_spu_to_doc( | |
| 219 | + tenant_id=request.tenant_id, | |
| 220 | + spu_row=spu_row, | |
| 221 | + skus=skus_df, | |
| 222 | + options=options_df, | |
| 223 | + ) | |
| 224 | + | |
| 225 | + if doc is None: | |
| 226 | + failed.append( | |
| 227 | + { | |
| 228 | + "spu_id": str(item.spu.get("id")), | |
| 229 | + "error": "transform_spu_to_doc returned None", | |
| 230 | + } | |
| 231 | + ) | |
| 232 | + continue | |
| 233 | + | |
| 234 | + # 在“构建 doc”接口中,是否补齐 embedding 由内部配置决定(与增量索引一致) | |
| 235 | + # 此处不强制生成 / 不强制关闭,只复用 transformer_bundle 的 encoder / enable_embedding 设置。 | |
| 236 | + if enable_embedding and encoder: | |
| 237 | + title_obj = doc.get("title") or {} | |
| 238 | + title_text = None | |
| 239 | + if isinstance(title_obj, dict): | |
| 240 | + title_text = title_obj.get("en") or title_obj.get("zh") | |
| 241 | + if not title_text: | |
| 242 | + for v in title_obj.values(): | |
| 243 | + if v and str(v).strip(): | |
| 244 | + title_text = str(v) | |
| 245 | + break | |
| 246 | + if title_text and str(title_text).strip(): | |
| 247 | + try: | |
| 248 | + embeddings = encoder.encode(title_text) | |
| 249 | + if embeddings is not None and len(embeddings) > 0: | |
| 250 | + emb0 = embeddings[0] | |
| 251 | + import numpy as np | |
| 252 | + | |
| 253 | + if isinstance(emb0, np.ndarray): | |
| 254 | + doc["title_embedding"] = emb0.tolist() | |
| 255 | + except Exception: | |
| 256 | + # 构建 doc 接口不因为 embedding 失败而整体失败 | |
| 257 | + pass | |
| 258 | + | |
| 259 | + docs.append(doc) | |
| 260 | + except Exception as e: | |
| 261 | + failed.append( | |
| 262 | + { | |
| 263 | + "spu_id": str(item.spu.get("id")), | |
| 264 | + "error": str(e), | |
| 265 | + } | |
| 266 | + ) | |
| 267 | + | |
| 268 | + return { | |
| 269 | + "tenant_id": request.tenant_id, | |
| 270 | + "docs": docs, | |
| 271 | + "total": len(request.items), | |
| 272 | + "success_count": len(docs), | |
| 273 | + "failed_count": len(failed), | |
| 274 | + "failed": failed, | |
| 275 | + } | |
| 276 | + | |
| 277 | + except HTTPException: | |
| 278 | + raise | |
| 279 | + except Exception as e: | |
| 280 | + logger.error(f"Error building docs for tenant_id={request.tenant_id}: {e}", exc_info=True) | |
| 281 | + raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") | |
| 282 | + | |
| 283 | + | |
| 284 | +@router.post("/build-docs-from-db") | |
| 285 | +async def build_docs_from_db(request: BuildDocsFromDbRequest): | |
| 286 | + """ | |
| 287 | + 基于数据库数据构建 ES 文档(测试 / 调试用)。 | |
| 288 | + | |
| 289 | + - 入参:tenant_id + spu_ids | |
| 290 | + - 步骤: | |
| 291 | + 1. 使用增量索引服务的查询能力,从 MySQL 批量加载 SPU / SKU / Option | |
| 292 | + 2. 组装为 BuildDocsRequest 的 items | |
| 293 | + 3. 内部调用与 /indexer/build-docs 相同的构建逻辑,返回 ES-ready docs | |
| 294 | + | |
| 295 | + 注意: | |
| 296 | + - 该接口主要用于本项目自测和调试;正式生产建议由上游(Java)自行查库后调用 /indexer/build-docs | |
| 297 | + """ | |
| 298 | + try: | |
| 299 | + if not request.spu_ids: | |
| 300 | + raise HTTPException(status_code=400, detail="spu_ids cannot be empty") | |
| 301 | + if len(request.spu_ids) > 200: | |
| 302 | + raise HTTPException(status_code=400, detail="Maximum 200 SPU IDs allowed per request") | |
| 303 | + | |
| 304 | + incremental_service = get_incremental_service() | |
| 305 | + if incremental_service is None: | |
| 306 | + raise HTTPException(status_code=503, detail="Incremental indexer service is not initialized") | |
| 307 | + | |
| 308 | + # 直接复用增量服务里的批量查询方法,从 MySQL 拉取原始行数据 | |
| 309 | + # 只加载未删除的记录(include_deleted=False) | |
| 310 | + spu_df = incremental_service._load_spus_for_spu_ids( | |
| 311 | + tenant_id=request.tenant_id, | |
| 312 | + spu_ids=request.spu_ids, | |
| 313 | + include_deleted=False | |
| 314 | + ) | |
| 315 | + if spu_df.empty: | |
| 316 | + return { | |
| 317 | + "tenant_id": request.tenant_id, | |
| 318 | + "docs": [], | |
| 319 | + "total": 0, | |
| 320 | + "success_count": 0, | |
| 321 | + "failed_count": len(request.spu_ids), | |
| 322 | + "failed": [ | |
| 323 | + {"spu_id": spu_id, "error": "SPU not found or deleted"} | |
| 324 | + for spu_id in request.spu_ids | |
| 325 | + ], | |
| 326 | + } | |
| 327 | + | |
| 328 | + # 仅对存在的 spu_id 构建 item,避免无效 ID | |
| 329 | + # _load_skus_for_spu_ids / _load_options_for_spu_ids 会自动过滤不存在的 spu_id | |
| 330 | + existing_ids = [str(int(i)) for i in spu_df["id"].tolist()] | |
| 331 | + skus_df = incremental_service._load_skus_for_spu_ids( | |
| 332 | + tenant_id=request.tenant_id, spu_ids=existing_ids | |
| 333 | + ) | |
| 334 | + options_df = incremental_service._load_options_for_spu_ids( | |
| 335 | + tenant_id=request.tenant_id, spu_ids=existing_ids | |
| 336 | + ) | |
| 337 | + | |
| 338 | + import pandas as pd | |
| 339 | + | |
| 340 | + # group by spu_id 方便取子集 | |
| 341 | + sku_groups = skus_df.groupby("spu_id") if not skus_df.empty else None | |
| 342 | + option_groups = options_df.groupby("spu_id") if not options_df.empty else None | |
| 343 | + | |
| 344 | + items: List[BuildDocItem] = [] | |
| 345 | + failed: List[Dict[str, Any]] = [] | |
| 346 | + | |
| 347 | + for _, spu_row in spu_df.iterrows(): | |
| 348 | + spu_id = int(spu_row["id"]) | |
| 349 | + try: | |
| 350 | + spu_dict = spu_row.to_dict() | |
| 351 | + skus = ( | |
| 352 | + sku_groups.get_group(spu_id).to_dict("records") | |
| 353 | + if sku_groups is not None and spu_id in sku_groups.groups | |
| 354 | + else [] | |
| 355 | + ) | |
| 356 | + options = ( | |
| 357 | + option_groups.get_group(spu_id).to_dict("records") | |
| 358 | + if option_groups is not None and spu_id in option_groups.groups | |
| 359 | + else [] | |
| 360 | + ) | |
| 361 | + items.append( | |
| 362 | + BuildDocItem( | |
| 363 | + spu=spu_dict, | |
| 364 | + skus=skus, | |
| 365 | + options=options, | |
| 366 | + ) | |
| 367 | + ) | |
| 368 | + except Exception as e: | |
| 369 | + failed.append( | |
| 370 | + { | |
| 371 | + "spu_id": str(spu_id), | |
| 372 | + "error": str(e), | |
| 373 | + } | |
| 374 | + ) | |
| 375 | + | |
| 376 | + if not items: | |
| 377 | + return { | |
| 378 | + "tenant_id": request.tenant_id, | |
| 379 | + "docs": [], | |
| 380 | + "total": 0, | |
| 381 | + "success_count": 0, | |
| 382 | + "failed_count": len(request.spu_ids), | |
| 383 | + "failed": failed | |
| 384 | + or [ | |
| 385 | + {"spu_id": spu_id, "error": "SPU not found or data load failed"} | |
| 386 | + for spu_id in request.spu_ids | |
| 387 | + ], | |
| 388 | + } | |
| 389 | + | |
| 390 | + # 调用与 /indexer/build-docs 相同的构建逻辑 | |
| 391 | + build_request = BuildDocsRequest(tenant_id=request.tenant_id, items=items) | |
| 392 | + result = await build_docs(build_request) | |
| 393 | + | |
| 394 | + # 合并两层 failed 信息 | |
| 395 | + merged_failed = list(result.get("failed", [])) if isinstance(result, dict) else [] | |
| 396 | + merged_failed.extend(failed) | |
| 397 | + | |
| 398 | + if isinstance(result, dict): | |
| 399 | + result["failed"] = merged_failed | |
| 400 | + # 更新 failed_count | |
| 401 | + result["failed_count"] = len(merged_failed) | |
| 402 | + return result | |
| 403 | + return result | |
| 404 | + | |
| 405 | + except HTTPException: | |
| 406 | + raise | |
| 407 | + except Exception as e: | |
| 408 | + logger.error(f"Error building docs from DB for tenant_id={request.tenant_id}: {e}", exc_info=True) | |
| 409 | + raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") | |
| 410 | + | |
| 411 | + | |
| 142 | 412 | @router.post("/documents") |
| 143 | 413 | async def get_documents(request: GetDocumentsRequest): |
| 144 | 414 | """ | ... | ... |
docs/常用查询 - ES.md
| ... | ... | @@ -3,6 +3,9 @@ |
| 3 | 3 | # ====================================== |
| 4 | 4 | # 租户相关 |
| 5 | 5 | # ====================================== |
| 6 | +# | |
| 7 | +# 说明:索引已按租户拆分为 search_products_tenant_{tenant_id}, | |
| 8 | +# 一般情况下不需要在查询中再按 tenant_id 过滤(可选保留用于排查)。 | |
| 6 | 9 | |
| 7 | 10 | ### 1. 根据 tenant_id / spu_id 查询 |
| 8 | 11 | curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| ... | ... | @@ -11,60 +14,54 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ |
| 11 | 14 | "query": { |
| 12 | 15 | "bool": { |
| 13 | 16 | "filter": [ |
| 14 | - { "term": {"spu_id" : 206150} }, | |
| 15 | - { "term": { "tenant_id": "170" } } | |
| 17 | + { "term": {"spu_id" : 206150} } | |
| 16 | 18 | ] |
| 17 | 19 | } |
| 18 | 20 | } |
| 19 | 21 | }' |
| 20 | 22 | |
| 21 | 23 | |
| 22 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 23 | - "size": 100, | |
| 24 | - "_source": ["title"], | |
| 25 | - "query": { | |
| 26 | - "bool": { | |
| 27 | - "filter": [ | |
| 28 | - { "term": { "tenant_id": "170" } } | |
| 29 | - ] | |
| 30 | - } | |
| 31 | - } | |
| 32 | - }' | |
| 24 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 25 | + "size": 100, | |
| 26 | + "_source": ["title"], | |
| 27 | + "query": { | |
| 28 | + "match_all": {} | |
| 29 | + } | |
| 30 | +}' | |
| 33 | 31 | |
| 34 | 32 | |
| 35 | 33 | curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 36 | - "size": 5, | |
| 37 | - "_source": ["title", "keyword", "keyword.zh", "tags"], | |
| 38 | - "query": { | |
| 39 | - "bool": { | |
| 40 | - "filter": [ | |
| 41 | - { "term": { "spu_id": "223167" } } | |
| 42 | - ] | |
| 43 | - } | |
| 34 | + "size": 5, | |
| 35 | + "_source": ["title", "keywords", "tags"], | |
| 36 | + "query": { | |
| 37 | + "bool": { | |
| 38 | + "filter": [ | |
| 39 | + { "term": { "spu_id": "223167" } } | |
| 40 | + ] | |
| 44 | 41 | } |
| 45 | - }' | |
| 42 | + } | |
| 43 | +}' | |
| 46 | 44 | |
| 47 | 45 | |
| 48 | 46 | curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 49 | - "size": 1, | |
| 50 | - "_source": ["title", "keyword", "keyword.zh", "tags"], | |
| 51 | - "query": { | |
| 52 | - "bool": { | |
| 53 | - "must": [ | |
| 54 | - { | |
| 55 | - "match": { | |
| 56 | - "title.en": { | |
| 57 | - "query": "Floerns Women Gothic Graphic Ribbed Strapless Tube Top Asymmetrical Ruched Bandeau Tops" | |
| 58 | - } | |
| 47 | + "size": 1, | |
| 48 | + "_source": ["title", "keywords", "tags"], | |
| 49 | + "query": { | |
| 50 | + "bool": { | |
| 51 | + "must": [ | |
| 52 | + { | |
| 53 | + "match": { | |
| 54 | + "title.en": { | |
| 55 | + "query": "Floerns Women Gothic Graphic Ribbed Strapless Tube Top Asymmetrical Ruched Bandeau Tops" | |
| 59 | 56 | } |
| 60 | 57 | } |
| 61 | - ], | |
| 62 | - "filter": [ | |
| 63 | - { "term": { "tenant_id": "170" } }, | |
| 64 | - { "terms": { "tags": ["女装", "派对"] } } | |
| 65 | - ] | |
| 66 | - } | |
| 58 | + } | |
| 59 | + ], | |
| 60 | + "filter": [ | |
| 61 | + { "terms": { "tags": ["女装", "派对"] } } | |
| 62 | + ] | |
| 67 | 63 | } |
| 64 | + } | |
| 68 | 65 | }' |
| 69 | 66 | |
| 70 | 67 | |
| ... | ... | @@ -89,17 +86,17 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ |
| 89 | 86 | } |
| 90 | 87 | }' |
| 91 | 88 | |
| 92 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_analyze' -H 'Content-Type: application/json' -d '{ | |
| 93 | - "analyzer": "icu_analyzer", | |
| 89 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{ | |
| 90 | + "analyzer": "index_ansj", | |
| 94 | 91 | "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝" |
| 95 | 92 | }' |
| 96 | 93 | |
| 97 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_analyze' -H 'Content-Type: application/json' -d '{ | |
| 98 | - "analyzer": "hanlp_standard", | |
| 94 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{ | |
| 95 | + "analyzer": "query_ansj", | |
| 99 | 96 | "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝" |
| 100 | 97 | }' |
| 101 | 98 | |
| 102 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 99 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 103 | 100 | "size": 100, |
| 104 | 101 | "from": 0, |
| 105 | 102 | "query": { |
| ... | ... | @@ -127,16 +124,14 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ |
| 127 | 124 | ], |
| 128 | 125 | "filter": [ |
| 129 | 126 | { |
| 130 | - "term": { | |
| 131 | - "tenant_id": "170" | |
| 132 | - } | |
| 127 | + "match_all": {} | |
| 133 | 128 | } |
| 134 | 129 | ] |
| 135 | 130 | } |
| 136 | 131 | } |
| 137 | 132 | }' |
| 138 | 133 | |
| 139 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 134 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 140 | 135 | "size": 1, |
| 141 | 136 | "from": 0, |
| 142 | 137 | "query": { |
| ... | ... | @@ -163,11 +158,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ |
| 163 | 158 | } |
| 164 | 159 | ], |
| 165 | 160 | "filter": [ |
| 166 | - { | |
| 167 | - "term": { | |
| 168 | - "tenant_id": "170" | |
| 169 | - } | |
| 170 | - } | |
| 161 | + { "match_all": {} } | |
| 171 | 162 | ] |
| 172 | 163 | } |
| 173 | 164 | }, |
| ... | ... | @@ -259,12 +250,10 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ |
| 259 | 250 | } |
| 260 | 251 | }' |
| 261 | 252 | |
| 262 | -GET /search_products/_search | |
| 253 | +GET /search_products_tenant_2/_search | |
| 263 | 254 | { |
| 264 | 255 | "query": { |
| 265 | - "term": { | |
| 266 | - "tenant_id": "2" | |
| 267 | - } | |
| 256 | + "match_all": {} | |
| 268 | 257 | } |
| 269 | 258 | } |
| 270 | 259 | |
| ... | ... | @@ -282,11 +271,9 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ |
| 282 | 271 | |
| 283 | 272 | |
| 284 | 273 | ### 2. 统计租户的总文档数 |
| 285 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_count?pretty' -H 'Content-Type: application/json' -d '{ | |
| 274 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_count?pretty' -H 'Content-Type: application/json' -d '{ | |
| 286 | 275 | "query": { |
| 287 | - "term": { | |
| 288 | - "tenant_id": "170" | |
| 289 | - } | |
| 276 | + "match_all": {} | |
| 290 | 277 | } |
| 291 | 278 | }' |
| 292 | 279 | |
| ... | ... | @@ -298,7 +285,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_c |
| 298 | 285 | ## 1. 检查ES文档的分面字段数据 |
| 299 | 286 | |
| 300 | 287 | ### 1.1 查询特定租户的商品,显示分面相关字段 |
| 301 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 288 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 302 | 289 | "query": { |
| 303 | 290 | "term": { |
| 304 | 291 | "tenant_id": "162" |
| ... | ... | @@ -319,7 +306,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 319 | 306 | }' |
| 320 | 307 | |
| 321 | 308 | ### 1.2 验证category1_name字段是否有数据 |
| 322 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 309 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 323 | 310 | "query": { |
| 324 | 311 | "bool": { |
| 325 | 312 | "filter": [ |
| ... | ... | @@ -332,7 +319,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 332 | 319 | }' |
| 333 | 320 | |
| 334 | 321 | ### 1.3 验证specifications字段是否有数据 |
| 335 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 322 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 336 | 323 | "query": { |
| 337 | 324 | "bool": { |
| 338 | 325 | "filter": [ |
| ... | ... | @@ -347,17 +334,15 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 347 | 334 | ## 2. 分面聚合查询(Facet Aggregations) |
| 348 | 335 | |
| 349 | 336 | ### 2.1 category1_name 分面聚合 |
| 350 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 337 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 351 | 338 | "query": { |
| 352 | - "term": { | |
| 353 | - "tenant_id": "162" | |
| 354 | - } | |
| 339 | + "match_all": {} | |
| 355 | 340 | }, |
| 356 | 341 | "size": 0, |
| 357 | 342 | "aggs": { |
| 358 | 343 | "category1_name_facet": { |
| 359 | 344 | "terms": { |
| 360 | - "field": "category1_name.keyword", | |
| 345 | + "field": "category1_name", | |
| 361 | 346 | "size": 50 |
| 362 | 347 | } |
| 363 | 348 | } |
| ... | ... | @@ -365,11 +350,9 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 365 | 350 | }' |
| 366 | 351 | |
| 367 | 352 | ### 2.2 specifications.color 分面聚合 |
| 368 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 353 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 369 | 354 | "query": { |
| 370 | - "term": { | |
| 371 | - "tenant_id": "162" | |
| 372 | - } | |
| 355 | + "match_all": {} | |
| 373 | 356 | }, |
| 374 | 357 | "size": 0, |
| 375 | 358 | "aggs": { |
| ... | ... | @@ -387,7 +370,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 387 | 370 | "aggs": { |
| 388 | 371 | "values": { |
| 389 | 372 | "terms": { |
| 390 | - "field": "specifications.value.keyword", | |
| 373 | + "field": "specifications.value", | |
| 391 | 374 | "size": 50 |
| 392 | 375 | } |
| 393 | 376 | } |
| ... | ... | @@ -399,11 +382,9 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 399 | 382 | }' |
| 400 | 383 | |
| 401 | 384 | ### 2.3 specifications.size 分面聚合 |
| 402 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 385 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 403 | 386 | "query": { |
| 404 | - "term": { | |
| 405 | - "tenant_id": "162" | |
| 406 | - } | |
| 387 | + "match_all": {} | |
| 407 | 388 | }, |
| 408 | 389 | "size": 0, |
| 409 | 390 | "aggs": { |
| ... | ... | @@ -421,7 +402,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 421 | 402 | "aggs": { |
| 422 | 403 | "values": { |
| 423 | 404 | "terms": { |
| 424 | - "field": "specifications.value.keyword", | |
| 405 | + "field": "specifications.value", | |
| 425 | 406 | "size": 50 |
| 426 | 407 | } |
| 427 | 408 | } |
| ... | ... | @@ -433,11 +414,9 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 433 | 414 | }' |
| 434 | 415 | |
| 435 | 416 | ### 2.4 specifications.material 分面聚合 |
| 436 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 417 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 437 | 418 | "query": { |
| 438 | - "term": { | |
| 439 | - "tenant_id": "162" | |
| 440 | - } | |
| 419 | + "match_all": {} | |
| 441 | 420 | }, |
| 442 | 421 | "size": 0, |
| 443 | 422 | "aggs": { |
| ... | ... | @@ -455,7 +434,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 455 | 434 | "aggs": { |
| 456 | 435 | "values": { |
| 457 | 436 | "terms": { |
| 458 | - "field": "specifications.value.keyword", | |
| 437 | + "field": "specifications.value", | |
| 459 | 438 | "size": 50 |
| 460 | 439 | } |
| 461 | 440 | } |
| ... | ... | @@ -467,17 +446,15 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 467 | 446 | }' |
| 468 | 447 | |
| 469 | 448 | ### 2.5 综合分面聚合(category + color + size + material) |
| 470 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 449 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 471 | 450 | "query": { |
| 472 | - "term": { | |
| 473 | - "tenant_id": "162" | |
| 474 | - } | |
| 451 | + "match_all": {} | |
| 475 | 452 | }, |
| 476 | 453 | "size": 0, |
| 477 | 454 | "aggs": { |
| 478 | 455 | "category1_name_facet": { |
| 479 | 456 | "terms": { |
| 480 | - "field": "category1_name.keyword", | |
| 457 | + "field": "category1_name", | |
| 481 | 458 | "size": 50 |
| 482 | 459 | } |
| 483 | 460 | }, |
| ... | ... | @@ -495,7 +472,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 495 | 472 | "aggs": { |
| 496 | 473 | "values": { |
| 497 | 474 | "terms": { |
| 498 | - "field": "specifications.value.keyword", | |
| 475 | + "field": "specifications.value", | |
| 499 | 476 | "size": 50 |
| 500 | 477 | } |
| 501 | 478 | } |
| ... | ... | @@ -517,7 +494,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 517 | 494 | "aggs": { |
| 518 | 495 | "values": { |
| 519 | 496 | "terms": { |
| 520 | - "field": "specifications.value.keyword", | |
| 497 | + "field": "specifications.value", | |
| 521 | 498 | "size": 50 |
| 522 | 499 | } |
| 523 | 500 | } |
| ... | ... | @@ -539,7 +516,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 539 | 516 | "aggs": { |
| 540 | 517 | "values": { |
| 541 | 518 | "terms": { |
| 542 | - "field": "specifications.value.keyword", | |
| 519 | + "field": "specifications.value", | |
| 543 | 520 | "size": 50 |
| 544 | 521 | } |
| 545 | 522 | } |
| ... | ... | @@ -594,11 +571,10 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 594 | 571 | ## 4. 统计查询 |
| 595 | 572 | |
| 596 | 573 | ### 4.1 统计有category1_name的文档数量 |
| 597 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_count?pretty' -H 'Content-Type: application/json' -d '{ | |
| 574 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{ | |
| 598 | 575 | "query": { |
| 599 | 576 | "bool": { |
| 600 | 577 | "filter": [ |
| 601 | - { "term": { "tenant_id": "162" } }, | |
| 602 | 578 | { "exists": { "field": "category1_name" } } |
| 603 | 579 | ] |
| 604 | 580 | } |
| ... | ... | @@ -606,11 +582,10 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_c |
| 606 | 582 | }' |
| 607 | 583 | |
| 608 | 584 | ### 4.2 统计有specifications的文档数量 |
| 609 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_count?pretty' -H 'Content-Type: application/json' -d '{ | |
| 585 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{ | |
| 610 | 586 | "query": { |
| 611 | 587 | "bool": { |
| 612 | 588 | "filter": [ |
| 613 | - { "term": { "tenant_id": "162" } }, | |
| 614 | 589 | { "exists": { "field": "specifications" } } |
| 615 | 590 | ] |
| 616 | 591 | } |
| ... | ... | @@ -621,7 +596,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_c |
| 621 | 596 | ## 5. 诊断问题场景 |
| 622 | 597 | |
| 623 | 598 | ### 5.1 查找没有category1_name但有category的文档(MySQL有数据但ES没有) |
| 624 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 599 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 625 | 600 | "query": { |
| 626 | 601 | "bool": { |
| 627 | 602 | "filter": [ |
| ... | ... | @@ -637,7 +612,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 637 | 612 | }' |
| 638 | 613 | |
| 639 | 614 | ### 5.2 查找有option但没有specifications的文档(数据转换问题) |
| 640 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 615 | +curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 641 | 616 | "query": { |
| 642 | 617 | "bool": { |
| 643 | 618 | "filter": [ |
| ... | ... | @@ -655,7 +630,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 655 | 630 | |
| 656 | 631 | |
| 657 | 632 | 重排序: |
| 658 | -GET /search_products/_search | |
| 633 | +GET /search_products_tenant_170/_search | |
| 659 | 634 | { |
| 660 | 635 | "query": { |
| 661 | 636 | "match": { | ... | ... |
docs/搜索API对接指南.md
| ... | ... | @@ -132,9 +132,11 @@ curl -X POST "http://120.76.41.98:6002/search/" \ |
| 132 | 132 | | 搜索建议 | GET | `/search/suggestions` | 搜索建议(框架,暂未实现) ⚠️ TODO | |
| 133 | 133 | | 即时搜索 | GET | `/search/instant` | 边输入边搜索(框架) ⚠️ TODO | |
| 134 | 134 | | 获取文档 | GET | `/search/{doc_id}` | 获取单个文档 | |
| 135 | -| 全量索引 | POST | `/indexer/reindex` | 全量索引接口(导入数据,不删除索引) | | |
| 136 | -| 增量索引 | POST | `/indexer/index` | 增量索引接口(指定SPU ID列表进行索引,支持自动检测删除和显式删除) | | |
| 135 | +| 全量索引 | POST | `/indexer/reindex` | 全量索引接口(导入数据,不删除索引,仅推荐自测使用) | | |
| 136 | +| 增量索引 | POST | `/indexer/index` | 增量索引接口(指定SPU ID列表进行索引,支持自动检测删除和显式删除,仅推荐自测使用) | | |
| 137 | 137 | | 查询文档 | POST | `/indexer/documents` | 查询SPU文档数据(不写入ES) | |
| 138 | +| 构建ES文档(正式对接) | POST | `/indexer/build-docs` | 基于上游提供的 MySQL 行数据构建 ES doc,不写入 ES,供 Java 等调用后自行写入 | | |
| 139 | +| 构建ES文档(测试用) | POST | `/indexer/build-docs-from-db` | 仅在测试/调试时使用,根据 `tenant_id + spu_ids` 内部查库并构建 ES doc | | |
| 138 | 140 | | 索引健康检查 | GET | `/indexer/health` | 检查索引服务状态 | |
| 139 | 141 | | 健康检查 | GET | `/admin/health` | 服务健康检查 | |
| 140 | 142 | | 获取配置 | GET | `/admin/config` | 获取租户配置 | |
| ... | ... | @@ -871,7 +873,7 @@ curl "http://localhost:6002/search/12345" -H "X-Tenant-ID: 162" |
| 871 | 873 | ### 5.1 全量索引接口 |
| 872 | 874 | |
| 873 | 875 | - **端点**: `POST /indexer/reindex` |
| 874 | -- **描述**: 全量索引,将指定租户的所有SPU数据导入到ES索引(不会删除现有索引) | |
| 876 | +- **描述**: 全量索引,将指定租户的所有SPU数据导入到ES索引(不会删除现有索引)。**推荐仅用于自测/运维场景**;生产环境下更推荐由 Java 等上游控制调度与写 ES。 | |
| 875 | 877 | |
| 876 | 878 | #### 请求参数 |
| 877 | 879 | |
| ... | ... | @@ -977,7 +979,7 @@ cat logs/indexer.log | jq 'select(.operation == "request_complete") | {timestamp |
| 977 | 979 | ### 5.2 增量索引接口 |
| 978 | 980 | |
| 979 | 981 | - **端点**: `POST /indexer/index` |
| 980 | -- **描述**: 增量索引接口,根据指定的SPU ID列表进行索引,直接将数据写入ES。用于增量更新指定商品。 | |
| 982 | +- **描述**: 增量索引接口,根据指定的SPU ID列表进行索引,直接将数据写入ES。用于增量更新指定商品。**推荐仅作为内部/调试入口**;正式对接建议改用 `/indexer/build-docs`,由上游写 ES。 | |
| 981 | 983 | |
| 982 | 984 | **删除说明**: |
| 983 | 985 | - `spu_ids`中的SPU:如果数据库`deleted=1`,自动从ES删除,响应状态为`deleted` |
| ... | ... | @@ -1242,6 +1244,101 @@ curl -X POST "http://localhost:6004/indexer/documents" \ |
| 1242 | 1244 | } |
| 1243 | 1245 | ``` |
| 1244 | 1246 | |
| 1247 | +### 5.5 文档构建接口(正式对接推荐) | |
| 1248 | + | |
| 1249 | +#### 5.5.1 `POST /indexer/build-docs` | |
| 1250 | + | |
| 1251 | +- **描述**: | |
| 1252 | + 基于调用方(通常是 Java 索引程序)提供的 **MySQL 行数据** 构建 ES 文档(doc),**不写入 ES**。 | |
| 1253 | + 由本服务负责“如何构建 doc”(多语言、翻译、向量、规格聚合等),由调用方负责“何时调度 + 如何写 ES”。 | |
| 1254 | + | |
| 1255 | +#### 请求参数 | |
| 1256 | + | |
| 1257 | +```json | |
| 1258 | +{ | |
| 1259 | + "tenant_id": "170", | |
| 1260 | + "items": [ | |
| 1261 | + { | |
| 1262 | + "spu": { "id": 223167, "tenant_id": 170, "title": "..." }, | |
| 1263 | + "skus": [ | |
| 1264 | + { "id": 3988393, "spu_id": 223167, "price": 25.99, "compare_at_price": 25.99 } | |
| 1265 | + ], | |
| 1266 | + "options": [] | |
| 1267 | + } | |
| 1268 | + ] | |
| 1269 | +} | |
| 1270 | +``` | |
| 1271 | + | |
| 1272 | +> `spu` / `skus` / `options` 字段应当直接使用从 `shoplazza_product_spu` / `shoplazza_product_sku` / `shoplazza_product_option` 查询出的行字段。 | |
| 1273 | + | |
| 1274 | +#### 响应示例(节选) | |
| 1275 | + | |
| 1276 | +```json | |
| 1277 | +{ | |
| 1278 | + "tenant_id": "170", | |
| 1279 | + "docs": [ | |
| 1280 | + { | |
| 1281 | + "tenant_id": "170", | |
| 1282 | + "spu_id": "223167", | |
| 1283 | + "title": { "en": "...", "zh": "..." }, | |
| 1284 | + "tags": ["Floerns", "Clothing", "Shoes & Jewelry"], | |
| 1285 | + "skus": [ | |
| 1286 | + { | |
| 1287 | + "sku_id": "3988393", | |
| 1288 | + "price": 25.99, | |
| 1289 | + "compare_at_price": 25.99, | |
| 1290 | + "stock": 100 | |
| 1291 | + } | |
| 1292 | + ], | |
| 1293 | + "min_price": 25.99, | |
| 1294 | + "max_price": 25.99, | |
| 1295 | + "compare_at_price": 25.99, | |
| 1296 | + "total_inventory": 100, | |
| 1297 | + "title_embedding": [/* 1024 维向量 */] | |
| 1298 | + // 其余字段与 mappings/search_products.json 一致 | |
| 1299 | + } | |
| 1300 | + ], | |
| 1301 | + "total": 1, | |
| 1302 | + "success_count": 1, | |
| 1303 | + "failed_count": 0, | |
| 1304 | + "failed": [] | |
| 1305 | +} | |
| 1306 | +``` | |
| 1307 | + | |
| 1308 | +#### 使用建议 | |
| 1309 | + | |
| 1310 | +- **生产环境推荐流程**: | |
| 1311 | + 1. Java 根据业务逻辑决定哪些 SPU 需要(全量/增量)处理; | |
| 1312 | + 2. Java 从 MySQL 查询 SPU/SKU/Option 行,拼成 `items`; | |
| 1313 | + 3. 调用 `/indexer/build-docs` 获取 ES-ready `docs`; | |
| 1314 | + 4. Java 使用自己的 ES 客户端写入 `search_products_tenant_{tenant_id}`。 | |
| 1315 | + | |
| 1316 | +### 5.6 文档构建接口(测试 / 自测) | |
| 1317 | + | |
| 1318 | +#### 5.6.1 `POST /indexer/build-docs-from-db` | |
| 1319 | + | |
| 1320 | +- **描述**: | |
| 1321 | + 仅用于测试/调试:调用方只提供 `tenant_id` 和 `spu_ids`,由 indexer 服务内部从 MySQL 查询 SPU/SKU/Option,然后调用与 `/indexer/build-docs` 相同的文档构建逻辑,返回 ES-ready doc。 | |
| 1322 | + | |
| 1323 | +#### 请求参数 | |
| 1324 | + | |
| 1325 | +```json | |
| 1326 | +{ | |
| 1327 | + "tenant_id": "170", | |
| 1328 | + "spu_ids": ["223167"] | |
| 1329 | +} | |
| 1330 | +``` | |
| 1331 | + | |
| 1332 | +#### 请求示例 | |
| 1333 | + | |
| 1334 | +```bash | |
| 1335 | +curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ | |
| 1336 | + -H "Content-Type: application/json" \ | |
| 1337 | + -d '{"tenant_id": "170", "spu_ids": ["223167"]}' | |
| 1338 | +``` | |
| 1339 | + | |
| 1340 | +返回结构与 `/indexer/build-docs` 相同,可直接用于对比 ES 实际文档或调试字段映射问题。 | |
| 1341 | + | |
| 1245 | 1342 | #### 请求示例 |
| 1246 | 1343 | |
| 1247 | 1344 | ```bash | ... | ... |
docs/测试Pipeline说明.md
| ... | ... | @@ -145,6 +145,71 @@ pytest tests/integration/ -v |
| 145 | 145 | pytest tests/integration/test_api_integration.py -v |
| 146 | 146 | ``` |
| 147 | 147 | |
| 148 | +### 5. 索引 & 文档构建流水线验证(手动) | |
| 149 | + | |
| 150 | +除了自动化测试外,推荐在联调/问题排查时手动跑一遍“**从 MySQL 到 ES doc**”的索引流水线,确保字段与 mapping、查询逻辑一致。 | |
| 151 | + | |
| 152 | +#### 5.1 启动 Indexer 服务 | |
| 153 | + | |
| 154 | +```bash | |
| 155 | +cd /home/tw/SearchEngine | |
| 156 | +./scripts/stop.sh # 停掉已有进程(可选) | |
| 157 | +./scripts/start_indexer.sh # 启动专用 indexer 服务,默认端口 6004 | |
| 158 | +``` | |
| 159 | + | |
| 160 | +#### 5.2 基于数据库构建 ES doc(只看、不写 ES) | |
| 161 | + | |
| 162 | +> 场景:已经知道某个 `tenant_id` 和 `spu_id`,想看它在“最新逻辑下”的 ES 文档长什么样。 | |
| 163 | + | |
| 164 | +```bash | |
| 165 | +curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ | |
| 166 | + -H "Content-Type: application/json" \ | |
| 167 | + -d '{ | |
| 168 | + "tenant_id": "170", | |
| 169 | + "spu_ids": ["223167"] | |
| 170 | + }' | |
| 171 | +``` | |
| 172 | + | |
| 173 | +返回中: | |
| 174 | + | |
| 175 | +- `docs[0]` 为当前代码构造出来的完整 ES doc(与 `mappings/search_products.json` 对齐); | |
| 176 | +- 可以直接比对: | |
| 177 | + - 索引字段说明:`docs/索引字段说明v2.md` | |
| 178 | + - 实际 ES 文档:`docs/常用查询 - ES.md` 中的查询示例(按 `spu_id` 过滤)。 | |
| 179 | + | |
| 180 | +#### 5.3 与 ES 实际数据对比 | |
| 181 | + | |
| 182 | +```bash | |
| 183 | +curl -u 'essa:***' \ | |
| 184 | + -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' \ | |
| 185 | + -H 'Content-Type: application/json' \ | |
| 186 | + -d '{ | |
| 187 | + "size": 5, | |
| 188 | + "_source": ["title", "tags"], | |
| 189 | + "query": { | |
| 190 | + "bool": { | |
| 191 | + "filter": [ | |
| 192 | + { "term": { "spu_id": "223167" } } | |
| 193 | + ] | |
| 194 | + } | |
| 195 | + } | |
| 196 | + }' | |
| 197 | +``` | |
| 198 | + | |
| 199 | +对比如下内容是否一致(上面的示例查询只返回 `title`、`tags`;对比其他字段时,需在 `_source` 中补充相应字段,或去掉 `_source` 限制): | |
| 200 | + | |
| 201 | +- 多语言字段:`title/brief/description/vendor/category_name_text/category_path`; | |
| 202 | +- 结构字段:`tags/specifications/skus/min_price/max_price/compare_at_price/total_inventory` 等; | |
| 203 | +- 算法字段:`title_embedding` 是否存在(值不必逐项比对)。 | |
| 204 | + | |
| 205 | +如果两边不一致,可以结合: | |
| 206 | + | |
| 207 | +- `indexer/document_transformer.py`(文档构造逻辑); | |
| 208 | +- `indexer/incremental_service.py`(增量索引/查库逻辑); | |
| 209 | +- `logs/indexer.log`(索引日志) | |
| 210 | + | |
| 211 | +逐步缩小问题范围。 | |
| 212 | + | |
| 148 | 213 | ### 4. 性能测试 (Performance Tests) |
| 149 | 214 | |
| 150 | 215 | **目的**: 验证系统性能指标 | ... | ... |
| ... | ... | @@ -0,0 +1,584 @@ |
| 1 | +## 一、整体架构说明 | |
| 2 | + | |
| 3 | +### 1.1 系统角色划分 | |
| 4 | + | |
| 5 | +- **Java 索引程序(/home/tw/saas-server)** | |
| 6 | + - 负责“**什么时候、对哪些 SPU 做索引**”(调度 & 触发)。 | |
| 7 | + - 负责**商品/店铺/类目等基础数据同步**(写 MySQL)。 | |
| 8 | + - 负责**多租户环境下的全量/增量索引调度**,但不再关心具体 doc 字段细节。 | |
| 9 | + | |
| 10 | +- **Python 索引富化模块(本项目 SearchEngine / indexer)** | |
| 11 | + - 负责“**如何把 MySQL 基础数据变成符合 ES mapping 的 doc**”,包括: | |
| 12 | + - 多语言字段组织; | |
| 13 | + - 翻译调用与缓存; | |
| 14 | + - 向量生成与(可选)缓存; | |
| 15 | + - 规格、SKU 聚合、类目路径解析等。 | |
| 16 | + - 保留当前“**直接写 ES**”能力(BulkIndexingService, IncrementalIndexerService)。 | |
| 17 | + - **新增:提供 HTTP 接口**,接收 Java 传入的完整 doc/基础数据,返回或直接写入 ES-ready doc,以支持“Java 只调接口、不关心字段细节”的新架构。 | |
| 18 | + | |
| 19 | +--- | |
| 20 | + | |
| 21 | +## 二、Java 索引程序职责(保留 & 对接) | |
| 22 | + | |
| 23 | +### 2.1 现有职责(需保留) | |
| 24 | + | |
| 25 | +1. **索引触发与调度** | |
| 26 | + - 全量: | |
| 27 | + - `ShoplazzaProductIndexFullJob` → `ProductIndexServiceImpl.fullIndex(...)` | |
| 28 | + - 按 tenant 分页拉取 SPU,调用批量索引。 | |
| 29 | + - 增量: | |
| 30 | + - MQ 消费(`ShoplazzaProductCreateAndUpdateConsumerService`); | |
| 31 | + - 手工/API 触发增量索引 → `incrementalIndex(tenantId, spuId)`。 | |
| 32 | + | |
| 33 | +2. **MySQL 基础数据维护** | |
| 34 | + - **店铺配置表 `shoplazza_shop_config`**: | |
| 35 | + - 字段: | |
| 36 | + - `primary_language`:店铺主语言; | |
| 37 | + - `translate_to_en`:是否需要翻译成英文; | |
| 38 | + - `translate_to_zh`:是否需要翻译成中文。 | |
| 39 | + - 逻辑: | |
| 40 | + - 每晚商品同步(`ShoplazzaProductSyncServiceImpl`)时,根据店铺 locale/Shoplazza 配置,写入/更新 `primary_language` 与翻译开关字段。 | |
| 41 | + - **类目表 `shoplazza_product_category`**: | |
| 42 | + - 同步/修正逻辑封装在 `ProductCategoryService` 中: | |
| 43 | + - `getProductCategoryByPathIdList(tenantId, categoryIdList)`; | |
| 44 | + - 当 mapping 对不上时触发 `syncProductCategoryByApi(shopId)` 再重查。 | |
| 45 | + | |
| 46 | +3. **Shopify/Shoplazza 商品同步 & 并发控制** | |
| 47 | + - MQ 等机制用于削峰,避免店匠批量导入商品时压垮服务: | |
| 48 | + - 同步逻辑在 `ShoplazzaProductSyncServiceImpl`; | |
| 49 | + - 对接 MQ 消息:商品创建、更新、删除等事件; | |
| 50 | + - 对高并发导入,拆分为小批次写入 MySQL + 后续异步索引。 | |
| 51 | + | |
| 52 | +4. **索引结构调整为 per-tenant** | |
| 53 | + - 在 Java 中已统一使用: | |
| 54 | + - `indexName = elasticsearchProperties.buildIndexName(tenantId);` | |
| 55 | + - 索引命名形如:`search_products_tenant_{tenant_id}`。 | |
| 56 | + - Python 侧对应 `get_tenant_index_name(tenant_id)`。 | |
| 57 | + | |
| 58 | +### 2.2 Java 侧不再深入关心的部分 | |
| 59 | + | |
| 60 | +- ES 文档结构 `ProductIndexDocument` 的字段细节(title/brief/description/vendor/category_xxx/tags/specifications/skus/embedding 等)。 | |
| 61 | +- 翻译、向量等具体算法逻辑。 | |
| 62 | +- qanchors/keywords 等新特征的计算。 | |
| 63 | + | |
| 64 | +**新职责边界**: | |
| 65 | +Java 只负责“**选出要索引的 SPU + 从 MySQL 拉取原始数据 + 调用 Python 服务**(或交给 Python 做完整索引)”。 | |
| 66 | + | |
| 67 | +--- | |
| 68 | + | |
| 69 | +## 三、Python 索引富化模块职责 | |
| 70 | + | |
| 71 | +### 3.1 职责总览 | |
| 72 | + | |
| 73 | +- 输入:**MySQL 基础数据**(`shoplazza_product_spu/sku/option/category/image` 等)。 | |
| 74 | +- 输出:**符合 `mappings/search_products.json` 的 doc 列表**,包括: | |
| 75 | + - 多语言文本字段:`title.*`, `brief.*`, `description.*`, `vendor.*`, `category_path.*`, `category_name_text.*`; | |
| 76 | + - 算法特征:`title_embedding`, `image_embedding`, `qanchors.*`, `keywords.*`(未来扩展); | |
| 77 | + - 结构化字段:`tags`, `specifications`, `skus`, `min_price`, `max_price`, `compare_at_price`, `total_inventory`, `sales` 等。 | |
| 78 | +- 附加: | |
| 79 | + - 翻译调用 & **Redis 缓存**(继承 Java 的 key 组织和 TTL 策略); | |
| 80 | + - 向量生成(文本 & 图片); | |
| 81 | + - ES 写入能力(Bulk & Incremental)。 | |
| 82 | + | |
| 83 | +### 3.2 当前 Python 模块结构(简述) | |
| 84 | + | |
| 85 | +- `indexer/spu_transformer.py`: | |
| 86 | + - 从 MySQL 读取 SPU/SKU/Option 数据。 | |
| 87 | +- `indexer/document_transformer.py` (`SPUDocumentTransformer`): | |
| 88 | + - 把单个 SPU + SKUs + Options 转成 ES 文档(doc)。 | |
| 89 | +- `indexer/bulk_indexing_service.py`: | |
| 90 | + - 全量索引服务,调用 `SPUTransformer` → `SPUDocumentTransformer` → `BulkIndexer` 写 ES。 | |
| 91 | +- `indexer/incremental_service.py`: | |
| 92 | + - 增量索引服务,按 SPU 列表批量更新/删除 ES 文档。 | |
| 93 | + | |
| 94 | +新设计中,本模块还将新增: | |
| 95 | + | |
| 96 | +- **HTTP 富化接口**(设计阶段示例命名为 `POST /index/enrich_docs` / `POST /index/enrich_and_index`;实际落地的文档构建接口为 `POST /indexer/build-docs`,见第十节); | |
| 97 | +- **翻译客户端 + Redis 缓存**,按 Java 规则组织 key; | |
| 98 | +- **(可选)向量缓存**。 | |
| 99 | + | |
| 100 | +--- | |
| 101 | + | |
| 102 | +## 四、翻译与多语言字段设计(Java → Python 迁移) | |
| 103 | + | |
| 104 | +### 4.1 语言决策策略(从 Java 迁移) | |
| 105 | + | |
| 106 | +在 Java 中,语言决策逻辑位于 `ProductIndexConvert.convertToIndexDocument(...)`,现有规则如下: | |
| 107 | + | |
| 108 | +1. **基础配置** | |
| 109 | + - `primaryLanguage = shopConfig.primaryLanguage`(主语言); | |
| 110 | + - `translateToEn = shopConfig.translateToEn`; | |
| 111 | + - `translateToZh = shopConfig.translateToZh`。 | |
| 112 | + | |
| 113 | +2. **检测内容语言** | |
| 114 | + - 标题:`queryTextLang = BaiDuTransApi.queryTextLang(spu.title)`; | |
| 115 | + - 若检测不到,则视为 `queryTextLang = primaryLanguage`。 | |
| 116 | + | |
| 117 | +3. **确定源语言 `defSrcLang` 与目标翻译语言 `defLang`** | |
| 118 | + - 情况 A:`primaryLanguage == queryTextLang`(不缺主语言) | |
| 119 | + - `defSrcLang = primaryLanguage`; | |
| 120 | + - 若 `translateToEn && primaryLanguage != "en"` → `defLang = "en"`; | |
| 121 | + - 若 `translateToZh && primaryLanguage != "zh"` → `defLang = "zh"`。 | |
| 122 | + - 情况 B:`primaryLanguage != queryTextLang`(认为“缺主语言”) | |
| 123 | + - `defSrcLang = queryTextLang`; | |
| 124 | + - `defIsMissPrimaryLanguage = true`; | |
| 125 | + - 若 `translateToEn && queryTextLang != "en"` → `defLang = "en"`; | |
| 126 | + - 若 `translateToZh && queryTextLang != "zh"` → `defLang = "zh"`; | |
| 127 | + - 若上述都不满足(没有翻到 en/zh),则回退: | |
| 128 | + - `defIsMissPrimaryLanguage = false`; | |
| 129 | + - `defLang = primaryLanguage`(翻译回主语言)。 | |
| 130 | + | |
| 131 | + - 兜底:若 `defLang` 仍为空,默认 `defLang = "en"`。 | |
| 132 | + | |
| 133 | +4. **DocumentTranslation 元数据(用于后续检查/补偿)** | |
| 134 | + | |
| 135 | +```java | |
| 136 | +documentTranslation.setDefSrcLang(defSrcLang); | |
| 137 | +documentTranslation.setDefLang(defLang); | |
| 138 | +documentTranslation.setDefQueryTextLang(queryTextLang); | |
| 139 | +documentTranslation.setDefIsMissPrimaryLanguage(isMissPrimaryLanguage); | |
| 140 | +``` | |
| 141 | + | |
| 142 | +**类目字段**(`category`, `category_path`)有类似一套独立的决策逻辑,写入 `defCategorySrcLang / defCategoryLang / defCategoryQueryTextLang / defCategoryIsMissPrimaryLanguage`。 | |
| 143 | + | |
| 144 | +> **Python 需做的**:在 `SPUDocumentTransformer` 内部复刻这套决策逻辑,对 title/brief/description/vendor/keywords & category 字段分别计算源语言 / 目标语言 / 主语言缺失标记,保存在一个等价的结构中(不一定叫 `DocumentTranslation`,但含义相同)。 | |
| 145 | + | |
| 146 | +### 4.2 多语言字段填充规则 | |
| 147 | + | |
| 148 | +以标题为例(Java 中的 `DocumentTitle`): | |
| 149 | + | |
| 150 | +- 原始 title:`spu.title`; | |
| 151 | +- 多语言写入: | |
| 152 | + | |
| 153 | +```java | |
| 154 | +DocumentTitle title = new DocumentTitle(); | |
| 155 | +title.set(defLang, translationTitle) // 翻译结果(例如 en 或 zh) | |
| 156 | + .set(defSrcLang, spu.getTitle()) // 原文 | |
| 157 | + .set(primaryLanguage, primaryTitle); // 若缺主语言,则从 queryTextLang 翻回主语言 | |
| 158 | +doc.setTitle(title); | |
| 159 | +``` | |
| 160 | + | |
| 161 | +同样模式适用于: | |
| 162 | + | |
| 163 | +- `keywords`:从 `spu.seoKeywords` 翻译生成; | |
| 164 | +- `brief`:从 `spu.brief` 翻译生成; | |
| 165 | +- `description`:从清理 HTML 后的 `spu.description` 翻译生成; | |
| 166 | +- `vendor`:从 `spu.vendor` 翻译生成。 | |
| 167 | + | |
| 168 | +**类目字段**: | |
| 169 | + | |
| 170 | +- `category_name_text`:基于 `spu.category`; | |
| 171 | +- `category_path`:基于类目表 `product_category` 的 name 列表拼出的路径字符串 `allPathName`。 | |
| 172 | + | |
| 173 | +分别写入: | |
| 174 | + | |
| 175 | +```java | |
| 176 | +categoryNameText.set(categoryLang, translationCategory) | |
| 177 | + .set(defLang, spu.getCategory()) | |
| 178 | + .set(primaryLanguage, primaryCategory); | |
| 179 | +categoryPath.set(categoryLang, translationCategoryPath) | |
| 180 | + .set(defLang, allPathName) | |
| 181 | + .set(primaryLanguage, primaryCategoryPath); | |
| 182 | +``` | |
| 183 | + | |
| 184 | +> **Python 需做的**:在构造 doc 时,为各多语言字段生成 dict: | |
| 185 | +> | |
| 186 | +> - 至少包含 `{defSrcLang: 原文}`; | |
| 187 | +> - 如有翻译,加入 `{defLang: 翻译}`; | |
| 188 | +> - 若 `isMissPrimaryLanguage` 为 true,再加入 `{primaryLanguage: 回译结果}`。 | |
| 189 | + | |
| 190 | +--- | |
| 191 | + | |
| 192 | +## 五、翻译服务与 Redis 缓存设计(必须继承) | |
| 193 | + | |
| 194 | +### 5.1 外部翻译接口 | |
| 195 | + | |
| 196 | +你当前要使用的翻译接口(Python 侧): | |
| 197 | + | |
| 198 | +```bash | |
| 199 | +curl -X POST http://120.76.41.98:6006/translate \ | |
| 200 | + -H "Content-Type: application/json" \ | |
| 201 | + -d '{"text":"儿童小男孩女孩开学 100 天衬衫短袖 搞笑图案字母印花庆祝上衣", | |
| 202 | + "target_lang":"en", | |
| 203 | + "source_lang":"auto"}' | |
| 204 | +``` | |
| 205 | + | |
| 206 | +- 请求参数: | |
| 207 | + - `text`:待翻译文本; | |
| 208 | + - `target_lang`:目标语言(如 `"en"`、`"zh"` 等); | |
| 209 | + - `source_lang`:源语言(支持 `"auto"` 自动检测)。 | |
| 210 | +- 响应(参考 Java `TranslationServiceImpl.querySaasTranslate`): | |
| 211 | + - JSON 里包含 `status` 字段,如果是 `"success"`,且 `translated_text` 非空,则返回翻译结果。 | |
| 212 | + | |
| 213 | +### 5.2 Redis 缓存 key 规则(与 Java 完全对齐) | |
| 214 | + | |
| 215 | +在 `TranslationServiceImpl` 中,缓存 key 定义: | |
| 216 | + | |
| 217 | +```java | |
| 218 | +private static final Integer DEFAULT_TTL_DAYS = 30; | |
| 219 | + | |
| 220 | +private String buildCacheKey(Long tenantId, String sourceText, String targetLang) { | |
| 221 | + String hash = DigestUtils.md5Hex(sourceText); | |
| 222 | + return String.format("translation:%s:%s:%s", | |
| 223 | + tenantId, targetLang.toLowerCase(), hash); | |
| 224 | +} | |
| 225 | +``` | |
| 226 | + | |
| 227 | +- **Key 模式**:`translation:{tenantId}:{targetLangLower}:{md5(sourceText)}`。 | |
| 228 | +- **Value**:`translatedText`(单纯的翻译结果字符串)。 | |
| 229 | +- **TTL**:30 天(`DEFAULT_TTL_DAYS = 30`)。 | |
| 230 | + | |
| 231 | +缓存读写逻辑: | |
| 232 | + | |
| 233 | +```java | |
| 234 | +// 读 | |
| 235 | +String cache = queryCacheTranslation(tenantId, text, targetLang); | |
| 236 | +if (cache != null) { | |
| 237 | + // 构造 TranslateDTO 返回 | |
| 238 | +} | |
| 239 | + | |
| 240 | +// 写 | |
| 241 | +saveCacheTranslation(tenantId, text, targetLang, translatedText); | |
| 242 | +``` | |
| 243 | + | |
| 244 | +**你在 Python 侧必须继承的:** | |
| 245 | + | |
| 246 | +- 相同的 key 组织规则; | |
| 247 | +- 相同的 TTL; | |
| 248 | +- 相同的维度(tenant_id + 目标语言 + 原文 md5)。 | |
| 249 | + | |
| 250 | +这样可以**复用以前在 Java 里已经积累的翻译缓存**,也保证后续迁移过程中行为一致。 | |
| 251 | + | |
| 252 | +--- | |
| 253 | + | |
| 254 | +## 六、向量服务与缓存(扩展设计) | |
| 255 | + | |
| 256 | +### 6.1 文本向量(title_embedding) | |
| 257 | + | |
| 258 | +Java 侧: | |
| 259 | + | |
| 260 | +```java | |
| 261 | +List<Float> titleEmbedding = vectorService.generateTextVector(spu.getTitle()); | |
| 262 | +if (StrUtil.isNotBlank(spu.getTitle()) && CollUtil.isNotEmpty(titleEmbedding)) { | |
| 263 | + doc.setTitleEmbedding(titleEmbedding); | |
| 264 | +} | |
| 265 | +``` | |
| 266 | + | |
| 267 | +你当前 Python 侧已有: | |
| 268 | + | |
| 269 | +- `embeddings/text_encoder.py`(BGE-M3 模型); | |
| 270 | +- `SPUDocumentTransformer._fill_title_embedding` 已封装了调用 encoder 的逻辑。 | |
| 271 | + | |
| 272 | +**建议缓存策略(可选,但推荐):** | |
| 273 | + | |
| 274 | +- Key:`text_vector:{model_name}:{md5(title)}`; | |
| 275 | +- Value:向量数组(可序列化成 JSON 或 msgpack); | |
| 276 | +- TTL:可设为较长时间(例如 30 天或不设置 TTL,由容量控制)。 | |
| 277 | + | |
| 278 | +### 6.2 图片向量(image_embedding) | |
| 279 | + | |
| 280 | +Java 侧: | |
| 281 | + | |
| 282 | +- 对 `ShoplazzaProductImageDO.src` 调用 `vectorService.generateImageVector(image.getSrc())`; | |
| 283 | +- 写入 `image_embedding.vector`(1024 维)+ `url`。 | |
| 284 | + | |
| 285 | +Python 侧已有 `embeddings/clip_encoder.py` 可用 CN-CLIP 模型;缓存策略类似: | |
| 286 | + | |
| 287 | +- Key:`image_vector:{model_name}:{md5(image_url)}`。 | |
| 288 | + | |
| 289 | +--- | |
| 290 | + | |
| 291 | +## 七、doc 组织逻辑迁移(从 Java 的 ProductIndexConvert 到 Python 的 SPUDocumentTransformer) | |
| 292 | + | |
| 293 | +### 7.1 需要完整迁移的要点 | |
| 294 | + | |
| 295 | +#### 7.1.1 基础字段 | |
| 296 | + | |
| 297 | +- `tenant_id`:`spu.tenant_id`; | |
| 298 | +- `spu_id`:`spu.id`; | |
| 299 | +- `create_time` / `update_time`:格式化为 ISO 字符串(`yyyy-MM-dd'T'HH:mm:ss`); | |
| 300 | +- 主图 `image_url`: | |
| 301 | + - 若 `image_src` 以 `http` 开头 → 直接使用; | |
| 302 | + - 否则前缀 `//`。 | |
| 303 | + | |
| 304 | +#### 7.1.2 多语言字段(title/brief/description/vendor/keywords/category_name_text/category_path) | |
| 305 | + | |
| 306 | +- 完整复刻前文第 4 节的逻辑: | |
| 307 | + - 语言决策; | |
| 308 | + - 调翻译接口(带缓存); | |
| 309 | + - 构造多语言对象(Python 中为 dict): | |
| 310 | + | |
| 311 | + ```python | |
| 312 | + title = {} | |
| 313 | + title[src_lang] = spu.title | |
| 314 | + if translation_title: title[def_lang] = translation_title | |
| 315 | + if is_miss_primary and primary_title: title[primary_lang] = primary_title | |
| 316 | + doc["title"] = title | |
| 317 | + ``` | |
| 318 | + | |
| 319 | +- `keywords` 来源:`spu.seo_keywords`; | |
| 320 | +- 类目路径需要从 `product_category` 表取 name 列表,按 level 排序后拼成 `allPathName = "一级/二级/三级"`。 | |
| 321 | + | |
| 322 | +#### 7.1.3 tags | |
| 323 | + | |
| 324 | +- 同 Java 逻辑: | |
| 325 | + | |
| 326 | +```python | |
| 327 | +if spu.tags: | |
| 328 | + doc["tags"] = [t.strip() for t in spu.tags.split(",") if t.strip()] | |
| 329 | +``` | |
| 330 | + | |
| 331 | +#### 7.1.4 规格、SKU、价格、库存 | |
| 332 | + | |
| 333 | +迁移 Java 的 `parseOptions` & `parseSkus` 逻辑: | |
| 334 | + | |
| 335 | +- `option1_name`, `option2_name`, `option3_name`: | |
| 336 | + - 按 `position` 排序 Option 表; | |
| 337 | + - 取前三个,写 name; | |
| 338 | + - 每个 Option 的 `values` 去重后写入 `optionX_values`; | |
| 339 | + - 同时构建 `valueNameMap[value] = optionName`,用于构建 `specifications`。 | |
| 340 | + | |
| 341 | +- `specifications`: | |
| 342 | + - 遍历所有 SKU: | |
| 343 | + - 若 `option1` 非空:构造 1 条 `{sku_id, name=valueNameMap[option1], value=option1}`; | |
| 344 | + - 同理 `option2`、`option3`。 | |
| 345 | + | |
| 346 | +- `skus`(nested): | |
| 347 | + - 每条 SKU 映射为: | |
| 348 | + - `sku_id`, `price`, `compare_at_price`, `sku_code`, `stock`, `weight`, `weight_unit`, `option1_value`, `option2_value`, `option3_value`, `image_src`。 | |
| 349 | + | |
| 350 | +- 聚合字段: | |
| 351 | + - `min_price` / `max_price`:全体 SKU `price` 的最小/最大; | |
| 352 | + - `compare_at_price`:全体 SKU `compare_at_price` 的最大值(SPU 表不再读取 compare_at_price,统一由 SKU 聚合;无 SKU 对比价时为空); | |
| 353 | + - `sku_prices`:所有 SKU price 列表; | |
| 354 | + - `sku_weights`:所有 SKU weight(long)列表; | |
| 355 | + - `sku_weight_units`:weight_unit 去重列表; | |
| 356 | + - `total_inventory`:所有 SKU `inventory_quantity` 总和; | |
| 357 | + - `sales`:虚拟销量 `spu.fake_sales`。 | |
| 358 | + | |
| 359 | +### 7.2 qanchors / keywords 扩展 | |
| 360 | + | |
| 361 | +- 当前 Java 中 `qanchors` 字段结构已存在,但未赋值; | |
| 362 | +- 设计建议: | |
| 363 | + - 在 Python 侧基于: | |
| 364 | + - 标题 / brief / description / tags / 类目等,做**查询锚点**抽取; | |
| 365 | + - 按与 `title/keywords` 类似的多语言结构写入 `qanchors.{lang}`; | |
| 366 | + - 翻译策略可选: | |
| 367 | + - 在生成锚点后再调用翻译; | |
| 368 | + - 或使用原始文本的翻译结果组合。 | |
| 369 | + | |
| 370 | +--- | |
| 371 | + | |
| 372 | +## 八、接口设计 | |
| 373 | + | |
| 374 | +### 8.1 保留的能力:直接写 ES(现有) | |
| 375 | + | |
| 376 | +- **全量索引**: | |
| 377 | + - CLI:`python main.py ingest ...` 或 `scripts/ingest.sh`; | |
| 378 | + - 入口:`BulkIndexingService.bulk_index(tenant_id, recreate_index, batch_size)`: | |
| 379 | + - 生成 tenant index 名; | |
| 380 | + - 如需重建则删除再建索引; | |
| 381 | + - 从 MySQL 拉数据 → `SPUTransformer.transform_batch()` → `BulkIndexer` 写 ES。 | |
| 382 | + | |
| 383 | +- **增量索引**: | |
| 384 | + - `IncrementalIndexerService.index_spus_to_es(es_client, tenant_id, spu_ids, index_name, batch_size, delete_spu_ids)`: | |
| 385 | + - 对于 deleted / DB 已无的 SPU,删除 ES 文档; | |
| 386 | + - 对仍存在的 SPU,从 MySQL 拉数据 → `create_document_transformer` → `SPUDocumentTransformer.transform_spu_to_doc` → 批量写入 ES。 | |
| 387 | + | |
| 388 | +### 8.2 新增接口一:文档富化(不写 ES) | |
| 389 | + | |
| 390 | +**目的**:供 Java 索引程序调用,仅获取 ES-ready docs,自行写入 ES,或作为后续多用途数据源。 | |
| 391 | + | |
| 392 | +- **接口示例**:`POST /index/enrich_docs` | |
| 393 | +- **入参**(伪 JSON): | |
| 394 | + | |
| 395 | +```json | |
| 396 | +{ | |
| 397 | + "tenant_id": "123", | |
| 398 | + "shop_config": { | |
| 399 | + "primary_language": "en", | |
| 400 | + "translate_to_en": true, | |
| 401 | + "translate_to_zh": false | |
| 402 | + }, | |
| 403 | + "spus": [ | |
| 404 | + { | |
| 405 | + "spu": { /* 映射 shoplazza_product_spu */ }, | |
| 406 | + "skus": [ /* shoplazza_product_sku 列表 */ ], | |
| 407 | + "options": [ /* shoplazza_product_option 列表 */ ], | |
| 408 | + "images": [ /* shoplazza_product_image 列表(可选) */ ] | |
| 409 | + }, | |
| 410 | + ... | |
| 411 | + ] | |
| 412 | +} | |
| 413 | +``` | |
| 414 | + | |
| 415 | +> 可选:也支持只传 `tenant_id + spu_ids`,由 Python 侧自行查 MySQL(对接现有 `SPUTransformer`),但从职责划分上,更推荐 **Java 查完基础数据再传给 Python**。 | |
| 416 | + | |
| 417 | +- **出参**: | |
| 418 | + | |
| 419 | +```json | |
| 420 | +{ | |
| 421 | + "tenant_id": "123", | |
| 422 | + "docs": [ | |
| 423 | + { | |
| 424 | + "spu_id": "1", | |
| 425 | + "tenant_id": "123", | |
| 426 | + "title": { "en": "...", "zh": "...", ... }, | |
| 427 | + "qanchors": { ... }, | |
| 428 | + "keywords": { ... }, | |
| 429 | + "brief": { ... }, | |
| 430 | + "description": { ... }, | |
| 431 | + "vendor": { ... }, | |
| 432 | + "tags": ["xxx","yyy"], | |
| 433 | + "image_url": "...", | |
| 434 | + "title_embedding": [ ... 1024 floats ... ], | |
| 435 | + "image_embedding": [ { "url": "...", "vector": [ ... ] } ], | |
| 436 | + "category_name_text": { ... }, | |
| 437 | + "category_path": { ... }, | |
| 438 | + "category_id": "xxx", | |
| 439 | + "category1_name": "xxx", | |
| 440 | + "specifications": [ ... ], | |
| 441 | + "skus": [ ... ], | |
| 442 | + "min_price": ..., | |
| 443 | + "max_price": ..., | |
| 444 | + "compare_at_price": ..., | |
| 445 | + "total_inventory": ..., | |
| 446 | + "sales": ... | |
| 447 | + }, | |
| 448 | + ... | |
| 449 | + ] | |
| 450 | +} | |
| 451 | +``` | |
| 452 | + | |
| 453 | +### 8.3 新增接口二:富化 + 写 ES | |
| 454 | + | |
| 455 | +**目的**:Java 只负责调度,不关心 ES client 细节。 | |
| 456 | + | |
| 457 | +- **接口示例**:`POST /index/enrich_and_index` | |
| 458 | +- **入参**:同上(基础数据 / spu_ids + tenant_id)。 | |
| 459 | +- **内部逻辑**: | |
| 460 | + 1. 按 4–7 节的规则构造 docs(含翻译 & 向量 & 缓存); | |
| 461 | + 2. 使用 `BulkIndexer` 写入 `search_products_tenant_{tenant_id}`; | |
| 462 | + 3. 返回统计信息。 | |
| 463 | + | |
| 464 | +- **出参**(例): | |
| 465 | + | |
| 466 | +```json | |
| 467 | +{ | |
| 468 | + "tenant_id": "123", | |
| 469 | + "index_name": "search_products_tenant_123", | |
| 470 | + "total": 100, | |
| 471 | + "indexed": 98, | |
| 472 | + "failed": 2, | |
| 473 | + "failed_spu_ids": ["456","789"], | |
| 474 | + "elapsed_ms": 12345 | |
| 475 | +} | |
| 476 | +``` | |
| 477 | + | |
| 478 | +--- | |
| 479 | + | |
| 480 | +## 九、小结 | |
| 481 | + | |
| 482 | +这份设计的目标是: | |
| 483 | + | |
| 484 | +- **保留现有 Java 调度 & 数据同步能力**,不破坏已有全量/增量任务和 MQ 削峰; | |
| 485 | +- **把 ES 文档结构、多语言逻辑、翻译与向量等算法能力全部收拢到 Python 索引富化模块**,实现“单一 owner”; | |
| 486 | +- **完全继承 Java 现有的翻译缓存策略**(Redis key & TTL & 维度),保证行为与性能的一致性; | |
| 487 | +- **为未来字段扩展(qanchors、更多 tags/特征)预留清晰路径**:仅需在 Python 侧新增逻辑和 mapping,不再拉 Java 入伙。 | |
| 488 | + | |
| 489 | +--- | |
| 490 | + | |
| 491 | +## 十、实际 HTTP 接口与测试用例(速查) | |
| 492 | + | |
| 493 | +### 10.1 端口与服务 | |
| 494 | + | |
| 495 | +- `./scripts/start_backend.sh` → `main.py serve` → 端口 `6002`,**没有 `/indexer/*` 路由**。 | |
| 496 | +- `./scripts/start_indexer.sh` → `main.py serve-indexer` → 端口 `6004`,只暴露 `/indexer/*`。 | |
| 497 | + | |
| 498 | +**实际调用索引相关接口时,请始终访问 `6004`。** | |
| 499 | + | |
| 500 | +### 10.2 关键接口 | |
| 501 | + | |
| 502 | +- **构建文档(正式使用)**:`POST /indexer/build-docs` | |
| 503 | + - 入参:`tenant_id + items[ { spu, skus, options } ]` | |
| 504 | + - 输出:`docs` 数组,每个元素是完整 ES doc,不查库、不写 ES。 | |
| 505 | + | |
| 506 | +- **构建文档(测试用,内部查库)**:`POST /indexer/build-docs-from-db` | |
| 507 | + - 入参:`{"tenant_id": "...", "spu_ids": ["..."]}` | |
| 508 | + - 内部:按 `spu_ids` 从 MySQL 查出 SPU/SKU/Option,再走与 `build-docs` 相同的转换逻辑。 | |
| 509 | + | |
| 510 | +- **全量壳**:`POST /indexer/reindex`(查库 + 转 doc + 写 ES,用于自测) | |
| 511 | +- **增量壳**:`POST /indexer/index`(查库 + 转 doc + 写 ES,用于自测) | |
| 512 | +- **单文档查看**:`POST /indexer/documents` | |
| 513 | +- **健康检查**:`GET /indexer/health` | |
| 514 | + | |
| 515 | +### 10.3 典型测试流程(以 tenant 170, spu_id 223167 为例) | |
| 516 | + | |
| 517 | +1. 启动 indexer 服务: | |
| 518 | + | |
| 519 | +```bash | |
| 520 | +./scripts/stop.sh | |
| 521 | +./scripts/start_indexer.sh | |
| 522 | +``` | |
| 523 | + | |
| 524 | +2. 构建指定 SPU 的 ES doc: | |
| 525 | + | |
| 526 | +```bash | |
| 527 | +curl -X POST "http://127.0.0.1:6004/indexer/build-docs-from-db" \ | |
| 528 | + -H "Content-Type: application/json" \ | |
| 529 | + -d '{"tenant_id": "170", "spu_ids": ["223167"]}' | |
| 530 | +``` | |
| 531 | + | |
| 532 | +3. 预期返回(节选): | |
| 533 | + | |
| 534 | +```json | |
| 535 | +{ | |
| 536 | + "tenant_id": "170", | |
| 537 | + "docs": [ | |
| 538 | + { | |
| 539 | + "tenant_id": "170", | |
| 540 | + "spu_id": "223167", | |
| 541 | + "title": { "en": "...Floerns Women's Gothic...", "zh": "弗洛恩斯 女士哥特风..." }, | |
| 542 | + "tags": ["Floerns", "Clothing", "Shoes & Jewelry", "..."], | |
| 543 | + "skus": [ | |
| 544 | + { | |
| 545 | + "sku_id": "3988393", | |
| 546 | + "price": 25.99, | |
| 547 | + "compare_at_price": 25.99, | |
| 548 | + "stock": 100 | |
| 549 | + } | |
| 550 | + ], | |
| 551 | + "min_price": 25.99, | |
| 552 | + "max_price": 25.99, | |
| 553 | + "compare_at_price": 25.99, | |
| 554 | + "total_inventory": 100, | |
| 555 | + "title_embedding": [ /* 1024 维向量 */ ] | |
| 556 | + } | |
| 557 | + ], | |
| 558 | + "total": 1, | |
| 559 | + "success_count": 1, | |
| 560 | + "failed_count": 0, | |
| 561 | + "failed": [] | |
| 562 | +} | |
| 563 | +``` | |
| 564 | + | |
| 565 | +4. 使用 `docs/常用查询 - ES.md` 中的查询,对应验证 ES 索引中的文档: | |
| 566 | + | |
| 567 | +```bash | |
| 568 | +curl -u 'essa:***' \ | |
| 569 | + -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' \ | |
| 570 | + -H 'Content-Type: application/json' \ | |
| 571 | + -d '{ | |
| 572 | + "size": 5, | |
| 573 | + "_source": ["title", "tags"], | |
| 574 | + "query": { | |
| 575 | + "bool": { | |
| 576 | + "filter": [ | |
| 577 | + { "term": { "spu_id": "223167" } } | |
| 578 | + ] | |
| 579 | + } | |
| 580 | + } | |
| 581 | + }' | |
| 582 | +``` | |
| 583 | + | |
| 584 | +通过这套流程可以完整验证:MySQL → Python 富化 → ES doc → ES 查询 的全链路行为是否符合预期。 | ... | ... |
indexer/document_transformer.py
| ... | ... | @@ -141,13 +141,8 @@ class SPUDocumentTransformer: |
| 141 | 141 | doc['min_price'] = 0.0 |
| 142 | 142 | doc['max_price'] = 0.0 |
| 143 | 143 | |
| 144 | - # 优先使用 SPU 级 compare_at_price(与索引字段说明v2一致),否则取 SKU 最大值 | |
| 145 | - if pd.notna(spu_row.get('compare_at_price')): | |
| 146 | - try: | |
| 147 | - doc['compare_at_price'] = float(spu_row['compare_at_price']) | |
| 148 | - except (ValueError, TypeError): | |
| 149 | - doc['compare_at_price'] = float(max(compare_prices)) if compare_prices else None | |
| 150 | - elif compare_prices: | |
| 144 | + # SPU 不再读取 compare_at_price 字段;ES 的 compare_at_price 使用所有 SKU 中的最大对比价 | |
| 145 | + if compare_prices: | |
| 151 | 146 | doc['compare_at_price'] = float(max(compare_prices)) |
| 152 | 147 | else: |
| 153 | 148 | doc['compare_at_price'] = None | ... | ... |
indexer/incremental_service.py
| ... | ... | @@ -176,7 +176,7 @@ class IncrementalIndexerService: |
| 176 | 176 | image_src, image_width, image_height, image_path, image_alt, |
| 177 | 177 | tags, note, category, category_id, category_google_id, |
| 178 | 178 | category_level, category_path, |
| 179 | - compare_at_price, fake_sales, display_fake_sales, | |
| 179 | + fake_sales, display_fake_sales, | |
| 180 | 180 | tenant_id, creator, create_time, updater, update_time, deleted |
| 181 | 181 | FROM shoplazza_product_spu |
| 182 | 182 | WHERE tenant_id = :tenant_id AND id = :spu_id |
| ... | ... | @@ -191,7 +191,7 @@ class IncrementalIndexerService: |
| 191 | 191 | image_src, image_width, image_height, image_path, image_alt, |
| 192 | 192 | tags, note, category, category_id, category_google_id, |
| 193 | 193 | category_level, category_path, |
| 194 | - compare_at_price, fake_sales, display_fake_sales, | |
| 194 | + fake_sales, display_fake_sales, | |
| 195 | 195 | tenant_id, creator, create_time, updater, update_time, deleted |
| 196 | 196 | FROM shoplazza_product_spu |
| 197 | 197 | WHERE tenant_id = :tenant_id AND id = :spu_id AND deleted = 0 |
| ... | ... | @@ -243,7 +243,7 @@ class IncrementalIndexerService: |
| 243 | 243 | image_src, image_width, image_height, image_path, image_alt, |
| 244 | 244 | tags, note, category, category_id, category_google_id, |
| 245 | 245 | category_level, category_path, |
| 246 | - compare_at_price, fake_sales, display_fake_sales, | |
| 246 | + fake_sales, display_fake_sales, | |
| 247 | 247 | tenant_id, creator, create_time, updater, update_time, deleted |
| 248 | 248 | FROM shoplazza_product_spu |
| 249 | 249 | WHERE tenant_id = :tenant_id AND id IN :spu_ids |
| ... | ... | @@ -258,7 +258,7 @@ class IncrementalIndexerService: |
| 258 | 258 | image_src, image_width, image_height, image_path, image_alt, |
| 259 | 259 | tags, note, category, category_id, category_google_id, |
| 260 | 260 | category_level, category_path, |
| 261 | - compare_at_price, fake_sales, display_fake_sales, | |
| 261 | + fake_sales, display_fake_sales, | |
| 262 | 262 | tenant_id, creator, create_time, updater, update_time, deleted |
| 263 | 263 | FROM shoplazza_product_spu |
| 264 | 264 | WHERE tenant_id = :tenant_id AND deleted = 0 AND id IN :spu_ids | ... | ... |
| ... | ... | @@ -0,0 +1,30 @@ |
| 1 | +因为需要组织整个doc,我需要将当前的java程序迁移过来,项目路径在 /home/tw/saas-server | |
| 2 | +程序相对路径 包括但不限于 module-shoplazza/src/main/java/com/hsyl/saas/module/shoplazza/service/index/ProductIndexServiceImpl.java | |
| 3 | +请仔细阅读java相关代码,提取相关逻辑,特别是 翻译的相关字段 | |
| 4 | + | |
| 5 | + | |
| 6 | + | |
| 7 | + | |
| 8 | + | |
| 9 | +架构说明: | |
| 10 | + | |
| 11 | +java索引程序职责: | |
| 12 | + | |
| 13 | +负责增量、全量的触发,调度。 | |
| 14 | + | |
| 15 | +包括但不限于: | |
| 16 | +1、索引结构调整成按tenant_id的结构,并翻译对应的语言shoplazza_shop_config表对应的新增字段primary_language,translate_to_en,translate_to_zh | |
| 17 | +2、每晚上商品同步时,判断当前店铺主语言是什么,存入primary_language | |
| 18 | +3、同步店匠的类目shoplazza_product_category | |
| 19 | +4、加入MQ处理店匠批量导入商品并发太高,服务器承载不了的问题 | |
| 20 | + | |
| 21 | + | |
| 22 | +本模块: | |
| 23 | +负责 MySQL 基础数据 → 索引结构的doc (包括缓存) | |
| 24 | + | |
| 25 | +翻译接口: curl -X POST http://120.76.41.98:6006/translate -H "Content-Type: application/json" -d '{"text":"儿童小男孩女孩开学 100 天衬衫短袖 搞笑图案字母印花庆祝上衣","target_lang":"en","source_lang":"auto"}' | |
| 26 | + | |
| 27 | +java的组织doc的逻辑都需要迁移过来。 | |
| 28 | + | |
| 29 | +当前项目,是直接将doc写入ES,这个功能也保留,但是,也要提供一个接口,输入完整的字段信息 | |
| 30 | + | ... | ... |
indexer/spu_transformer.py
| ... | ... | @@ -45,7 +45,7 @@ class SPUTransformer: |
| 45 | 45 | image_src, image_width, image_height, image_path, image_alt, |
| 46 | 46 | tags, note, category, category_id, category_google_id, |
| 47 | 47 | category_level, category_path, |
| 48 | - compare_at_price, fake_sales, display_fake_sales, | |
| 48 | + fake_sales, display_fake_sales, | |
| 49 | 49 | tenant_id, creator, create_time, updater, update_time, deleted |
| 50 | 50 | FROM shoplazza_product_spu |
| 51 | 51 | WHERE tenant_id = :tenant_id AND deleted = 0 | ... | ... |