Commit 7299bae6396856b85f415a780200216d2d97fede

Authored by tangwang
1 parent bc54124c

tests

.github/workflows/test.yml
1   -name: saas-search Test Pipeline
  1 +name: CI - Service Contracts
2 2  
3 3 on:
4 4 push:
5   - branches: [ main, master, develop ]
  5 + branches: [main, master, develop]
6 6 pull_request:
7   - branches: [ main, master, develop ]
8   - workflow_dispatch: # 允许手动触发
9   -
10   -env:
11   - PYTHON_VERSION: '3.9'
12   - NODE_VERSION: '16'
  7 + branches: [main, master, develop]
  8 + workflow_dispatch:
13 9  
14 10 jobs:
15   - # 代码质量检查
16   - code-quality:
17   - runs-on: ubuntu-latest
18   - name: Code Quality Check
19   -
20   - steps:
21   - - name: Checkout code
22   - uses: actions/checkout@v4
23   -
24   - - name: Set up Python
25   - uses: actions/setup-python@v4
26   - with:
27   - python-version: ${{ env.PYTHON_VERSION }}
28   -
29   - - name: Install dependencies
30   - run: |
31   - python -m pip install --upgrade pip
32   - pip install flake8 black isort mypy pylint
33   - pip install -r requirements.txt
34   -
35   - - name: Run Black (code formatting)
36   - run: |
37   - black --check --diff .
38   -
39   - - name: Run isort (import sorting)
40   - run: |
41   - isort --check-only --diff .
42   -
43   - - name: Run Flake8 (linting)
44   - run: |
45   - flake8 --max-line-length=100 --ignore=E203,W503 .
46   -
47   - - name: Run MyPy (type checking)
48   - run: |
49   - mypy --ignore-missing-imports --no-strict-optional .
50   -
51   - - name: Run Pylint
52   - run: |
53   - pylint --disable=C0114,C0115,C0116 --errors-only .
54   -
55   - # 单元测试
56   - unit-tests:
57   - runs-on: ubuntu-latest
58   - name: Unit Tests
59   -
60   - strategy:
61   - matrix:
62   - python-version: ['3.8', '3.9', '3.10', '3.11']
63   -
64   - steps:
65   - - name: Checkout code
66   - uses: actions/checkout@v4
67   -
68   - - name: Set up Python ${{ matrix.python-version }}
69   - uses: actions/setup-python@v4
70   - with:
71   - python-version: ${{ matrix.python-version }}
72   -
73   - - name: Cache pip dependencies
74   - uses: actions/cache@v3
75   - with:
76   - path: ~/.cache/pip
77   - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements*.txt') }}
78   - restore-keys: |
79   - ${{ runner.os }}-pip-
80   -
81   - - name: Install dependencies
82   - run: |
83   - python -m pip install --upgrade pip
84   - pip install pytest pytest-cov pytest-json-report
85   - pip install -r requirements.txt
86   -
87   - - name: Create test logs directory
88   - run: mkdir -p test_logs
89   -
90   - - name: Run unit tests
91   - run: |
92   - python -m pytest tests/unit/ \
93   - -v \
94   - --tb=short \
95   - --cov=. \
96   - --cov-report=xml \
97   - --cov-report=html \
98   - --cov-report=term-missing \
99   - --json-report \
100   - --json-report-file=test_logs/unit_test_results.json
101   -
102   - - name: Upload coverage to Codecov
103   - uses: codecov/codecov-action@v3
104   - with:
105   - file: ./coverage.xml
106   - flags: unittests
107   - name: codecov-umbrella
108   -
109   - - name: Upload unit test results
110   - uses: actions/upload-artifact@v3
111   - if: always()
112   - with:
113   - name: unit-test-results-${{ matrix.python-version }}
114   - path: |
115   - test_logs/unit_test_results.json
116   - htmlcov/
117   -
118   - # 集成测试
119   - integration-tests:
120   - runs-on: ubuntu-latest
121   - name: Integration Tests
122   - needs: [code-quality, unit-tests]
123   -
124   - services:
125   - elasticsearch:
126   - image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0
127   - env:
128   - discovery.type: single-node
129   - ES_JAVA_OPTS: -Xms1g -Xmx1g
130   - xpack.security.enabled: false
131   - ports:
132   - - 9200:9200
133   - options: >-
134   - --health-cmd "curl http://localhost:9200/_cluster/health"
135   - --health-interval 10s
136   - --health-timeout 5s
137   - --health-retries 10
138   -
139   - steps:
140   - - name: Checkout code
141   - uses: actions/checkout@v4
142   -
143   - - name: Set up Python
144   - uses: actions/setup-python@v4
145   - with:
146   - python-version: ${{ env.PYTHON_VERSION }}
147   -
148   - - name: Install system dependencies
149   - run: |
150   - sudo apt-get update
151   - sudo apt-get install -y curl
152   -
153   - - name: Install Python dependencies
154   - run: |
155   - python -m pip install --upgrade pip
156   - pip install pytest pytest-json-report httpx
157   - pip install -r requirements.txt
158   -
159   - - name: Create test logs directory
160   - run: mkdir -p test_logs
161   -
162   - - name: Wait for Elasticsearch
163   - run: |
164   - echo "Waiting for Elasticsearch to be ready..."
165   - for i in {1..30}; do
166   - if curl -s http://localhost:9200/_cluster/health | grep -q '"status":"green\|yellow"'; then
167   - echo "Elasticsearch is ready"
168   - break
169   - fi
170   - echo "Attempt $i/30: Elasticsearch not ready yet"
171   - sleep 2
172   - done
173   -
174   - - name: Setup test index
175   - run: |
176   - curl -X PUT http://localhost:9200/test_products \
177   - -H 'Content-Type: application/json' \
178   - -d '{
179   - "settings": {
180   - "number_of_shards": 1,
181   - "number_of_replicas": 0
182   - },
183   - "mappings": {
184   - "properties": {
185   - "name": {"type": "text"},
186   - "brand_name": {"type": "text"},
187   - "tags": {"type": "text"},
188   - "price": {"type": "double"},
189   - "category_id": {"type": "integer"},
190   - "spu_id": {"type": "keyword"},
191   - "text_embedding": {"type": "dense_vector", "dims": 1024}
192   - }
193   - }
194   - }'
195   -
196   - - name: Insert test data
197   - run: |
198   - curl -X POST http://localhost:9200/test_products/_bulk \
199   - -H 'Content-Type: application/json' \
200   - --data-binary @- << 'EOF'
201   -{"index": {"_id": "1"}}
202   -{"name": "红色连衣裙", "brand_name": "测试品牌", "tags": ["红色", "连衣裙", "女装"], "price": 299.0, "category_id": 1, "spu_id": "dress_001"}
203   -{"index": {"_id": "2"}}
204   -{"name": "蓝色连衣裙", "brand_name": "测试品牌", "tags": ["蓝色", "连衣裙", "女装"], "price": 399.0, "category_id": 1, "spu_id": "dress_002"}
205   -{"index": {"_id": "3"}}
206   -{"name": "智能手机", "brand_name": "科技品牌", "tags": ["智能", "手机", "数码"], "price": 2999.0, "category_id": 2, "spu_id": "phone_001"}
207   -EOF
208   -
209   - - name: Run integration tests
210   - env:
211   - ES_HOST: http://localhost:9200
212   - TENANT_ID: test_tenant
213   - TESTING_MODE: true
214   - run: |
215   - python -m pytest tests/integration/ \
216   - -v \
217   - --tb=short \
218   - -m "not slow" \
219   - --json-report \
220   - --json-report-file=test_logs/integration_test_results.json
221   -
222   - - name: Upload integration test results
223   - uses: actions/upload-artifact@v3
224   - if: always()
225   - with:
226   - name: integration-test-results
227   - path: test_logs/integration_test_results.json
228   -
229   - # API测试
230   - api-tests:
231   - runs-on: ubuntu-latest
232   - name: API Tests
233   - needs: [code-quality, unit-tests]
234   -
235   - services:
236   - elasticsearch:
237   - image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0
238   - env:
239   - discovery.type: single-node
240   - ES_JAVA_OPTS: -Xms1g -Xmx1g
241   - xpack.security.enabled: false
242   - ports:
243   - - 9200:9200
244   - options: >-
245   - --health-cmd "curl http://localhost:9200/_cluster/health"
246   - --health-interval 10s
247   - --health-timeout 5s
248   - --health-retries 10
249   -
250   - steps:
251   - - name: Checkout code
252   - uses: actions/checkout@v4
253   -
254   - - name: Set up Python
255   - uses: actions/setup-python@v4
256   - with:
257   - python-version: ${{ env.PYTHON_VERSION }}
258   -
259   - - name: Install system dependencies
260   - run: |
261   - sudo apt-get update
262   - sudo apt-get install -y curl
263   -
264   - - name: Install Python dependencies
265   - run: |
266   - python -m pip install --upgrade pip
267   - pip install pytest pytest-json-report httpx
268   - pip install -r requirements.txt
269   -
270   - - name: Create test logs directory
271   - run: mkdir -p test_logs
272   -
273   - - name: Wait for Elasticsearch
274   - run: |
275   - echo "Waiting for Elasticsearch to be ready..."
276   - for i in {1..30}; do
277   - if curl -s http://localhost:9200/_cluster/health | grep -q '"status":"green\|yellow"'; then
278   - echo "Elasticsearch is ready"
279   - break
280   - fi
281   - echo "Attempt $i/30: Elasticsearch not ready yet"
282   - sleep 2
283   - done
284   -
285   - - name: Setup test index and data
286   - run: |
287   - # 创建索引
288   - curl -X PUT http://localhost:9200/test_products \
289   - -H 'Content-Type: application/json' \
290   - -d '{
291   - "settings": {"number_of_shards": 1, "number_of_replicas": 0},
292   - "mappings": {
293   - "properties": {
294   - "name": {"type": "text"}, "brand_name": {"type": "text"},
295   - "tags": {"type": "text"}, "price": {"type": "double"},
296   - "category_id": {"type": "integer"}, "spu_id": {"type": "keyword"},
297   - "text_embedding": {"type": "dense_vector", "dims": 1024}
298   - }
299   - }
300   - }'
301   -
302   - # 插入测试数据
303   - curl -X POST http://localhost:9200/test_products/_bulk \
304   - -H 'Content-Type: application/json' \
305   - --data-binary @- << 'EOF'
306   -{"index": {"_id": "1"}}
307   -{"name": "红色连衣裙", "brand_name": "测试品牌", "tags": ["红色", "连衣裙", "女装"], "price": 299.0, "category_id": 1, "spu_id": "dress_001"}
308   -{"index": {"_id": "2"}}
309   -{"name": "蓝色连衣裙", "brand_name": "测试品牌", "tags": ["蓝色", "连衣裙", "女装"], "price": 399.0, "category_id": 1, "spu_id": "dress_002"}
310   -EOF
311   -
312   - - name: Start API service
313   - env:
314   - ES_HOST: http://localhost:9200
315   - TENANT_ID: test_tenant
316   - API_HOST: 127.0.0.1
317   - API_PORT: 6003
318   - TESTING_MODE: true
319   - run: |
320   - python -m api.app \
321   - --host $API_HOST \
322   - --port $API_PORT \
323   - --tenant $TENANT_ID \
324   - --es-host $ES_HOST &
325   - echo $! > api.pid
326   -
327   - # 等待API服务启动
328   - for i in {1..30}; do
329   - if curl -s http://$API_HOST:$API_PORT/health > /dev/null; then
330   - echo "API service is ready"
331   - break
332   - fi
333   - echo "Attempt $i/30: API service not ready yet"
334   - sleep 2
335   - done
336   -
337   - - name: Run API tests
338   - env:
339   - ES_HOST: http://localhost:9200
340   - API_HOST: 127.0.0.1
341   - API_PORT: 6003
342   - TENANT_ID: test_tenant
343   - TESTING_MODE: true
344   - run: |
345   - python -m pytest tests/integration/test_api_integration.py \
346   - -v \
347   - --tb=short \
348   - --json-report \
349   - --json-report-file=test_logs/api_test_results.json
350   -
351   - - name: Stop API service
352   - if: always()
353   - run: |
354   - if [ -f api.pid ]; then
355   - kill $(cat api.pid) || true
356   - rm api.pid
357   - fi
358   -
359   - - name: Upload API test results
360   - uses: actions/upload-artifact@v3
361   - if: always()
362   - with:
363   - name: api-test-results
364   - path: test_logs/api_test_results.json
365   -
366   - # 性能测试
367   - performance-tests:
368   - runs-on: ubuntu-latest
369   - name: Performance Tests
370   - needs: [code-quality, unit-tests]
371   - if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
372   -
373   - services:
374   - elasticsearch:
375   - image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0
376   - env:
377   - discovery.type: single-node
378   - ES_JAVA_OPTS: -Xms2g -Xmx2g
379   - xpack.security.enabled: false
380   - ports:
381   - - 9200:9200
382   - options: >-
383   - --health-cmd "curl http://localhost:9200/_cluster/health"
384   - --health-interval 10s
385   - --health-timeout 5s
386   - --health-retries 10
387   -
388   - steps:
389   - - name: Checkout code
390   - uses: actions/checkout@v4
391   -
392   - - name: Set up Python
393   - uses: actions/setup-python@v4
394   - with:
395   - python-version: ${{ env.PYTHON_VERSION }}
396   -
397   - - name: Install dependencies
398   - run: |
399   - python -m pip install --upgrade pip
400   - pip install pytest locust
401   - pip install -r requirements.txt
402   -
403   - - name: Wait for Elasticsearch
404   - run: |
405   - echo "Waiting for Elasticsearch to be ready..."
406   - for i in {1..30}; do
407   - if curl -s http://localhost:9200/_cluster/health | grep -q '"status":"green\|yellow"'; then
408   - echo "Elasticsearch is ready"
409   - break
410   - fi
411   - sleep 2
412   - done
413   -
414   - - name: Setup test data
415   - run: |
416   - # 创建并填充测试索引
417   - python scripts/create_test_data.py --count 1000
418   -
419   - - name: Run performance tests
420   - env:
421   - ES_HOST: http://localhost:9200
422   - TESTING_MODE: true
423   - run: |
424   - python scripts/run_performance_tests.py
425   -
426   - - name: Upload performance results
427   - uses: actions/upload-artifact@v3
428   - if: always()
429   - with:
430   - name: performance-test-results
431   - path: performance_results/
432   -
433   - # 安全扫描
434   - security-scan:
  11 + service-contract-tests:
435 12 runs-on: ubuntu-latest
436   - name: Security Scan
437   - needs: [code-quality]
  13 + name: Service Contract Tests
438 14  
439 15 steps:
440   - - name: Checkout code
441   - uses: actions/checkout@v4
442   -
443   - - name: Set up Python
444   - uses: actions/setup-python@v4
445   - with:
446   - python-version: ${{ env.PYTHON_VERSION }}
447   -
448   - - name: Install security scanning tools
449   - run: |
450   - python -m pip install --upgrade pip
451   - pip install safety bandit
452   -
453   - - name: Run Safety (dependency check)
454   - run: |
455   - safety check --json --output safety_report.json || true
456   -
457   - - name: Run Bandit (security linter)
458   - run: |
459   - bandit -r . -f json -o bandit_report.json || true
460   -
461   - - name: Upload security reports
462   - uses: actions/upload-artifact@v3
463   - if: always()
464   - with:
465   - name: security-reports
466   - path: |
467   - safety_report.json
468   - bandit_report.json
469   -
470   - # 测试结果汇总
471   - test-summary:
472   - runs-on: ubuntu-latest
473   - name: Test Summary
474   - needs: [unit-tests, integration-tests, api-tests, security-scan]
475   - if: always()
476   -
477   - steps:
478   - - name: Checkout code
479   - uses: actions/checkout@v4
480   -
481   - - name: Download all test artifacts
482   - uses: actions/download-artifact@v3
483   -
484   - - name: Generate test summary
485   - run: |
486   - python scripts/generate_test_summary.py
487   -
488   - - name: Upload final report
489   - uses: actions/upload-artifact@v3
490   - with:
491   - name: final-test-report
492   - path: final_test_report.*
493   -
494   - - name: Comment PR with results
495   - if: github.event_name == 'pull_request'
496   - uses: actions/github-script@v6
497   - with:
498   - script: |
499   - const fs = require('fs');
500   -
501   - // 读取测试报告
502   - let reportContent = '';
503   - try {
504   - reportContent = fs.readFileSync('final_test_report.txt', 'utf8');
505   - } catch (e) {
506   - console.log('Could not read report file');
507   - return;
508   - }
509   -
510   - // 提取摘要信息
511   - const lines = reportContent.split('\n');
512   - let summary = '';
513   - let inSummary = false;
514   -
515   - for (const line of lines) {
516   - if (line.includes('测试摘要')) {
517   - inSummary = true;
518   - continue;
519   - }
520   - if (inSummary && line.includes('测试套件详情')) {
521   - break;
522   - }
523   - if (inSummary && line.trim()) {
524   - summary += line + '\n';
525   - }
526   - }
527   -
528   - // 构建评论内容
529   - const comment = `## 🧪 测试报告\n\n${summary}\n\n详细的测试报告请查看 [Artifacts](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) 部分。`;
530   -
531   - // 发送评论
532   - github.rest.issues.createComment({
533   - issue_number: context.issue.number,
534   - owner: context.repo.owner,
535   - repo: context.repo.repo,
536   - body: comment
537   - });
538 16 \ No newline at end of file
  17 + - name: Checkout code
  18 + uses: actions/checkout@v4
  19 +
  20 + - name: Set up Python
  21 + uses: actions/setup-python@v5
  22 + with:
  23 + python-version: "3.11"
  24 +
  25 + - name: Install dependencies
  26 + run: |
  27 + python -m pip install --upgrade pip
  28 + pip install -r requirements.txt
  29 +
  30 + - name: Run CI contract tests
  31 + run: |
  32 + python -m pytest tests/ci -q
539 33 \ No newline at end of file
... ...
README.md
1   -# TODO
2   -
3   -**多语言索引**:已改为可配置的 `index_languages`(默认为 `["en", "zh"]`),商家可勾选主市场语言。支持语言见 `config.tenant_config_loader.SUPPORTED_INDEX_LANGUAGES`(含 en, zh, zh_tw, ru, ja, ko, es, fr, pt, de, it, th, vi, id, ms, ar, hi, he, my, ta, ur, bn, pl, nl, ro, tr, km, lo, yue, cs, el, sv, hu, da, fi, uk, bg 等)。
4   -
5   -前端:
6   -搜索模态框
7   -点击搜索的时候,弹出 搜索模态框,参考 react、AJAX等技术来实现,搜索模态框的页面宽度和原始页面相同(占满),左侧是suggestions,右侧是即使刷新的搜索结果(每输入一个字母都刷新一次结果)。
8   -但是要注意:搜索过程中,后端不要触发翻译(因为输入过程中的query翻译结果会有问题),因此需要增加一个参数:搜索类型,默认为当前的回车后发起的搜索,如果是输入过程中的结果刷新则类型为typing。
9   -
10   -
11   -
12   -多语言:
13   -语义:
14   -多语言:
15   -1. dis_max的方式
16   -"query" : {
17   - "dis_max" : {
18   - "queries" : [
19   - {"match" : { "title.en" : xxx }},
20   - {"match" : { "title.zh" : xxx }},
21   - {"match" : { "title_xx" : xxx }}
22   - ],
23   - "tie_breakler" : 0.8
24   - }
25   -}
26   -
27   -
28   -
29   -"corss_field":
30   -"multi_match" : {
31   - "query" :
32   - "fields" : [...],
33   - "type": "cross_fields",
34   - "operator" : "and"
35   -}
36   -
37   -
38   -支持英文的拼写接错:
39   -title: multi_field
40   -"query" : {
41   - "query_string" : {
42   - "query": "xxx",
43   - "default_field": "title.ngram",
44   - "minimum_should_match": "85%"
45   - }
46   -}
47   -
48   -
49   -
50   -业务提权:
51   -rescore: window_size query_weight rescore_query_weight
52   -
53   -function_score:
54   -boost
55   -以及 可以把每个子查询用function_score包一下
56   -
57   -
58   -
59   -
60   -query anchor
61   -我想给elasticsearch 增加字段 query anchor ,即哪些query点击到了这个doc,一个doc下面有多个query anchor,每个query anchor又有这两个属性:weight、dweight,分别代表 query在doc下的点击分布权重、doc在query下的点击分布权重。请问该如何设计这两个ES字段。
62   -
63   -需要有zh en两套query anchor,因为他们的解析器不一样。
64   -
65   -他的功能是辅助召回和排序。我搜索一个query,那么每个query跟 doc中的query anchor的相关性,也就是,除了将query到title 和keywords / brief等文本字段中搜索,也到 query anchor中搜索,从而辅助召回和相关性的计算。
66   -
67   -
  1 +# 电商搜索引擎 SaaS
68 2  
  3 +多租户、可配置、可扩展的电商搜索平台(Shoplazza 等独立站场景)。
69 4  
  5 +README 用于给后续开发者建立统一认知:**系统框架、模块边界、设计原则、研发流程与 CI 测试入口**,帮助持续迭代时避免分叉设计与冗余代码。
70 6  
71   -# 电商搜索引擎 SaaS
  7 +---
72 8  
73   -一个针对跨境独立站(店匠 Shoplazza 等)的多租户可配置搜索平台。README 作为项目导航入口,帮助你在不同阶段定位到更详细的文档。
  9 +## 1) 项目目标与边界
74 10  
  11 +- **目标**:在统一架构下支持关键词检索、语义检索、分面过滤、多语言、重排、图片检索。
  12 +- **边界**:本仓库负责搜索核心能力与服务编排;业务方通过标准 HTTP API 对接。
  13 +- **核心约束**:
  14 + - 调用方稳定(API/Provider 契约优先)
  15 + - 配置单一来源(`config/config.yaml` + `.env` 覆盖)
  16 + - 扩展优先走插件化(provider/backend),避免散落式分叉实现
75 17  
76   -## 项目环境
  18 +---
77 19  
78   -以项目根目录的 **`activate.sh`** 为准(**优先激活 venv:`./.venv`,并加载 `.env`;兼容 Conda 回退**):
  20 +## 2) 快速开始
79 21  
80 22 ```bash
81   -# 推荐:首次创建 venv(默认安装基础依赖)
  23 +# 首次创建环境(默认基础依赖)
82 24 ./scripts/create_venv.sh
83   -
84   -# 如需本地向量/图片编码(会安装 torch/transformers 等重依赖)
85   -# INSTALL_ML=1 ./scripts/create_venv.sh
86 25 source activate.sh
87   -```
88 26  
89   -新机器首次需创建环境,见 `docs/环境配置说明.md`(推荐 venv;Conda 为兼容旧流程)。
  27 +# 启动核心服务(backend/indexer/frontend)
  28 +./run.sh
90 29  
91   -## 测试pipeline
  30 +# 可选:附加能力服务
  31 +START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh
92 32  
93   -1.
94   -店铺1 tenant_id=162:
95   -fake数据 生成商品导入数据 提交到店匠的店铺:
96   -cd /data/saas-search && source activate.sh && python scripts/csv_to_excel_multi_variant.py --output with_colors.xlsx
  33 +# 查看状态
  34 +./scripts/service_ctl.sh status
  35 +```
97 36  
98   -店铺2 tenant_id=
  37 +核心端口:
99 38  
  39 +- `6002` backend(`/search/*`, `/admin/*`)
  40 +- `6004` indexer(`/indexer/*`)
  41 +- `6003` frontend
  42 +- `6005` embedding(可选)
  43 +- `6006` translator(可选)
  44 +- `6007` reranker(可选)
100 45  
101   -2. 后端:自动同步到mysql
  46 +更完整示例见 `docs/QUICKSTART.md`。
102 47  
103   -3. mysql到ES:
  48 +---
104 49  
105   -python scripts/recreate_and_import.py \
106   - --tenant-id 162 \
107   - --db-host <mysql_host> \
108   - --db-database saas \
109   - --db-username saas \
110   - --db-password <password> \
111   - --es-host http://localhost:9200
  50 +## 3) 总体架构(开发者视角)
112 51  
113   -构造查询:
114   -参考 @常用查询 - ES.md
  52 +- `api/`:统一 API 入口(search/admin/indexer app)
  53 +- `search/`:召回、排序、结果组织
  54 +- `query/`:查询解析、多语言处理、改写
  55 +- `indexer/`:MySQL 行数据 -> ES 文档的转换与索引流程
  56 +- `providers/`:能力调用抽象(translation/embedding/rerank)
  57 +- `reranker/`:重排服务及后端实现
  58 +- `embeddings/`:向量服务(文本/图像)
  59 +- `config/`:配置加载与服务配置解析
115 60  
  61 +关键设计:**Provider(调用方式)与 Backend(推理实现)分离**,新增能力优先在协议与工厂注册,不改调用方主流程。
116 62  
117   -## 核心能力速览
  63 +---
118 64  
119   -- **多语言 + 自动翻译**:中文、英文、俄文等语言检测与路由(BGE-M3、DeepL)
120   -- **语义 + 关键词混排**:BM25、dense vector(BGE-M3/CN-CLIP)融合
121   -- **布尔与分面**:AND / OR / ANDNOT / RANK、Terms & Range facets
122   -- **多租户隔离**:共享 `search_products` 索引,通过 `tenant_id` 严格隔离
123   -- **可配置化**:字段/索引域/排序表达式/查询改写全部配置驱动
124   -- **脚本化流水线**:Mock/CSV 数据 → MySQL → Elasticsearch → API/前端
  65 +## 4) 设计原则(避免后续分叉)
125 66  
126   -## 新人入口
  67 +- **单一配置源**:服务地址、provider 选择、后端参数统一在 `config/config.yaml`,环境变量仅做覆盖。
  68 +- **接口契约优先**:外部 API 契约与 provider 契约稳定,内部重构不影响调用方。
  69 +- **扩展走工厂**:新增 provider/backend 必须在工厂函数中显式注册,禁止旁路分支。
  70 +- **可观测性优先**:健康检查、关键日志、请求上下文必须可追踪。
  71 +- **测试优先保障契约**:CI 首先保证接口契约和核心路径可用,再逐步扩展性能与业务测试。
127 72  
128   -**→ 开发者必读**:[docs/DEVELOPER_GUIDE.md](docs/DEVELOPER_GUIDE.md) — 项目全貌、设计原则、扩展规范与迭代检查清单,保证后续开发在统一框架内进行。
  73 +---
129 74  
130   -**→ 快速上手**:[docs/QUICKSTART.md](docs/QUICKSTART.md) — 环境、服务、模块、请求示例一页搞定。
  75 +## 5) 文档入口(建议阅读顺序)
131 76  
132 77 | 步骤 | 文档 |
133 78 |------|------|
134   -| 0. 框架与规范(推荐首读) | `docs/DEVELOPER_GUIDE.md` |
135   -| 1. 环境与启动 | `docs/QUICKSTART.md` |
136   -| 2. 搜索/索引 API | `docs/QUICKSTART.md` §3、`docs/搜索API速查表.md` |
137   -| 3. 运维与故障 | `docs/Usage-Guide.md` |
138   -| 4. 架构与扩展 | `docs/PROVIDER_ARCHITECTURE.md`、`docs/MODULE_EXTENSION_SPEC.md`、`docs/系统设计文档.md` |
139   -
140   -### Runtimes & 命令示例
  79 +| 0. 全局规范(首读) | `docs/DEVELOPER_GUIDE.md` |
  80 +| 1. 开发与配置 | `docs/QUICKSTART.md` |
  81 +| 2. 运行与排障 | `docs/Usage-Guide.md` |
  82 +| 3. API 详细说明 | `docs/搜索API对接指南.md` |
  83 +| 4. 快速参数速查 | `docs/搜索API速查表.md` |
  84 +| 5. 首次环境搭建 | `docs/环境配置说明.md` |
141 85  
142   -```bash
143   -# 1. 安装依赖与准备服务(环境创建见 docs/环境配置说明.md)
144   -source activate.sh # 或先 export CONDA_ROOT=你的conda路径
145   -pip install -r requirements.txt # 若用 environment.yml 创建环境可省略
146   -docker run -d --name es -p 9200:9200 elasticsearch:8.11.0
  86 +---
147 87  
148   -# 2. 构造测试数据并导入 MySQL
149   -./scripts/mock_data.sh # 详见 TEST_DATA_GUIDE.md
  88 +## 6) 持续集成测试(推荐最小集)
150 89  
151   -# 3. 创建租户索引结构并导入数据(推荐)
152   -./scripts/create_tenant_index.sh 162
153   -curl -X POST "http://localhost:6004/indexer/reindex" \
154   - -H "Content-Type: application/json" \
155   - -d '{"tenant_id":"162","batch_size":500}'
  90 +本仓库提供一套轻量、稳定、易维护的 CI 测试入口,覆盖以下服务契约:
156 91  
157   -# 4. 启动核心服务(backend/indexer/frontend)
158   -./run.sh
  92 +- 搜索接口(search API)
  93 +- 索引接口(indexer API)
  94 +- 向量服务(embedding service)
  95 +- 翻译服务(translator service)
  96 +- 重排服务(reranker service)
159 97  
160   -# (可选)附加启动 embedding / translator / reranker
161   -START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh
162   -#
163   -# 查看服务状态 / 停止
164   -./scripts/service_ctl.sh status
165   -./scripts/stop.sh
  98 +本地运行:
166 99  
167   -# 5. 调用文本搜索 API
168   -curl -X POST http://localhost:6002/search/ \
169   - -H "Content-Type: application/json" \
170   - -H "X-Tenant-ID: 1" \
171   - -d '{"query": "玩具", "size": 10}'
  100 +```bash
  101 +source activate.sh
  102 +python -m pytest tests/ci -q
172 103 ```
173 104  
174   -## 文档索引
  105 +该测试集采用 mock/stub,**不依赖真实 ES/MySQL/大模型服务**,适合作为 PR 级快速回归门禁。
175 106  
176   -| 文档 | 用途 |
177   -|------|------|
178   -| `docs/DEVELOPER_GUIDE.md` | **开发者开放指南**:全貌、原则、规范、检查清单 |
179   -| `docs/QUICKSTART.md` | 新人上手:环境、服务、模块、请求 |
180   -| `docs/Usage-Guide.md` | 运维:日志、多环境、故障排查 |
181   -| `docs/搜索API速查表.md` | 搜索 API 参数速查 |
182   -| `docs/搜索API对接指南.md` | 搜索 API 完整说明 |
183   -| `docs/PROVIDER_ARCHITECTURE.md` | 翻译/向量/重排 provider 扩展 |
184   -| `docs/MODULE_EXTENSION_SPEC.md` | 向量/重排后端可插拔规范 |
185   -| `docs/环境配置说明.md` | 首次部署、新机器环境 |
186   -| `docs/系统设计文档.md` | 架构与模块细节 |
187   -
188   -## 关键工作流指引
189   -
190   -- **数据构建 → MySQL → Elasticsearch**
191   - - `scripts/mock_data.sh`:Tenant1 Mock + Tenant2 CSV 一条龙
192   - - `scripts/create_tenant_index.sh <tenant_id>` + `POST /indexer/reindex`:推荐导入链路
193   - - 详解:`测试数据指南.md`
194   -
195   -- **索引富化 & Java 对接**
196   - - Java 索引程序负责:全量/增量调度 + 从 MySQL 查询 `shoplazza_product_spu/sku/option/...`
197   - - Python `indexer` 模块负责:**MySQL 行 → ES doc** 的全部逻辑(多语言、翻译、向量、规格聚合等)
198   - - 正式对接接口(推荐):
199   - - `POST http://<indexer_host>:6004/indexer/build-docs`
200   - - 入参:`tenant_id + items[{spu, skus, options}]`
201   - - 出参:与 `mappings/search_products.json` 完全一致的 `docs` 列表,上游自行写入 ES
202   - - 调试/自测接口(内部使用):
203   - - `POST http://127.0.0.1:6004/indexer/build-docs-from-db`,只需要 `tenant_id + spu_ids`,由服务内部查库并返回 ES doc
204   - - 详解:`indexer/README.md`、`docs/索引字段说明v2.md`
205   -
206   -- **搜索服务 & API**
207   - - `api/`(FastAPI)承载 REST API,`search/` + `query/` 负责查询解析与下发
208   - - API、分页、过滤、Facet、KNN 等:`搜索API对接指南.md`
209   - - 对接案例、示例与错误码:`搜索API对接指南.md`、`Search-API-Examples.md`
210   -
211   -- **统一配置**
212   - - 所有租户共享统一的索引结构和查询配置(硬编码)
213   - - 索引 mapping: `mappings/search_products.json`
214   - - 查询配置: `search/query_config.py`
215   - - 详解:`基础配置指南.md`、`索引字段说明v2.md`
216   -
217   -## 仓库结构(概览)
  107 +---
218 108  
219   -```
220   -api/ FastAPI 服务与路由
221   -config/ 字段/索引/查询配置体系
222   -indexer/ MySQL → ES 管道(mapping / transformer / bulk)
223   -query/ 查询解析、改写、翻译、embedding
224   -search/ 多语言构建、布尔解析、排序引擎
225   -scripts/ 数据/服务脚本(mock_data, ingest, run 等)
226   -frontend/ 简易调试页面
227   -docs/ 运营及中文资料
228   -```
  109 +## 7) 代码质量与持续集成要求
  110 +
  111 +- 新增功能必须补最小测试(至少覆盖 1 条成功路径 + 1 条参数异常路径)
  112 +- 修改公共协议时必须同步更新:
  113 + - `docs/QUICKSTART.md`
  114 + - 对应服务 README / API 文档
  115 + - `tests/ci` 契约用例
  116 +- 禁止新增“临时分支逻辑”绕过 provider/backend 工厂
  117 +- 优先减少重复实现,复用现有转换链路与配置解析入口
... ...
docs/QUICKSTART.md
... ... @@ -27,6 +27,7 @@
27 27 4. [模块扩展规范(Embedding / Rerank)](#4-模块扩展规范embedding--rerank)
28 28 5. [验证、日志与常见排障入口](#5-验证日志与常见排障入口)
29 29 6. [相关文档](#6-相关文档)
  30 +7. [持续集成测试(最小可维护方案)](#7-持续集成测试最小可维护方案)
30 31  
31 32 ---
32 33  
... ... @@ -374,3 +375,32 @@ lsof -i :6004
374 375 | `indexer/README.md` | 索引模块职责与接口 |
375 376 | `embeddings/README.md` | 向量化服务说明 |
376 377 | `reranker/README.md` | 重排服务说明 |
  378 +
  379 +---
  380 +
  381 +## 7. 持续集成测试(最小可维护方案)
  382 +
  383 +目标:让后续开发者在不依赖真实 ES/MySQL/模型服务的前提下,快速验证核心服务契约不被破坏。
  384 +
  385 +### 7.1 测试范围
  386 +
  387 +`tests/ci/test_service_api_contracts.py` 覆盖:
  388 +
  389 +- 搜索接口:`/search/`、`/search/image`、`/search/suggestions`
  390 +- 索引接口:`/indexer/reindex`、`/indexer/index`、`/indexer/build-docs`
  391 +- 向量服务:`/embed/text`、`/embed/image`
  392 +- 翻译服务:`/translate`、`/health`
  393 +- 重排服务:`/rerank`、`/health`
  394 +
  395 +### 7.2 运行方式
  396 +
  397 +```bash
  398 +source activate.sh
  399 +python -m pytest tests/ci -q
  400 +```
  401 +
  402 +### 7.3 设计取舍
  403 +
  404 +- 使用 mock/stub 注入依赖,确保测试快且稳定
  405 +- 重点测“接口契约与参数行为”,而不是底层模型质量
  406 +- 作为 PR 级门禁;真实环境联调放在运维/预发布流程
... ...
scripts/run_ci_tests.sh 0 → 100755
... ... @@ -0,0 +1,9 @@
  1 +#!/bin/bash
  2 +
  3 +set -euo pipefail
  4 +
  5 +cd "$(dirname "$0")/.."
  6 +source ./activate.sh
  7 +
  8 +echo "Running CI contract tests..."
  9 +python -m pytest tests/ci -q
... ...
tests/ci/test_service_api_contracts.py 0 → 100644
... ... @@ -0,0 +1,281 @@
  1 +from __future__ import annotations
  2 +
  3 +from types import SimpleNamespace
  4 +from typing import Any, Dict, List
  5 +
  6 +import numpy as np
  7 +import pytest
  8 +from fastapi.testclient import TestClient
  9 +
  10 +
  11 +class _FakeSearcher:
  12 + def search(self, **kwargs):
  13 + return SimpleNamespace(
  14 + results=[
  15 + {
  16 + "spu_id": "spu-1",
  17 + "title": "测试商品",
  18 + "price": 99.0,
  19 + "currency": "USD",
  20 + "in_stock": True,
  21 + "skus": [],
  22 + "relevance_score": 1.2,
  23 + }
  24 + ],
  25 + total=1,
  26 + max_score=1.2,
  27 + took_ms=8,
  28 + facets=[],
  29 + query_info={"normalized_query": kwargs.get("query", "")},
  30 + suggestions=[],
  31 + related_searches=[],
  32 + debug_info=None,
  33 + )
  34 +
  35 + def search_by_image(self, **kwargs):
  36 + return self.search(**kwargs)
  37 +
  38 +
  39 +class _FakeSuggestionService:
  40 + def search(self, **kwargs):
  41 + return {
  42 + "query": kwargs["query"],
  43 + "language": kwargs.get("language", "en"),
  44 + "resolved_language": kwargs.get("language", "en"),
  45 + "suggestions": [{"text": "iphone 15", "score": 1.0}],
  46 + "took_ms": 3,
  47 + }
  48 +
  49 +
  50 +@pytest.fixture
  51 +def search_client(monkeypatch):
  52 + import api.app as search_app
  53 +
  54 + monkeypatch.setattr(search_app, "init_service", lambda es_host="": None)
  55 + monkeypatch.setattr(search_app, "get_searcher", lambda: _FakeSearcher())
  56 + monkeypatch.setattr(search_app, "get_suggestion_service", lambda: _FakeSuggestionService())
  57 +
  58 + with TestClient(search_app.app) as client:
  59 + yield client
  60 +
  61 +
  62 +def test_search_api_contract(search_client: TestClient):
  63 + response = search_client.post(
  64 + "/search/",
  65 + headers={"X-Tenant-ID": "162"},
  66 + json={"query": "toy", "size": 5},
  67 + )
  68 + assert response.status_code == 200
  69 + data = response.json()
  70 + assert data["total"] == 1
  71 + assert data["results"][0]["spu_id"] == "spu-1"
  72 +
  73 +
  74 +def test_image_search_api_contract(search_client: TestClient):
  75 + response = search_client.post(
  76 + "/search/image",
  77 + headers={"X-Tenant-ID": "162"},
  78 + json={"image_url": "https://example.com/a.jpg", "size": 3},
  79 + )
  80 + assert response.status_code == 200
  81 + assert response.json()["results"][0]["spu_id"] == "spu-1"
  82 +
  83 +
  84 +def test_suggestion_api_contract(search_client: TestClient):
  85 + response = search_client.get(
  86 + "/search/suggestions?q=iph&size=5&language=en",
  87 + headers={"X-Tenant-ID": "162"},
  88 + )
  89 + assert response.status_code == 200
  90 + data = response.json()
  91 + assert data["query"] == "iph"
  92 + assert len(data["suggestions"]) == 1
  93 +
  94 +
  95 +class _FakeBulkService:
  96 + def bulk_index(self, tenant_id: str, recreate_index: bool, batch_size: int):
  97 + return {
  98 + "tenant_id": tenant_id,
  99 + "recreate_index": recreate_index,
  100 + "batch_size": batch_size,
  101 + "success": True,
  102 + }
  103 +
  104 +
  105 +class _FakeTransformer:
  106 + def transform_spu_to_doc(self, tenant_id: str, spu_row, skus, options):
  107 + return {
  108 + "tenant_id": tenant_id,
  109 + "spu_id": str(spu_row.get("id", "0")),
  110 + "title": {"zh": str(spu_row.get("title", ""))},
  111 + }
  112 +
  113 +
  114 +class _FakeIncrementalService:
  115 + def index_spus_to_es(self, es_client, tenant_id: str, spu_ids: List[str], delete_spu_ids=None):
  116 + return {
  117 + "tenant_id": tenant_id,
  118 + "spu_ids": [{"spu_id": s, "status": "indexed"} for s in spu_ids],
  119 + "delete_spu_ids": [],
  120 + "total": len(spu_ids),
  121 + "success_count": len(spu_ids),
  122 + "failed_count": 0,
  123 + }
  124 +
  125 + def _get_transformer_bundle(self, tenant_id: str):
  126 + return _FakeTransformer(), None, False
  127 +
  128 +
@pytest.fixture
def indexer_client(monkeypatch):
    """Yield a TestClient for the indexer app with its service accessors replaced by fakes."""
    import api.indexer_app as indexer_app
    import api.routes.indexer as indexer_routes

    # Service initialisation becomes a no-op that still accepts the optional argument.
    monkeypatch.setattr(indexer_app, "init_indexer_service", lambda es_host="": None)

    # Route-level accessors hand out the in-file fakes (and a bare object as the ES client).
    route_overrides = {
        "get_bulk_indexing_service": lambda: _FakeBulkService(),
        "get_incremental_service": lambda: _FakeIncrementalService(),
        "get_es_client": lambda: object(),
    }
    for accessor_name, factory in route_overrides.items():
        monkeypatch.setattr(indexer_routes, accessor_name, factory)

    with TestClient(indexer_app.app) as client:
        yield client
  141 +
  142 +
def test_indexer_reindex_contract(indexer_client: TestClient):
    """POST /indexer/reindex answers 200 with ``success`` set to True."""
    payload = {"tenant_id": "162", "batch_size": 100}
    resp = indexer_client.post("/indexer/reindex", json=payload)
    assert resp.status_code == 200
    assert resp.json()["success"] is True
  150 +
  151 +
def test_indexer_incremental_contract(indexer_client: TestClient):
    """POST /indexer/index answers 200 and reports both requested SPUs as successful."""
    payload = {"tenant_id": "162", "spu_ids": ["1001", "1002"]}
    resp = indexer_client.post("/indexer/index", json=payload)
    assert resp.status_code == 200

    body = resp.json()
    assert body["success_count"] == 2
  160 +
  161 +
def test_indexer_build_docs_contract(indexer_client: TestClient):
    """POST /indexer/build-docs answers 200 and returns one doc whose spu_id is the string "1"."""
    item = {"spu": {"id": 1, "title": "T-shirt"}, "skus": [], "options": []}
    payload = {"tenant_id": "162", "items": [item]}

    resp = indexer_client.post("/indexer/build-docs", json=payload)
    assert resp.status_code == 200

    body = resp.json()
    assert body["success_count"] == 1
    first_doc = body["docs"][0]
    assert first_doc["spu_id"] == "1"
  174 +
  175 +
  176 +class _FakeTextModel:
  177 + def encode_batch(self, texts, batch_size=32, device="cpu"):
  178 + return [np.array([0.1, 0.2, 0.3], dtype=np.float32) for _ in texts]
  179 +
  180 +
  181 +class _FakeImageModel:
  182 + def encode_image_urls(self, urls, batch_size=8):
  183 + return [np.array([0.3, 0.2, 0.1], dtype=np.float32) for _ in urls]
  184 +
  185 +
@pytest.fixture
def embedding_client(monkeypatch):
    """Yield a TestClient for the embedding server backed by fake text/image models.

    The fakes are installed through ``monkeypatch`` and the app's startup handlers
    are put back on teardown, so the shared ``embeddings.server`` module is left as
    it was found instead of leaking test state into later tests.
    """
    import embeddings.server as emb_server

    # Empty the startup handler list in place so none of them run when the
    # TestClient enters its context (presumably they load the real models --
    # confirm against embeddings.server). Keep a copy to restore afterwards.
    startup_handlers = emb_server.app.router.on_startup
    saved_handlers = list(startup_handlers)
    startup_handlers.clear()

    # raising=False mirrors plain assignment: it works even if the module does
    # not define the attribute up front.
    monkeypatch.setattr(emb_server, "_text_model", _FakeTextModel(), raising=False)
    monkeypatch.setattr(emb_server, "_image_model", _FakeImageModel(), raising=False)

    try:
        with TestClient(emb_server.app) as client:
            yield client
    finally:
        # Slice assignment keeps the same list object the router holds.
        startup_handlers[:] = saved_handlers
  196 +
  197 +
def test_embedding_text_contract(embedding_client: TestClient):
    """POST /embed/text with two texts answers 200 with two vectors of length 3."""
    texts = ["hello", "world"]
    resp = embedding_client.post("/embed/text", json=texts)
    assert resp.status_code == 200

    vectors = resp.json()
    assert len(vectors) == 2
    assert len(vectors[0]) == 3
  204 +
  205 +
def test_embedding_image_contract(embedding_client: TestClient):
    """POST /embed/image with one URL answers 200 and the first vector has length 3."""
    urls = ["https://example.com/a.jpg"]
    resp = embedding_client.post("/embed/image", json=urls)
    assert resp.status_code == 200

    first_vector = resp.json()[0]
    assert len(first_vector) == 3
  210 +
  211 +
  212 +class _FakeTranslator:
  213 + model = "qwen"
  214 + use_cache = True
  215 +
  216 + def translate(self, text: str, target_lang: str, source_lang: str | None = None, prompt: str | None = None):
  217 + return f"{text}-{target_lang}"
  218 +
  219 +
@pytest.fixture
def translator_client(monkeypatch):
    """Yield a TestClient for the translator app whose ``get_translator`` returns a fake.

    The app's startup handlers are emptied for the duration of the fixture and
    restored on teardown, so the shared app object is not left permanently altered.
    """
    import api.translator_app as translator_app

    # Empty the startup handler list in place so none of them run when the
    # TestClient enters its context; keep a copy to restore afterwards.
    startup_handlers = translator_app.app.router.on_startup
    saved_handlers = list(startup_handlers)
    startup_handlers.clear()

    # The replacement accepts the same optional ``model`` argument and ignores it.
    monkeypatch.setattr(translator_app, "get_translator", lambda model="qwen": _FakeTranslator())

    try:
        with TestClient(translator_app.app) as client:
            yield client
    finally:
        # Slice assignment keeps the same list object the router holds.
        startup_handlers[:] = saved_handlers
  229 +
  230 +
def test_translator_api_contract(translator_client: TestClient):
    """POST /translate answers 200 and returns the expected ``translated_text``."""
    payload = {"text": "商品名称", "target_lang": "en", "source_lang": "zh"}
    resp = translator_client.post("/translate", json=payload)
    assert resp.status_code == 200
    assert resp.json()["translated_text"] == "商品名称-en"
  238 +
  239 +
def test_translator_health_contract(translator_client: TestClient):
    """GET /health on the translator app answers 200 with status "healthy"."""
    resp = translator_client.get("/health")
    assert resp.status_code == 200

    body = resp.json()
    assert body["status"] == "healthy"
  244 +
  245 +
  246 +class _FakeReranker:
  247 + _model_name = "fake-reranker"
  248 +
  249 + def score_with_meta(self, query: str, docs: List[str], normalize: bool = True):
  250 + scores = [float(i + 1) for i in range(len(docs))]
  251 + meta: Dict[str, Any] = {"input_docs": len(docs), "unique_docs": len(set(docs))}
  252 + return scores, meta
  253 +
  254 +
@pytest.fixture
def reranker_client(monkeypatch):
    """Yield a TestClient for the reranker server backed by a fake reranker.

    The fake and backend name are installed through ``monkeypatch`` and the app's
    startup handlers are put back on teardown, so the shared ``reranker.server``
    module is left as it was found instead of leaking test state into later tests.
    """
    import reranker.server as reranker_server

    # Empty the startup handler list in place so none of them run when the
    # TestClient enters its context (presumably they load the real reranker --
    # confirm against reranker.server). Keep a copy to restore afterwards.
    startup_handlers = reranker_server.app.router.on_startup
    saved_handlers = list(startup_handlers)
    startup_handlers.clear()

    # raising=False mirrors plain assignment: it works even if the module does
    # not define the attribute up front.
    monkeypatch.setattr(reranker_server, "_reranker", _FakeReranker(), raising=False)
    monkeypatch.setattr(reranker_server, "_backend_name", "fake", raising=False)

    try:
        with TestClient(reranker_server.app) as client:
            yield client
    finally:
        # Slice assignment keeps the same list object the router holds.
        startup_handlers[:] = saved_handlers
  265 +
  266 +
def test_reranker_api_contract(reranker_client: TestClient):
    """POST /rerank answers 200 with scores [1.0, 2.0] and ``input_docs`` of 2 in the meta."""
    payload = {"query": "wireless mouse", "docs": ["doc-a", "doc-b"]}
    resp = reranker_client.post("/rerank", json=payload)
    assert resp.status_code == 200

    body = resp.json()
    assert body["scores"] == [1.0, 2.0]
    assert body["meta"]["input_docs"] == 2
  276 +
  277 +
def test_reranker_health_contract(reranker_client: TestClient):
    """GET /health on the reranker server answers 200 with status "ok"."""
    resp = reranker_client.get("/health")
    assert resp.status_code == 200

    body = resp.json()
    assert body["status"] == "ok"
... ...
tests/test_cloud_embedding.py
... ... @@ -11,6 +11,8 @@ import time
11 11 from datetime import datetime
12 12 from pathlib import Path
13 13  
  14 +import pytest
  15 +
14 16 # Add parent directory to path
15 17 sys.path.insert(0, str(Path(__file__).parent.parent))
16 18  
... ... @@ -44,6 +46,7 @@ def read_queries(file_path: str, limit: int = 100) -> list:
44 46 return queries
45 47  
46 48  
  49 +@pytest.mark.skip(reason="Requires data file and DASHSCOPE_API_KEY; run manually when needed")
47 50 def test_cloud_embedding(queries_file: str, num_queries: int = 100):
48 51 """
49 52 Test cloud embedding with queries from file.
... ...
tests/test_cnclip_service.py
... ... @@ -13,11 +13,17 @@ CN-CLIP 服务测试脚本
13 13 """
14 14  
15 15 import sys
16   -import numpy as np
17   -from clip_client import Client
18 16  
  17 +import pytest
19 18  
20   -def test_encoding(client, test_name, inputs):
  19 +try:
  20 + import numpy as np
  21 + from clip_client import Client
  22 +except ImportError:
  23 + pytest.skip("clip_client not installed (optional clip-as-service client)", allow_module_level=True)
  24 +
  25 +
  26 +def _test_encoding(client, test_name, inputs):
21 27 """测试编码功能"""
22 28 print(f"\n{test_name}...")
23 29 try:
... ... @@ -74,21 +80,21 @@ def main():
74 80 results = []
75 81  
76 82 # 测试1: 文本编码
77   - results.append(test_encoding(
  83 + results.append(_test_encoding(
78 84 client,
79 85 "测试1: 编码文本",
80 86 ['这是一个测试文本', '另一个测试文本']
81 87 ))
82 88  
83 89 # 测试2: 图像编码
84   - results.append(test_encoding(
  90 + results.append(_test_encoding(
85 91 client,
86 92 "测试2: 编码图像(远程 URL)",
87 93 ['https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg']
88 94 ))
89 95  
90 96 # 测试3: 混合编码
91   - results.append(test_encoding(
  97 + results.append(_test_encoding(
92 98 client,
93 99 "测试3: 混合编码(文本和图像)",
94 100 ['这是一段文本', 'https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg']
... ...