Commit 7299bae6396856b85f415a780200216d2d97fede

Authored by tangwang
1 parent bc54124c

tests

.github/workflows/test.yml
1 -name: saas-search Test Pipeline 1 +name: CI - Service Contracts
2 2
3 on: 3 on:
4 push: 4 push:
5 - branches: [ main, master, develop ] 5 + branches: [main, master, develop]
6 pull_request: 6 pull_request:
7 - branches: [ main, master, develop ]  
8 - workflow_dispatch: # 允许手动触发  
9 -  
10 -env:  
11 - PYTHON_VERSION: '3.9'  
12 - NODE_VERSION: '16' 7 + branches: [main, master, develop]
  8 + workflow_dispatch:
13 9
14 jobs: 10 jobs:
15 - # 代码质量检查  
16 - code-quality:  
17 - runs-on: ubuntu-latest  
18 - name: Code Quality Check  
19 -  
20 - steps:  
21 - - name: Checkout code  
22 - uses: actions/checkout@v4  
23 -  
24 - - name: Set up Python  
25 - uses: actions/setup-python@v4  
26 - with:  
27 - python-version: ${{ env.PYTHON_VERSION }}  
28 -  
29 - - name: Install dependencies  
30 - run: |  
31 - python -m pip install --upgrade pip  
32 - pip install flake8 black isort mypy pylint  
33 - pip install -r requirements.txt  
34 -  
35 - - name: Run Black (code formatting)  
36 - run: |  
37 - black --check --diff .  
38 -  
39 - - name: Run isort (import sorting)  
40 - run: |  
41 - isort --check-only --diff .  
42 -  
43 - - name: Run Flake8 (linting)  
44 - run: |  
45 - flake8 --max-line-length=100 --ignore=E203,W503 .  
46 -  
47 - - name: Run MyPy (type checking)  
48 - run: |  
49 - mypy --ignore-missing-imports --no-strict-optional .  
50 -  
51 - - name: Run Pylint  
52 - run: |  
53 - pylint --disable=C0114,C0115,C0116 --errors-only .  
54 -  
55 - # 单元测试  
56 - unit-tests:  
57 - runs-on: ubuntu-latest  
58 - name: Unit Tests  
59 -  
60 - strategy:  
61 - matrix:  
62 - python-version: ['3.8', '3.9', '3.10', '3.11']  
63 -  
64 - steps:  
65 - - name: Checkout code  
66 - uses: actions/checkout@v4  
67 -  
68 - - name: Set up Python ${{ matrix.python-version }}  
69 - uses: actions/setup-python@v4  
70 - with:  
71 - python-version: ${{ matrix.python-version }}  
72 -  
73 - - name: Cache pip dependencies  
74 - uses: actions/cache@v3  
75 - with:  
76 - path: ~/.cache/pip  
77 - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements*.txt') }}  
78 - restore-keys: |  
79 - ${{ runner.os }}-pip-  
80 -  
81 - - name: Install dependencies  
82 - run: |  
83 - python -m pip install --upgrade pip  
84 - pip install pytest pytest-cov pytest-json-report  
85 - pip install -r requirements.txt  
86 -  
87 - - name: Create test logs directory  
88 - run: mkdir -p test_logs  
89 -  
90 - - name: Run unit tests  
91 - run: |  
92 - python -m pytest tests/unit/ \  
93 - -v \  
94 - --tb=short \  
95 - --cov=. \  
96 - --cov-report=xml \  
97 - --cov-report=html \  
98 - --cov-report=term-missing \  
99 - --json-report \  
100 - --json-report-file=test_logs/unit_test_results.json  
101 -  
102 - - name: Upload coverage to Codecov  
103 - uses: codecov/codecov-action@v3  
104 - with:  
105 - file: ./coverage.xml  
106 - flags: unittests  
107 - name: codecov-umbrella  
108 -  
109 - - name: Upload unit test results  
110 - uses: actions/upload-artifact@v3  
111 - if: always()  
112 - with:  
113 - name: unit-test-results-${{ matrix.python-version }}  
114 - path: |  
115 - test_logs/unit_test_results.json  
116 - htmlcov/  
117 -  
118 - # 集成测试  
119 - integration-tests:  
120 - runs-on: ubuntu-latest  
121 - name: Integration Tests  
122 - needs: [code-quality, unit-tests]  
123 -  
124 - services:  
125 - elasticsearch:  
126 - image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0  
127 - env:  
128 - discovery.type: single-node  
129 - ES_JAVA_OPTS: -Xms1g -Xmx1g  
130 - xpack.security.enabled: false  
131 - ports:  
132 - - 9200:9200  
133 - options: >-  
134 - --health-cmd "curl http://localhost:9200/_cluster/health"  
135 - --health-interval 10s  
136 - --health-timeout 5s  
137 - --health-retries 10  
138 -  
139 - steps:  
140 - - name: Checkout code  
141 - uses: actions/checkout@v4  
142 -  
143 - - name: Set up Python  
144 - uses: actions/setup-python@v4  
145 - with:  
146 - python-version: ${{ env.PYTHON_VERSION }}  
147 -  
148 - - name: Install system dependencies  
149 - run: |  
150 - sudo apt-get update  
151 - sudo apt-get install -y curl  
152 -  
153 - - name: Install Python dependencies  
154 - run: |  
155 - python -m pip install --upgrade pip  
156 - pip install pytest pytest-json-report httpx  
157 - pip install -r requirements.txt  
158 -  
159 - - name: Create test logs directory  
160 - run: mkdir -p test_logs  
161 -  
162 - - name: Wait for Elasticsearch  
163 - run: |  
164 - echo "Waiting for Elasticsearch to be ready..."  
165 - for i in {1..30}; do  
166 - if curl -s http://localhost:9200/_cluster/health | grep -q '"status":"green\|yellow"'; then  
167 - echo "Elasticsearch is ready"  
168 - break  
169 - fi  
170 - echo "Attempt $i/30: Elasticsearch not ready yet"  
171 - sleep 2  
172 - done  
173 -  
174 - - name: Setup test index  
175 - run: |  
176 - curl -X PUT http://localhost:9200/test_products \  
177 - -H 'Content-Type: application/json' \  
178 - -d '{  
179 - "settings": {  
180 - "number_of_shards": 1,  
181 - "number_of_replicas": 0  
182 - },  
183 - "mappings": {  
184 - "properties": {  
185 - "name": {"type": "text"},  
186 - "brand_name": {"type": "text"},  
187 - "tags": {"type": "text"},  
188 - "price": {"type": "double"},  
189 - "category_id": {"type": "integer"},  
190 - "spu_id": {"type": "keyword"},  
191 - "text_embedding": {"type": "dense_vector", "dims": 1024}  
192 - }  
193 - }  
194 - }'  
195 -  
196 - - name: Insert test data  
197 - run: |  
198 - curl -X POST http://localhost:9200/test_products/_bulk \  
199 - -H 'Content-Type: application/json' \  
200 - --data-binary @- << 'EOF'  
201 -{"index": {"_id": "1"}}  
202 -{"name": "红色连衣裙", "brand_name": "测试品牌", "tags": ["红色", "连衣裙", "女装"], "price": 299.0, "category_id": 1, "spu_id": "dress_001"}  
203 -{"index": {"_id": "2"}}  
204 -{"name": "蓝色连衣裙", "brand_name": "测试品牌", "tags": ["蓝色", "连衣裙", "女装"], "price": 399.0, "category_id": 1, "spu_id": "dress_002"}  
205 -{"index": {"_id": "3"}}  
206 -{"name": "智能手机", "brand_name": "科技品牌", "tags": ["智能", "手机", "数码"], "price": 2999.0, "category_id": 2, "spu_id": "phone_001"}  
207 -EOF  
208 -  
209 - - name: Run integration tests  
210 - env:  
211 - ES_HOST: http://localhost:9200  
212 - TENANT_ID: test_tenant  
213 - TESTING_MODE: true  
214 - run: |  
215 - python -m pytest tests/integration/ \  
216 - -v \  
217 - --tb=short \  
218 - -m "not slow" \  
219 - --json-report \  
220 - --json-report-file=test_logs/integration_test_results.json  
221 -  
222 - - name: Upload integration test results  
223 - uses: actions/upload-artifact@v3  
224 - if: always()  
225 - with:  
226 - name: integration-test-results  
227 - path: test_logs/integration_test_results.json  
228 -  
229 - # API测试  
230 - api-tests:  
231 - runs-on: ubuntu-latest  
232 - name: API Tests  
233 - needs: [code-quality, unit-tests]  
234 -  
235 - services:  
236 - elasticsearch:  
237 - image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0  
238 - env:  
239 - discovery.type: single-node  
240 - ES_JAVA_OPTS: -Xms1g -Xmx1g  
241 - xpack.security.enabled: false  
242 - ports:  
243 - - 9200:9200  
244 - options: >-  
245 - --health-cmd "curl http://localhost:9200/_cluster/health"  
246 - --health-interval 10s  
247 - --health-timeout 5s  
248 - --health-retries 10  
249 -  
250 - steps:  
251 - - name: Checkout code  
252 - uses: actions/checkout@v4  
253 -  
254 - - name: Set up Python  
255 - uses: actions/setup-python@v4  
256 - with:  
257 - python-version: ${{ env.PYTHON_VERSION }}  
258 -  
259 - - name: Install system dependencies  
260 - run: |  
261 - sudo apt-get update  
262 - sudo apt-get install -y curl  
263 -  
264 - - name: Install Python dependencies  
265 - run: |  
266 - python -m pip install --upgrade pip  
267 - pip install pytest pytest-json-report httpx  
268 - pip install -r requirements.txt  
269 -  
270 - - name: Create test logs directory  
271 - run: mkdir -p test_logs  
272 -  
273 - - name: Wait for Elasticsearch  
274 - run: |  
275 - echo "Waiting for Elasticsearch to be ready..."  
276 - for i in {1..30}; do  
277 - if curl -s http://localhost:9200/_cluster/health | grep -q '"status":"green\|yellow"'; then  
278 - echo "Elasticsearch is ready"  
279 - break  
280 - fi  
281 - echo "Attempt $i/30: Elasticsearch not ready yet"  
282 - sleep 2  
283 - done  
284 -  
285 - - name: Setup test index and data  
286 - run: |  
287 - # 创建索引  
288 - curl -X PUT http://localhost:9200/test_products \  
289 - -H 'Content-Type: application/json' \  
290 - -d '{  
291 - "settings": {"number_of_shards": 1, "number_of_replicas": 0},  
292 - "mappings": {  
293 - "properties": {  
294 - "name": {"type": "text"}, "brand_name": {"type": "text"},  
295 - "tags": {"type": "text"}, "price": {"type": "double"},  
296 - "category_id": {"type": "integer"}, "spu_id": {"type": "keyword"},  
297 - "text_embedding": {"type": "dense_vector", "dims": 1024}  
298 - }  
299 - }  
300 - }'  
301 -  
302 - # 插入测试数据  
303 - curl -X POST http://localhost:9200/test_products/_bulk \  
304 - -H 'Content-Type: application/json' \  
305 - --data-binary @- << 'EOF'  
306 -{"index": {"_id": "1"}}  
307 -{"name": "红色连衣裙", "brand_name": "测试品牌", "tags": ["红色", "连衣裙", "女装"], "price": 299.0, "category_id": 1, "spu_id": "dress_001"}  
308 -{"index": {"_id": "2"}}  
309 -{"name": "蓝色连衣裙", "brand_name": "测试品牌", "tags": ["蓝色", "连衣裙", "女装"], "price": 399.0, "category_id": 1, "spu_id": "dress_002"}  
310 -EOF  
311 -  
312 - - name: Start API service  
313 - env:  
314 - ES_HOST: http://localhost:9200  
315 - TENANT_ID: test_tenant  
316 - API_HOST: 127.0.0.1  
317 - API_PORT: 6003  
318 - TESTING_MODE: true  
319 - run: |  
320 - python -m api.app \  
321 - --host $API_HOST \  
322 - --port $API_PORT \  
323 - --tenant $TENANT_ID \  
324 - --es-host $ES_HOST &  
325 - echo $! > api.pid  
326 -  
327 - # 等待API服务启动  
328 - for i in {1..30}; do  
329 - if curl -s http://$API_HOST:$API_PORT/health > /dev/null; then  
330 - echo "API service is ready"  
331 - break  
332 - fi  
333 - echo "Attempt $i/30: API service not ready yet"  
334 - sleep 2  
335 - done  
336 -  
337 - - name: Run API tests  
338 - env:  
339 - ES_HOST: http://localhost:9200  
340 - API_HOST: 127.0.0.1  
341 - API_PORT: 6003  
342 - TENANT_ID: test_tenant  
343 - TESTING_MODE: true  
344 - run: |  
345 - python -m pytest tests/integration/test_api_integration.py \  
346 - -v \  
347 - --tb=short \  
348 - --json-report \  
349 - --json-report-file=test_logs/api_test_results.json  
350 -  
351 - - name: Stop API service  
352 - if: always()  
353 - run: |  
354 - if [ -f api.pid ]; then  
355 - kill $(cat api.pid) || true  
356 - rm api.pid  
357 - fi  
358 -  
359 - - name: Upload API test results  
360 - uses: actions/upload-artifact@v3  
361 - if: always()  
362 - with:  
363 - name: api-test-results  
364 - path: test_logs/api_test_results.json  
365 -  
366 - # 性能测试  
367 - performance-tests:  
368 - runs-on: ubuntu-latest  
369 - name: Performance Tests  
370 - needs: [code-quality, unit-tests]  
371 - if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'  
372 -  
373 - services:  
374 - elasticsearch:  
375 - image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0  
376 - env:  
377 - discovery.type: single-node  
378 - ES_JAVA_OPTS: -Xms2g -Xmx2g  
379 - xpack.security.enabled: false  
380 - ports:  
381 - - 9200:9200  
382 - options: >-  
383 - --health-cmd "curl http://localhost:9200/_cluster/health"  
384 - --health-interval 10s  
385 - --health-timeout 5s  
386 - --health-retries 10  
387 -  
388 - steps:  
389 - - name: Checkout code  
390 - uses: actions/checkout@v4  
391 -  
392 - - name: Set up Python  
393 - uses: actions/setup-python@v4  
394 - with:  
395 - python-version: ${{ env.PYTHON_VERSION }}  
396 -  
397 - - name: Install dependencies  
398 - run: |  
399 - python -m pip install --upgrade pip  
400 - pip install pytest locust  
401 - pip install -r requirements.txt  
402 -  
403 - - name: Wait for Elasticsearch  
404 - run: |  
405 - echo "Waiting for Elasticsearch to be ready..."  
406 - for i in {1..30}; do  
407 - if curl -s http://localhost:9200/_cluster/health | grep -q '"status":"green\|yellow"'; then  
408 - echo "Elasticsearch is ready"  
409 - break  
410 - fi  
411 - sleep 2  
412 - done  
413 -  
414 - - name: Setup test data  
415 - run: |  
416 - # 创建并填充测试索引  
417 - python scripts/create_test_data.py --count 1000  
418 -  
419 - - name: Run performance tests  
420 - env:  
421 - ES_HOST: http://localhost:9200  
422 - TESTING_MODE: true  
423 - run: |  
424 - python scripts/run_performance_tests.py  
425 -  
426 - - name: Upload performance results  
427 - uses: actions/upload-artifact@v3  
428 - if: always()  
429 - with:  
430 - name: performance-test-results  
431 - path: performance_results/  
432 -  
433 - # 安全扫描  
434 - security-scan: 11 + service-contract-tests:
435 runs-on: ubuntu-latest 12 runs-on: ubuntu-latest
436 - name: Security Scan  
437 - needs: [code-quality] 13 + name: Service Contract Tests
438 14
439 steps: 15 steps:
440 - - name: Checkout code  
441 - uses: actions/checkout@v4  
442 -  
443 - - name: Set up Python  
444 - uses: actions/setup-python@v4  
445 - with:  
446 - python-version: ${{ env.PYTHON_VERSION }}  
447 -  
448 - - name: Install security scanning tools  
449 - run: |  
450 - python -m pip install --upgrade pip  
451 - pip install safety bandit  
452 -  
453 - - name: Run Safety (dependency check)  
454 - run: |  
455 - safety check --json --output safety_report.json || true  
456 -  
457 - - name: Run Bandit (security linter)  
458 - run: |  
459 - bandit -r . -f json -o bandit_report.json || true  
460 -  
461 - - name: Upload security reports  
462 - uses: actions/upload-artifact@v3  
463 - if: always()  
464 - with:  
465 - name: security-reports  
466 - path: |  
467 - safety_report.json  
468 - bandit_report.json  
469 -  
470 - # 测试结果汇总  
471 - test-summary:  
472 - runs-on: ubuntu-latest  
473 - name: Test Summary  
474 - needs: [unit-tests, integration-tests, api-tests, security-scan]  
475 - if: always()  
476 -  
477 - steps:  
478 - - name: Checkout code  
479 - uses: actions/checkout@v4  
480 -  
481 - - name: Download all test artifacts  
482 - uses: actions/download-artifact@v3  
483 -  
484 - - name: Generate test summary  
485 - run: |  
486 - python scripts/generate_test_summary.py  
487 -  
488 - - name: Upload final report  
489 - uses: actions/upload-artifact@v3  
490 - with:  
491 - name: final-test-report  
492 - path: final_test_report.*  
493 -  
494 - - name: Comment PR with results  
495 - if: github.event_name == 'pull_request'  
496 - uses: actions/github-script@v6  
497 - with:  
498 - script: |  
499 - const fs = require('fs');  
500 -  
501 - // 读取测试报告  
502 - let reportContent = '';  
503 - try {  
504 - reportContent = fs.readFileSync('final_test_report.txt', 'utf8');  
505 - } catch (e) {  
506 - console.log('Could not read report file');  
507 - return;  
508 - }  
509 -  
510 - // 提取摘要信息  
511 - const lines = reportContent.split('\n');  
512 - let summary = '';  
513 - let inSummary = false;  
514 -  
515 - for (const line of lines) {  
516 - if (line.includes('测试摘要')) {  
517 - inSummary = true;  
518 - continue;  
519 - }  
520 - if (inSummary && line.includes('测试套件详情')) {  
521 - break;  
522 - }  
523 - if (inSummary && line.trim()) {  
524 - summary += line + '\n';  
525 - }  
526 - }  
527 -  
528 - // 构建评论内容  
529 - const comment = `## 🧪 测试报告\n\n${summary}\n\n详细的测试报告请查看 [Artifacts](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) 部分。`;  
530 -  
531 - // 发送评论  
532 - github.rest.issues.createComment({  
533 - issue_number: context.issue.number,  
534 - owner: context.repo.owner,  
535 - repo: context.repo.repo,  
536 - body: comment  
537 - });  
538 \ No newline at end of file 16 \ No newline at end of file
  17 + - name: Checkout code
  18 + uses: actions/checkout@v4
  19 +
  20 + - name: Set up Python
  21 + uses: actions/setup-python@v5
  22 + with:
  23 + python-version: "3.11"
  24 +
  25 + - name: Install dependencies
  26 + run: |
  27 + python -m pip install --upgrade pip
  28 + pip install -r requirements.txt
  29 +
  30 + - name: Run CI contract tests
  31 + run: |
  32 + python -m pytest tests/ci -q
539 \ No newline at end of file 33 \ No newline at end of file
1 -# TODO  
2 -  
3 -**多语言索引**:已改为可配置的 `index_languages`(默认为 `["en", "zh"]`),商家可勾选主市场语言。支持语言见 `config.tenant_config_loader.SUPPORTED_INDEX_LANGUAGES`(含 en, zh, zh_tw, ru, ja, ko, es, fr, pt, de, it, th, vi, id, ms, ar, hi, he, my, ta, ur, bn, pl, nl, ro, tr, km, lo, yue, cs, el, sv, hu, da, fi, uk, bg 等)。  
4 -  
5 -前端:  
6 -搜索模态框  
7 -点击搜索的时候,弹出 搜索模态框,参考 react、AJAX等技术来实现,搜索模态框的页面宽度和原始页面相同(占满),左侧是suggestions,右侧是即时刷新的搜索结果(每输入一个字母都刷新一次结果)。  
8 -但是要注意:搜索过程中,后端不要触发翻译(因为输入过程中的query翻译结果会有问题),因此需要增加一个参数:搜索类型,默认为当前的回车后发起的搜索,如果是输入过程中的结果刷新则类型为typing。  
9 -  
10 -  
11 -  
12 -多语言:  
13 -语义:  
14 -多语言:  
15 -1. dis_max的方式  
16 -"query" : {  
17 - "dis_max" : {  
18 - "queries" : [  
19 - {"match" : { "title.en" : xxx }},  
20 - {"match" : { "title.zh" : xxx }},  
21 - {"match" : { "title_xx" : xxx }}  
22 - ],  
23 - "tie_breaker" : 0.8  
24 - }  
25 -}  
26 -  
27 -  
28 -  
29 -"cross_field":  
30 -"multi_match" : {  
31 - "query" :  
32 - "fields" : [...],  
33 - "type": "cross_fields",  
34 - "operator" : "and"  
35 -}  
36 -  
37 -  
38 -支持英文的拼写纠错:  
39 -title: multi_field  
40 -"query" : {  
41 - "query_string" : {  
42 - "query": "xxx",  
43 - "default_field": "title.ngram",  
44 - "minimum_should_match": "85%"  
45 - }  
46 -}  
47 -  
48 -  
49 -  
50 -业务提权:  
51 -rescore: window_size query_weight rescore_query_weight  
52 -  
53 -function_score:  
54 -boost  
55 -以及 可以把每个子查询用function_score包一下  
56 -  
57 -  
58 -  
59 -  
60 -query anchor  
61 -我想给elasticsearch 增加字段 query anchor ,即哪些query点击到了这个doc,一个doc下面有多个query anchor,每个query anchor又有这两个属性:weight、dweight,分别代表 query在doc下的点击分布权重、doc在query下的点击分布权重。请问该如何设计这两个ES字段。  
62 -  
63 -需要有zh en两套query anchor,因为他们的解析器不一样。  
64 -  
65 -他的功能是辅助召回和排序。我搜索一个query,那么每个query跟 doc中的query anchor的相关性,也就是,除了将query到title 和keywords / brief等文本字段中搜索,也到 query anchor中搜索,从而辅助召回和相关性的计算。  
66 -  
67 - 1 +# 电商搜索引擎 SaaS
68 2
  3 +多租户、可配置、可扩展的电商搜索平台(Shoplazza 等独立站场景)。
69 4
  5 +README 用于给后续开发者建立统一认知:**系统框架、模块边界、设计原则、研发流程与 CI 测试入口**,帮助持续迭代时避免分叉设计与冗余代码。
70 6
71 -# 电商搜索引擎 SaaS 7 +---
72 8
73 -一个针对跨境独立站(店匠 Shoplazza 等)的多租户可配置搜索平台。README 作为项目导航入口,帮助你在不同阶段定位到更详细的文档。 9 +## 1) 项目目标与边界
74 10
  11 +- **目标**:在统一架构下支持关键词检索、语义检索、分面过滤、多语言、重排、图片检索。
  12 +- **边界**:本仓库负责搜索核心能力与服务编排;业务方通过标准 HTTP API 对接。
  13 +- **核心约束**:
  14 + - 调用方稳定(API/Provider 契约优先)
  15 + - 配置单一来源(`config/config.yaml` + `.env` 覆盖)
  16 + - 扩展优先走插件化(provider/backend),避免散落式分叉实现
75 17
76 -## 项目环境 18 +---
77 19
78 -以项目根目录的 **`activate.sh`** 为准(**优先激活 venv:`./.venv`,并加载 `.env`;兼容 Conda 回退**): 20 +## 2) 快速开始
79 21
80 ```bash 22 ```bash
81 -# 推荐:首次创建 venv(默认安装基础依赖) 23 +# 首次创建环境(默认基础依赖)
82 ./scripts/create_venv.sh 24 ./scripts/create_venv.sh
83 -  
84 -# 如需本地向量/图片编码(会安装 torch/transformers 等重依赖)  
85 -# INSTALL_ML=1 ./scripts/create_venv.sh  
86 source activate.sh 25 source activate.sh
87 -```  
88 26
89 -新机器首次需创建环境,见 `docs/环境配置说明.md`(推荐 venv;Conda 为兼容旧流程)。 27 +# 启动核心服务(backend/indexer/frontend)
  28 +./run.sh
90 29
91 -## 测试pipeline 30 +# 可选:附加能力服务
  31 +START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh
92 32
93 -1.  
94 -店铺1 tenant_id=162:  
95 -fake数据 生成商品导入数据 提交到店匠的店铺:  
96 -cd /data/saas-search && source activate.sh && python scripts/csv_to_excel_multi_variant.py --output with_colors.xlsx 33 +# 查看状态
  34 +./scripts/service_ctl.sh status
  35 +```
97 36
98 -店铺2 tenant_id= 37 +核心端口:
99 38
  39 +- `6002` backend(`/search/*`, `/admin/*`)
  40 +- `6004` indexer(`/indexer/*`)
  41 +- `6003` frontend
  42 +- `6005` embedding(可选)
  43 +- `6006` translator(可选)
  44 +- `6007` reranker(可选)
100 45
101 -2. 后端:自动同步到mysql 46 +更完整示例见 `docs/QUICKSTART.md`。
102 47
103 -3. mysql到ES: 48 +---
104 49
105 -python scripts/recreate_and_import.py \  
106 - --tenant-id 162 \  
107 - --db-host <mysql_host> \  
108 - --db-database saas \  
109 - --db-username saas \  
110 - --db-password <password> \  
111 - --es-host http://localhost:9200 50 +## 3) 总体架构(开发者视角)
112 51
113 -构造查询:  
114 -参考 @常用查询 - ES.md 52 +- `api/`:统一 API 入口(search/admin/indexer app)
  53 +- `search/`:召回、排序、结果组织
  54 +- `query/`:查询解析、多语言处理、改写
  55 +- `indexer/`:MySQL 行数据 -> ES 文档的转换与索引流程
  56 +- `providers/`:能力调用抽象(translation/embedding/rerank)
  57 +- `reranker/`:重排服务及后端实现
  58 +- `embeddings/`:向量服务(文本/图像)
  59 +- `config/`:配置加载与服务配置解析
115 60
  61 +关键设计:**Provider(调用方式)与 Backend(推理实现)分离**,新增能力优先在协议与工厂注册,不改调用方主流程。
116 62
117 -## 核心能力速览 63 +---
118 64
119 -- **多语言 + 自动翻译**:中文、英文、俄文等语言检测与路由(BGE-M3、DeepL)  
120 -- **语义 + 关键词混排**:BM25、dense vector(BGE-M3/CN-CLIP)融合  
121 -- **布尔与分面**:AND / OR / ANDNOT / RANK、Terms & Range facets  
122 -- **多租户隔离**:共享 `search_products` 索引,通过 `tenant_id` 严格隔离  
123 -- **可配置化**:字段/索引域/排序表达式/查询改写全部配置驱动  
124 -- **脚本化流水线**:Mock/CSV 数据 → MySQL → Elasticsearch → API/前端 65 +## 4) 设计原则(避免后续分叉)
125 66
126 -## 新人入口 67 +- **单一配置源**:服务地址、provider 选择、后端参数统一在 `config/config.yaml`,环境变量仅做覆盖。
  68 +- **接口契约优先**:外部 API 契约与 provider 契约稳定,内部重构不影响调用方。
  69 +- **扩展走工厂**:新增 provider/backend 必须在工厂函数中显式注册,禁止旁路分支。
  70 +- **可观测性优先**:健康检查、关键日志、请求上下文必须可追踪。
  71 +- **测试优先保障契约**:CI 首先保证接口契约和核心路径可用,再逐步扩展性能与业务测试。
127 72
128 -**→ 开发者必读**:[docs/DEVELOPER_GUIDE.md](docs/DEVELOPER_GUIDE.md) — 项目全貌、设计原则、扩展规范与迭代检查清单,保证后续开发在统一框架内进行。 73 +---
129 74
130 -**→ 快速上手**:[docs/QUICKSTART.md](docs/QUICKSTART.md) — 环境、服务、模块、请求示例一页搞定。 75 +## 5) 文档入口(建议阅读顺序)
131 76
132 | 步骤 | 文档 | 77 | 步骤 | 文档 |
133 |------|------| 78 |------|------|
134 -| 0. 框架与规范(推荐首读) | `docs/DEVELOPER_GUIDE.md` |  
135 -| 1. 环境与启动 | `docs/QUICKSTART.md` |  
136 -| 2. 搜索/索引 API | `docs/QUICKSTART.md` §3、`docs/搜索API速查表.md` |  
137 -| 3. 运维与故障 | `docs/Usage-Guide.md` |  
138 -| 4. 架构与扩展 | `docs/PROVIDER_ARCHITECTURE.md`、`docs/MODULE_EXTENSION_SPEC.md`、`docs/系统设计文档.md` |  
139 -  
140 -### Runtimes & 命令示例 79 +| 0. 全局规范(首读) | `docs/DEVELOPER_GUIDE.md` |
  80 +| 1. 开发与配置 | `docs/QUICKSTART.md` |
  81 +| 2. 运行与排障 | `docs/Usage-Guide.md` |
  82 +| 3. API 详细说明 | `docs/搜索API对接指南.md` |
  83 +| 4. 快速参数速查 | `docs/搜索API速查表.md` |
  84 +| 5. 首次环境搭建 | `docs/环境配置说明.md` |
141 85
142 -```bash  
143 -# 1. 安装依赖与准备服务(环境创建见 docs/环境配置说明.md)  
144 -source activate.sh # 或先 export CONDA_ROOT=你的conda路径  
145 -pip install -r requirements.txt # 若用 environment.yml 创建环境可省略  
146 -docker run -d --name es -p 9200:9200 elasticsearch:8.11.0 86 +---
147 87
148 -# 2. 构造测试数据并导入 MySQL  
149 -./scripts/mock_data.sh # 详见 TEST_DATA_GUIDE.md 88 +## 6) 持续集成测试(推荐最小集)
150 89
151 -# 3. 创建租户索引结构并导入数据(推荐)  
152 -./scripts/create_tenant_index.sh 162  
153 -curl -X POST "http://localhost:6004/indexer/reindex" \  
154 - -H "Content-Type: application/json" \  
155 - -d '{"tenant_id":"162","batch_size":500}' 90 +本仓库提供一套轻量、稳定、易维护的 CI 测试入口,覆盖以下服务契约:
156 91
157 -# 4. 启动核心服务(backend/indexer/frontend)  
158 -./run.sh 92 +- 搜索接口(search API)
  93 +- 索引接口(indexer API)
  94 +- 向量服务(embedding service)
  95 +- 翻译服务(translator service)
  96 +- 重排服务(reranker service)
159 97
160 -# (可选)附加启动 embedding / translator / reranker  
161 -START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh  
162 -#  
163 -# 查看服务状态 / 停止  
164 -./scripts/service_ctl.sh status  
165 -./scripts/stop.sh 98 +本地运行:
166 99
167 -# 5. 调用文本搜索 API  
168 -curl -X POST http://localhost:6002/search/ \  
169 - -H "Content-Type: application/json" \  
170 - -H "X-Tenant-ID: 1" \  
171 - -d '{"query": "玩具", "size": 10}' 100 +```bash
  101 +source activate.sh
  102 +python -m pytest tests/ci -q
172 ``` 103 ```
173 104
174 -## 文档索引 105 +该测试集采用 mock/stub,**不依赖真实 ES/MySQL/大模型服务**,适合作为 PR 级快速回归门禁。
175 106
176 -| 文档 | 用途 |  
177 -|------|------|  
178 -| `docs/DEVELOPER_GUIDE.md` | **开发者开放指南**:全貌、原则、规范、检查清单 |  
179 -| `docs/QUICKSTART.md` | 新人上手:环境、服务、模块、请求 |  
180 -| `docs/Usage-Guide.md` | 运维:日志、多环境、故障排查 |  
181 -| `docs/搜索API速查表.md` | 搜索 API 参数速查 |  
182 -| `docs/搜索API对接指南.md` | 搜索 API 完整说明 |  
183 -| `docs/PROVIDER_ARCHITECTURE.md` | 翻译/向量/重排 provider 扩展 |  
184 -| `docs/MODULE_EXTENSION_SPEC.md` | 向量/重排后端可插拔规范 |  
185 -| `docs/环境配置说明.md` | 首次部署、新机器环境 |  
186 -| `docs/系统设计文档.md` | 架构与模块细节 |  
187 -  
188 -## 关键工作流指引  
189 -  
190 -- **数据构建 → MySQL → Elasticsearch**  
191 - - `scripts/mock_data.sh`:Tenant1 Mock + Tenant2 CSV 一条龙  
192 - - `scripts/create_tenant_index.sh <tenant_id>` + `POST /indexer/reindex`:推荐导入链路  
193 - - 详解:`测试数据指南.md`  
194 -  
195 -- **索引富化 & Java 对接**  
196 - - Java 索引程序负责:全量/增量调度 + 从 MySQL 查询 `shoplazza_product_spu/sku/option/...`  
197 - - Python `indexer` 模块负责:**MySQL 行 → ES doc** 的全部逻辑(多语言、翻译、向量、规格聚合等)  
198 - - 正式对接接口(推荐):  
199 - - `POST http://<indexer_host>:6004/indexer/build-docs`  
200 - - 入参:`tenant_id + items[{spu, skus, options}]`  
201 - - 出参:与 `mappings/search_products.json` 完全一致的 `docs` 列表,上游自行写入 ES  
202 - - 调试/自测接口(内部使用):  
203 - - `POST http://127.0.0.1:6004/indexer/build-docs-from-db`,只需要 `tenant_id + spu_ids`,由服务内部查库并返回 ES doc  
204 - - 详解:`indexer/README.md`、`docs/索引字段说明v2.md`  
205 -  
206 -- **搜索服务 & API**  
207 - - `api/`(FastAPI)承载 REST API,`search/` + `query/` 负责查询解析与下发  
208 - - API、分页、过滤、Facet、KNN 等:`搜索API对接指南.md`  
209 - - 对接案例、示例与错误码:`搜索API对接指南.md`、`Search-API-Examples.md`  
210 -  
211 -- **统一配置**  
212 - - 所有租户共享统一的索引结构和查询配置(硬编码)  
213 - - 索引 mapping: `mappings/search_products.json`  
214 - - 查询配置: `search/query_config.py`  
215 - - 详解:`基础配置指南.md`、`索引字段说明v2.md`  
216 -  
217 -## 仓库结构(概览) 107 +---
218 108
219 -```  
220 -api/ FastAPI 服务与路由  
221 -config/ 字段/索引/查询配置体系  
222 -indexer/ MySQL → ES 管道(mapping / transformer / bulk)  
223 -query/ 查询解析、改写、翻译、embedding  
224 -search/ 多语言构建、布尔解析、排序引擎  
225 -scripts/ 数据/服务脚本(mock_data, ingest, run 等)  
226 -frontend/ 简易调试页面  
227 -docs/ 运营及中文资料  
228 -``` 109 +## 7) 代码质量与持续继承要求
  110 +
  111 +- 新增功能必须补最小测试(至少覆盖 1 条成功路径 + 1 条参数异常路径)
  112 +- 修改公共协议时必须同步更新:
  113 + - `docs/QUICKSTART.md`
  114 + - 对应服务 README / API 文档
  115 + - `tests/ci` 契约用例
  116 +- 禁止新增“临时分支逻辑”绕过 provider/backend 工厂
  117 +- 优先减少重复实现,复用现有转换链路与配置解析入口
docs/QUICKSTART.md
@@ -27,6 +27,7 @@ @@ -27,6 +27,7 @@
27 4. [模块扩展规范(Embedding / Rerank)](#4-模块扩展规范embedding--rerank) 27 4. [模块扩展规范(Embedding / Rerank)](#4-模块扩展规范embedding--rerank)
28 5. [验证、日志与常见排障入口](#5-验证日志与常见排障入口) 28 5. [验证、日志与常见排障入口](#5-验证日志与常见排障入口)
29 6. [相关文档](#6-相关文档) 29 6. [相关文档](#6-相关文档)
  30 +7. [持续集成测试(最小可维护方案)](#7-持续集成测试最小可维护方案)
30 31
31 --- 32 ---
32 33
@@ -374,3 +375,32 @@ lsof -i :6004 @@ -374,3 +375,32 @@ lsof -i :6004
374 | `indexer/README.md` | 索引模块职责与接口 | 375 | `indexer/README.md` | 索引模块职责与接口 |
375 | `embeddings/README.md` | 向量化服务说明 | 376 | `embeddings/README.md` | 向量化服务说明 |
376 | `reranker/README.md` | 重排服务说明 | 377 | `reranker/README.md` | 重排服务说明 |
  378 +
  379 +---
  380 +
  381 +## 7. 持续集成测试(最小可维护方案)
  382 +
  383 +目标:让后续开发者在不依赖真实 ES/MySQL/模型服务的前提下,快速验证核心服务契约不被破坏。
  384 +
  385 +### 7.1 测试范围
  386 +
  387 +`tests/ci/test_service_api_contracts.py` 覆盖:
  388 +
  389 +- 搜索接口:`/search/`、`/search/image`、`/search/suggestions`
  390 +- 索引接口:`/indexer/reindex`、`/indexer/index`、`/indexer/build-docs`
  391 +- 向量服务:`/embed/text`、`/embed/image`
  392 +- 翻译服务:`/translate`、`/health`
  393 +- 重排服务:`/rerank`、`/health`
  394 +
  395 +### 7.2 运行方式
  396 +
  397 +```bash
  398 +source activate.sh
  399 +python -m pytest tests/ci -q
  400 +```
  401 +
  402 +### 7.3 设计取舍
  403 +
  404 +- 使用 mock/stub 注入依赖,确保测试快且稳定
  405 +- 重点测“接口契约与参数行为”,而不是底层模型质量
  406 +- 作为 PR 级门禁;真实环境联调放在运维/预发布流程
scripts/run_ci_tests.sh 0 → 100755
@@ -0,0 +1,9 @@ @@ -0,0 +1,9 @@
  1 +#!/bin/bash
  2 +
  3 +set -euo pipefail
  4 +
  5 +cd "$(dirname "$0")/.."
  6 +source ./activate.sh
  7 +
  8 +echo "Running CI contract tests..."
  9 +python -m pytest tests/ci -q
tests/ci/test_service_api_contracts.py 0 → 100644
@@ -0,0 +1,281 @@ @@ -0,0 +1,281 @@
  1 +from __future__ import annotations
  2 +
  3 +from types import SimpleNamespace
  4 +from typing import Any, Dict, List
  5 +
  6 +import numpy as np
  7 +import pytest
  8 +from fastapi.testclient import TestClient
  9 +
  10 +
  11 +class _FakeSearcher:
  12 + def search(self, **kwargs):
  13 + return SimpleNamespace(
  14 + results=[
  15 + {
  16 + "spu_id": "spu-1",
  17 + "title": "测试商品",
  18 + "price": 99.0,
  19 + "currency": "USD",
  20 + "in_stock": True,
  21 + "skus": [],
  22 + "relevance_score": 1.2,
  23 + }
  24 + ],
  25 + total=1,
  26 + max_score=1.2,
  27 + took_ms=8,
  28 + facets=[],
  29 + query_info={"normalized_query": kwargs.get("query", "")},
  30 + suggestions=[],
  31 + related_searches=[],
  32 + debug_info=None,
  33 + )
  34 +
  35 + def search_by_image(self, **kwargs):
  36 + return self.search(**kwargs)
  37 +
  38 +
  39 +class _FakeSuggestionService:
  40 + def search(self, **kwargs):
  41 + return {
  42 + "query": kwargs["query"],
  43 + "language": kwargs.get("language", "en"),
  44 + "resolved_language": kwargs.get("language", "en"),
  45 + "suggestions": [{"text": "iphone 15", "score": 1.0}],
  46 + "took_ms": 3,
  47 + }
  48 +
  49 +
  50 +@pytest.fixture
  51 +def search_client(monkeypatch):
  52 + import api.app as search_app
  53 +
  54 + monkeypatch.setattr(search_app, "init_service", lambda es_host="": None)
  55 + monkeypatch.setattr(search_app, "get_searcher", lambda: _FakeSearcher())
  56 + monkeypatch.setattr(search_app, "get_suggestion_service", lambda: _FakeSuggestionService())
  57 +
  58 + with TestClient(search_app.app) as client:
  59 + yield client
  60 +
  61 +
  62 +def test_search_api_contract(search_client: TestClient):
  63 + response = search_client.post(
  64 + "/search/",
  65 + headers={"X-Tenant-ID": "162"},
  66 + json={"query": "toy", "size": 5},
  67 + )
  68 + assert response.status_code == 200
  69 + data = response.json()
  70 + assert data["total"] == 1
  71 + assert data["results"][0]["spu_id"] == "spu-1"
  72 +
  73 +
  74 +def test_image_search_api_contract(search_client: TestClient):
  75 + response = search_client.post(
  76 + "/search/image",
  77 + headers={"X-Tenant-ID": "162"},
  78 + json={"image_url": "https://example.com/a.jpg", "size": 3},
  79 + )
  80 + assert response.status_code == 200
  81 + assert response.json()["results"][0]["spu_id"] == "spu-1"
  82 +
  83 +
  84 +def test_suggestion_api_contract(search_client: TestClient):
  85 + response = search_client.get(
  86 + "/search/suggestions?q=iph&size=5&language=en",
  87 + headers={"X-Tenant-ID": "162"},
  88 + )
  89 + assert response.status_code == 200
  90 + data = response.json()
  91 + assert data["query"] == "iph"
  92 + assert len(data["suggestions"]) == 1
  93 +
  94 +
  95 +class _FakeBulkService:
  96 + def bulk_index(self, tenant_id: str, recreate_index: bool, batch_size: int):
  97 + return {
  98 + "tenant_id": tenant_id,
  99 + "recreate_index": recreate_index,
  100 + "batch_size": batch_size,
  101 + "success": True,
  102 + }
  103 +
  104 +
  105 +class _FakeTransformer:
  106 + def transform_spu_to_doc(self, tenant_id: str, spu_row, skus, options):
  107 + return {
  108 + "tenant_id": tenant_id,
  109 + "spu_id": str(spu_row.get("id", "0")),
  110 + "title": {"zh": str(spu_row.get("title", ""))},
  111 + }
  112 +
  113 +
  114 +class _FakeIncrementalService:
  115 + def index_spus_to_es(self, es_client, tenant_id: str, spu_ids: List[str], delete_spu_ids=None):
  116 + return {
  117 + "tenant_id": tenant_id,
  118 + "spu_ids": [{"spu_id": s, "status": "indexed"} for s in spu_ids],
  119 + "delete_spu_ids": [],
  120 + "total": len(spu_ids),
  121 + "success_count": len(spu_ids),
  122 + "failed_count": 0,
  123 + }
  124 +
  125 + def _get_transformer_bundle(self, tenant_id: str):
  126 + return _FakeTransformer(), None, False
  127 +
  128 +
  129 +@pytest.fixture
  130 +def indexer_client(monkeypatch):
  131 + import api.indexer_app as indexer_app
  132 + import api.routes.indexer as indexer_routes
  133 +
  134 + monkeypatch.setattr(indexer_app, "init_indexer_service", lambda es_host="": None)
  135 + monkeypatch.setattr(indexer_routes, "get_bulk_indexing_service", lambda: _FakeBulkService())
  136 + monkeypatch.setattr(indexer_routes, "get_incremental_service", lambda: _FakeIncrementalService())
  137 + monkeypatch.setattr(indexer_routes, "get_es_client", lambda: object())
  138 +
  139 + with TestClient(indexer_app.app) as client:
  140 + yield client
  141 +
  142 +
  143 +def test_indexer_reindex_contract(indexer_client: TestClient):
  144 + response = indexer_client.post(
  145 + "/indexer/reindex",
  146 + json={"tenant_id": "162", "batch_size": 100},
  147 + )
  148 + assert response.status_code == 200
  149 + assert response.json()["success"] is True
  150 +
  151 +
  152 +def test_indexer_incremental_contract(indexer_client: TestClient):
  153 + response = indexer_client.post(
  154 + "/indexer/index",
  155 + json={"tenant_id": "162", "spu_ids": ["1001", "1002"]},
  156 + )
  157 + assert response.status_code == 200
  158 + data = response.json()
  159 + assert data["success_count"] == 2
  160 +
  161 +
  162 +def test_indexer_build_docs_contract(indexer_client: TestClient):
  163 + response = indexer_client.post(
  164 + "/indexer/build-docs",
  165 + json={
  166 + "tenant_id": "162",
  167 + "items": [{"spu": {"id": 1, "title": "T-shirt"}, "skus": [], "options": []}],
  168 + },
  169 + )
  170 + assert response.status_code == 200
  171 + data = response.json()
  172 + assert data["success_count"] == 1
  173 + assert data["docs"][0]["spu_id"] == "1"
  174 +
  175 +
  176 +class _FakeTextModel:
  177 + def encode_batch(self, texts, batch_size=32, device="cpu"):
  178 + return [np.array([0.1, 0.2, 0.3], dtype=np.float32) for _ in texts]
  179 +
  180 +
  181 +class _FakeImageModel:
  182 + def encode_image_urls(self, urls, batch_size=8):
  183 + return [np.array([0.3, 0.2, 0.1], dtype=np.float32) for _ in urls]
  184 +
  185 +
  186 +@pytest.fixture
  187 +def embedding_client():
  188 + import embeddings.server as emb_server
  189 +
  190 + emb_server.app.router.on_startup.clear()
  191 + emb_server._text_model = _FakeTextModel()
  192 + emb_server._image_model = _FakeImageModel()
  193 +
  194 + with TestClient(emb_server.app) as client:
  195 + yield client
  196 +
  197 +
  198 +def test_embedding_text_contract(embedding_client: TestClient):
  199 + response = embedding_client.post("/embed/text", json=["hello", "world"])
  200 + assert response.status_code == 200
  201 + data = response.json()
  202 + assert len(data) == 2
  203 + assert len(data[0]) == 3
  204 +
  205 +
  206 +def test_embedding_image_contract(embedding_client: TestClient):
  207 + response = embedding_client.post("/embed/image", json=["https://example.com/a.jpg"])
  208 + assert response.status_code == 200
  209 + assert len(response.json()[0]) == 3
  210 +
  211 +
  212 +class _FakeTranslator:
  213 + model = "qwen"
  214 + use_cache = True
  215 +
  216 + def translate(self, text: str, target_lang: str, source_lang: str | None = None, prompt: str | None = None):
  217 + return f"{text}-{target_lang}"
  218 +
  219 +
  220 +@pytest.fixture
  221 +def translator_client(monkeypatch):
  222 + import api.translator_app as translator_app
  223 +
  224 + translator_app.app.router.on_startup.clear()
  225 + monkeypatch.setattr(translator_app, "get_translator", lambda model="qwen": _FakeTranslator())
  226 +
  227 + with TestClient(translator_app.app) as client:
  228 + yield client
  229 +
  230 +
  231 +def test_translator_api_contract(translator_client: TestClient):
  232 + response = translator_client.post(
  233 + "/translate",
  234 + json={"text": "商品名称", "target_lang": "en", "source_lang": "zh"},
  235 + )
  236 + assert response.status_code == 200
  237 + assert response.json()["translated_text"] == "商品名称-en"
  238 +
  239 +
  240 +def test_translator_health_contract(translator_client: TestClient):
  241 + response = translator_client.get("/health")
  242 + assert response.status_code == 200
  243 + assert response.json()["status"] == "healthy"
  244 +
  245 +
  246 +class _FakeReranker:
  247 + _model_name = "fake-reranker"
  248 +
  249 + def score_with_meta(self, query: str, docs: List[str], normalize: bool = True):
  250 + scores = [float(i + 1) for i in range(len(docs))]
  251 + meta: Dict[str, Any] = {"input_docs": len(docs), "unique_docs": len(set(docs))}
  252 + return scores, meta
  253 +
  254 +
  255 +@pytest.fixture
  256 +def reranker_client():
  257 + import reranker.server as reranker_server
  258 +
  259 + reranker_server.app.router.on_startup.clear()
  260 + reranker_server._reranker = _FakeReranker()
  261 + reranker_server._backend_name = "fake"
  262 +
  263 + with TestClient(reranker_server.app) as client:
  264 + yield client
  265 +
  266 +
  267 +def test_reranker_api_contract(reranker_client: TestClient):
  268 + response = reranker_client.post(
  269 + "/rerank",
  270 + json={"query": "wireless mouse", "docs": ["doc-a", "doc-b"]},
  271 + )
  272 + assert response.status_code == 200
  273 + data = response.json()
  274 + assert data["scores"] == [1.0, 2.0]
  275 + assert data["meta"]["input_docs"] == 2
  276 +
  277 +
  278 +def test_reranker_health_contract(reranker_client: TestClient):
  279 + response = reranker_client.get("/health")
  280 + assert response.status_code == 200
  281 + assert response.json()["status"] == "ok"
tests/test_cloud_embedding.py
@@ -11,6 +11,8 @@ import time @@ -11,6 +11,8 @@ import time
11 from datetime import datetime 11 from datetime import datetime
12 from pathlib import Path 12 from pathlib import Path
13 13
  14 +import pytest
  15 +
14 # Add parent directory to path 16 # Add parent directory to path
15 sys.path.insert(0, str(Path(__file__).parent.parent)) 17 sys.path.insert(0, str(Path(__file__).parent.parent))
16 18
@@ -44,6 +46,7 @@ def read_queries(file_path: str, limit: int = 100) -> list: @@ -44,6 +46,7 @@ def read_queries(file_path: str, limit: int = 100) -> list:
44 return queries 46 return queries
45 47
46 48
  49 +@pytest.mark.skip(reason="Requires data file and DASHSCOPE_API_KEY; run manually when needed")
47 def test_cloud_embedding(queries_file: str, num_queries: int = 100): 50 def test_cloud_embedding(queries_file: str, num_queries: int = 100):
48 """ 51 """
49 Test cloud embedding with queries from file. 52 Test cloud embedding with queries from file.
tests/test_cnclip_service.py
@@ -13,11 +13,17 @@ CN-CLIP 服务测试脚本 @@ -13,11 +13,17 @@ CN-CLIP 服务测试脚本
13 """ 13 """
14 14
15 import sys 15 import sys
16 -import numpy as np  
17 -from clip_client import Client  
18 16
  17 +import pytest
19 18
20 -def test_encoding(client, test_name, inputs): 19 +try:
  20 + import numpy as np
  21 + from clip_client import Client
  22 +except ImportError:
  23 + pytest.skip("clip_client not installed (optional clip-as-service client)", allow_module_level=True)
  24 +
  25 +
  26 +def _test_encoding(client, test_name, inputs):
21 """测试编码功能""" 27 """测试编码功能"""
22 print(f"\n{test_name}...") 28 print(f"\n{test_name}...")
23 try: 29 try:
@@ -74,21 +80,21 @@ def main(): @@ -74,21 +80,21 @@ def main():
74 results = [] 80 results = []
75 81
76 # 测试1: 文本编码 82 # 测试1: 文本编码
77 - results.append(test_encoding( 83 + results.append(_test_encoding(
78 client, 84 client,
79 "测试1: 编码文本", 85 "测试1: 编码文本",
80 ['这是一个测试文本', '另一个测试文本'] 86 ['这是一个测试文本', '另一个测试文本']
81 )) 87 ))
82 88
83 # 测试2: 图像编码 89 # 测试2: 图像编码
84 - results.append(test_encoding( 90 + results.append(_test_encoding(
85 client, 91 client,
86 "测试2: 编码图像(远程 URL)", 92 "测试2: 编码图像(远程 URL)",
87 ['https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg'] 93 ['https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg']
88 )) 94 ))
89 95
90 # 测试3: 混合编码 96 # 测试3: 混合编码
91 - results.append(test_encoding( 97 + results.append(_test_encoding(
92 client, 98 client,
93 "测试3: 混合编码(文本和图像)", 99 "测试3: 混合编码(文本和图像)",
94 ['这是一段文本', 'https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg'] 100 ['这是一段文本', 'https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg']