Commit 7299bae6396856b85f415a780200216d2d97fede
1 parent
bc54124c
tests
Showing
7 changed files
with
435 additions
and
723 deletions
Show diff stats
.github/workflows/test.yml
| 1 | -name: saas-search Test Pipeline | |
| 1 | +name: CI - Service Contracts | |
| 2 | 2 | |
| 3 | 3 | on: |
| 4 | 4 | push: |
| 5 | - branches: [ main, master, develop ] | |
| 5 | + branches: [main, master, develop] | |
| 6 | 6 | pull_request: |
| 7 | - branches: [ main, master, develop ] | |
| 8 | - workflow_dispatch: # 允许手动触发 | |
| 9 | - | |
| 10 | -env: | |
| 11 | - PYTHON_VERSION: '3.9' | |
| 12 | - NODE_VERSION: '16' | |
| 7 | + branches: [main, master, develop] | |
| 8 | + workflow_dispatch: | |
| 13 | 9 | |
| 14 | 10 | jobs: |
| 15 | - # 代码质量检查 | |
| 16 | - code-quality: | |
| 17 | - runs-on: ubuntu-latest | |
| 18 | - name: Code Quality Check | |
| 19 | - | |
| 20 | - steps: | |
| 21 | - - name: Checkout code | |
| 22 | - uses: actions/checkout@v4 | |
| 23 | - | |
| 24 | - - name: Set up Python | |
| 25 | - uses: actions/setup-python@v4 | |
| 26 | - with: | |
| 27 | - python-version: ${{ env.PYTHON_VERSION }} | |
| 28 | - | |
| 29 | - - name: Install dependencies | |
| 30 | - run: | | |
| 31 | - python -m pip install --upgrade pip | |
| 32 | - pip install flake8 black isort mypy pylint | |
| 33 | - pip install -r requirements.txt | |
| 34 | - | |
| 35 | - - name: Run Black (code formatting) | |
| 36 | - run: | | |
| 37 | - black --check --diff . | |
| 38 | - | |
| 39 | - - name: Run isort (import sorting) | |
| 40 | - run: | | |
| 41 | - isort --check-only --diff . | |
| 42 | - | |
| 43 | - - name: Run Flake8 (linting) | |
| 44 | - run: | | |
| 45 | - flake8 --max-line-length=100 --ignore=E203,W503 . | |
| 46 | - | |
| 47 | - - name: Run MyPy (type checking) | |
| 48 | - run: | | |
| 49 | - mypy --ignore-missing-imports --no-strict-optional . | |
| 50 | - | |
| 51 | - - name: Run Pylint | |
| 52 | - run: | | |
| 53 | - pylint --disable=C0114,C0115,C0116 --errors-only . | |
| 54 | - | |
| 55 | - # 单元测试 | |
| 56 | - unit-tests: | |
| 57 | - runs-on: ubuntu-latest | |
| 58 | - name: Unit Tests | |
| 59 | - | |
| 60 | - strategy: | |
| 61 | - matrix: | |
| 62 | - python-version: ['3.8', '3.9', '3.10', '3.11'] | |
| 63 | - | |
| 64 | - steps: | |
| 65 | - - name: Checkout code | |
| 66 | - uses: actions/checkout@v4 | |
| 67 | - | |
| 68 | - - name: Set up Python ${{ matrix.python-version }} | |
| 69 | - uses: actions/setup-python@v4 | |
| 70 | - with: | |
| 71 | - python-version: ${{ matrix.python-version }} | |
| 72 | - | |
| 73 | - - name: Cache pip dependencies | |
| 74 | - uses: actions/cache@v3 | |
| 75 | - with: | |
| 76 | - path: ~/.cache/pip | |
| 77 | - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements*.txt') }} | |
| 78 | - restore-keys: | | |
| 79 | - ${{ runner.os }}-pip- | |
| 80 | - | |
| 81 | - - name: Install dependencies | |
| 82 | - run: | | |
| 83 | - python -m pip install --upgrade pip | |
| 84 | - pip install pytest pytest-cov pytest-json-report | |
| 85 | - pip install -r requirements.txt | |
| 86 | - | |
| 87 | - - name: Create test logs directory | |
| 88 | - run: mkdir -p test_logs | |
| 89 | - | |
| 90 | - - name: Run unit tests | |
| 91 | - run: | | |
| 92 | - python -m pytest tests/unit/ \ | |
| 93 | - -v \ | |
| 94 | - --tb=short \ | |
| 95 | - --cov=. \ | |
| 96 | - --cov-report=xml \ | |
| 97 | - --cov-report=html \ | |
| 98 | - --cov-report=term-missing \ | |
| 99 | - --json-report \ | |
| 100 | - --json-report-file=test_logs/unit_test_results.json | |
| 101 | - | |
| 102 | - - name: Upload coverage to Codecov | |
| 103 | - uses: codecov/codecov-action@v3 | |
| 104 | - with: | |
| 105 | - file: ./coverage.xml | |
| 106 | - flags: unittests | |
| 107 | - name: codecov-umbrella | |
| 108 | - | |
| 109 | - - name: Upload unit test results | |
| 110 | - uses: actions/upload-artifact@v3 | |
| 111 | - if: always() | |
| 112 | - with: | |
| 113 | - name: unit-test-results-${{ matrix.python-version }} | |
| 114 | - path: | | |
| 115 | - test_logs/unit_test_results.json | |
| 116 | - htmlcov/ | |
| 117 | - | |
| 118 | - # 集成测试 | |
| 119 | - integration-tests: | |
| 120 | - runs-on: ubuntu-latest | |
| 121 | - name: Integration Tests | |
| 122 | - needs: [code-quality, unit-tests] | |
| 123 | - | |
| 124 | - services: | |
| 125 | - elasticsearch: | |
| 126 | - image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0 | |
| 127 | - env: | |
| 128 | - discovery.type: single-node | |
| 129 | - ES_JAVA_OPTS: -Xms1g -Xmx1g | |
| 130 | - xpack.security.enabled: false | |
| 131 | - ports: | |
| 132 | - - 9200:9200 | |
| 133 | - options: >- | |
| 134 | - --health-cmd "curl http://localhost:9200/_cluster/health" | |
| 135 | - --health-interval 10s | |
| 136 | - --health-timeout 5s | |
| 137 | - --health-retries 10 | |
| 138 | - | |
| 139 | - steps: | |
| 140 | - - name: Checkout code | |
| 141 | - uses: actions/checkout@v4 | |
| 142 | - | |
| 143 | - - name: Set up Python | |
| 144 | - uses: actions/setup-python@v4 | |
| 145 | - with: | |
| 146 | - python-version: ${{ env.PYTHON_VERSION }} | |
| 147 | - | |
| 148 | - - name: Install system dependencies | |
| 149 | - run: | | |
| 150 | - sudo apt-get update | |
| 151 | - sudo apt-get install -y curl | |
| 152 | - | |
| 153 | - - name: Install Python dependencies | |
| 154 | - run: | | |
| 155 | - python -m pip install --upgrade pip | |
| 156 | - pip install pytest pytest-json-report httpx | |
| 157 | - pip install -r requirements.txt | |
| 158 | - | |
| 159 | - - name: Create test logs directory | |
| 160 | - run: mkdir -p test_logs | |
| 161 | - | |
| 162 | - - name: Wait for Elasticsearch | |
| 163 | - run: | | |
| 164 | - echo "Waiting for Elasticsearch to be ready..." | |
| 165 | - for i in {1..30}; do | |
| 166 | - if curl -s http://localhost:9200/_cluster/health | grep -q '"status":"green\|yellow"'; then | |
| 167 | - echo "Elasticsearch is ready" | |
| 168 | - break | |
| 169 | - fi | |
| 170 | - echo "Attempt $i/30: Elasticsearch not ready yet" | |
| 171 | - sleep 2 | |
| 172 | - done | |
| 173 | - | |
| 174 | - - name: Setup test index | |
| 175 | - run: | | |
| 176 | - curl -X PUT http://localhost:9200/test_products \ | |
| 177 | - -H 'Content-Type: application/json' \ | |
| 178 | - -d '{ | |
| 179 | - "settings": { | |
| 180 | - "number_of_shards": 1, | |
| 181 | - "number_of_replicas": 0 | |
| 182 | - }, | |
| 183 | - "mappings": { | |
| 184 | - "properties": { | |
| 185 | - "name": {"type": "text"}, | |
| 186 | - "brand_name": {"type": "text"}, | |
| 187 | - "tags": {"type": "text"}, | |
| 188 | - "price": {"type": "double"}, | |
| 189 | - "category_id": {"type": "integer"}, | |
| 190 | - "spu_id": {"type": "keyword"}, | |
| 191 | - "text_embedding": {"type": "dense_vector", "dims": 1024} | |
| 192 | - } | |
| 193 | - } | |
| 194 | - }' | |
| 195 | - | |
| 196 | - - name: Insert test data | |
| 197 | - run: | | |
| 198 | - curl -X POST http://localhost:9200/test_products/_bulk \ | |
| 199 | - -H 'Content-Type: application/json' \ | |
| 200 | - --data-binary @- << 'EOF' | |
| 201 | -{"index": {"_id": "1"}} | |
| 202 | -{"name": "红色连衣裙", "brand_name": "测试品牌", "tags": ["红色", "连衣裙", "女装"], "price": 299.0, "category_id": 1, "spu_id": "dress_001"} | |
| 203 | -{"index": {"_id": "2"}} | |
| 204 | -{"name": "蓝色连衣裙", "brand_name": "测试品牌", "tags": ["蓝色", "连衣裙", "女装"], "price": 399.0, "category_id": 1, "spu_id": "dress_002"} | |
| 205 | -{"index": {"_id": "3"}} | |
| 206 | -{"name": "智能手机", "brand_name": "科技品牌", "tags": ["智能", "手机", "数码"], "price": 2999.0, "category_id": 2, "spu_id": "phone_001"} | |
| 207 | -EOF | |
| 208 | - | |
| 209 | - - name: Run integration tests | |
| 210 | - env: | |
| 211 | - ES_HOST: http://localhost:9200 | |
| 212 | - TENANT_ID: test_tenant | |
| 213 | - TESTING_MODE: true | |
| 214 | - run: | | |
| 215 | - python -m pytest tests/integration/ \ | |
| 216 | - -v \ | |
| 217 | - --tb=short \ | |
| 218 | - -m "not slow" \ | |
| 219 | - --json-report \ | |
| 220 | - --json-report-file=test_logs/integration_test_results.json | |
| 221 | - | |
| 222 | - - name: Upload integration test results | |
| 223 | - uses: actions/upload-artifact@v3 | |
| 224 | - if: always() | |
| 225 | - with: | |
| 226 | - name: integration-test-results | |
| 227 | - path: test_logs/integration_test_results.json | |
| 228 | - | |
| 229 | - # API测试 | |
| 230 | - api-tests: | |
| 231 | - runs-on: ubuntu-latest | |
| 232 | - name: API Tests | |
| 233 | - needs: [code-quality, unit-tests] | |
| 234 | - | |
| 235 | - services: | |
| 236 | - elasticsearch: | |
| 237 | - image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0 | |
| 238 | - env: | |
| 239 | - discovery.type: single-node | |
| 240 | - ES_JAVA_OPTS: -Xms1g -Xmx1g | |
| 241 | - xpack.security.enabled: false | |
| 242 | - ports: | |
| 243 | - - 9200:9200 | |
| 244 | - options: >- | |
| 245 | - --health-cmd "curl http://localhost:9200/_cluster/health" | |
| 246 | - --health-interval 10s | |
| 247 | - --health-timeout 5s | |
| 248 | - --health-retries 10 | |
| 249 | - | |
| 250 | - steps: | |
| 251 | - - name: Checkout code | |
| 252 | - uses: actions/checkout@v4 | |
| 253 | - | |
| 254 | - - name: Set up Python | |
| 255 | - uses: actions/setup-python@v4 | |
| 256 | - with: | |
| 257 | - python-version: ${{ env.PYTHON_VERSION }} | |
| 258 | - | |
| 259 | - - name: Install system dependencies | |
| 260 | - run: | | |
| 261 | - sudo apt-get update | |
| 262 | - sudo apt-get install -y curl | |
| 263 | - | |
| 264 | - - name: Install Python dependencies | |
| 265 | - run: | | |
| 266 | - python -m pip install --upgrade pip | |
| 267 | - pip install pytest pytest-json-report httpx | |
| 268 | - pip install -r requirements.txt | |
| 269 | - | |
| 270 | - - name: Create test logs directory | |
| 271 | - run: mkdir -p test_logs | |
| 272 | - | |
| 273 | - - name: Wait for Elasticsearch | |
| 274 | - run: | | |
| 275 | - echo "Waiting for Elasticsearch to be ready..." | |
| 276 | - for i in {1..30}; do | |
| 277 | - if curl -s http://localhost:9200/_cluster/health | grep -q '"status":"green\|yellow"'; then | |
| 278 | - echo "Elasticsearch is ready" | |
| 279 | - break | |
| 280 | - fi | |
| 281 | - echo "Attempt $i/30: Elasticsearch not ready yet" | |
| 282 | - sleep 2 | |
| 283 | - done | |
| 284 | - | |
| 285 | - - name: Setup test index and data | |
| 286 | - run: | | |
| 287 | - # 创建索引 | |
| 288 | - curl -X PUT http://localhost:9200/test_products \ | |
| 289 | - -H 'Content-Type: application/json' \ | |
| 290 | - -d '{ | |
| 291 | - "settings": {"number_of_shards": 1, "number_of_replicas": 0}, | |
| 292 | - "mappings": { | |
| 293 | - "properties": { | |
| 294 | - "name": {"type": "text"}, "brand_name": {"type": "text"}, | |
| 295 | - "tags": {"type": "text"}, "price": {"type": "double"}, | |
| 296 | - "category_id": {"type": "integer"}, "spu_id": {"type": "keyword"}, | |
| 297 | - "text_embedding": {"type": "dense_vector", "dims": 1024} | |
| 298 | - } | |
| 299 | - } | |
| 300 | - }' | |
| 301 | - | |
| 302 | - # 插入测试数据 | |
| 303 | - curl -X POST http://localhost:9200/test_products/_bulk \ | |
| 304 | - -H 'Content-Type: application/json' \ | |
| 305 | - --data-binary @- << 'EOF' | |
| 306 | -{"index": {"_id": "1"}} | |
| 307 | -{"name": "红色连衣裙", "brand_name": "测试品牌", "tags": ["红色", "连衣裙", "女装"], "price": 299.0, "category_id": 1, "spu_id": "dress_001"} | |
| 308 | -{"index": {"_id": "2"}} | |
| 309 | -{"name": "蓝色连衣裙", "brand_name": "测试品牌", "tags": ["蓝色", "连衣裙", "女装"], "price": 399.0, "category_id": 1, "spu_id": "dress_002"} | |
| 310 | -EOF | |
| 311 | - | |
| 312 | - - name: Start API service | |
| 313 | - env: | |
| 314 | - ES_HOST: http://localhost:9200 | |
| 315 | - TENANT_ID: test_tenant | |
| 316 | - API_HOST: 127.0.0.1 | |
| 317 | - API_PORT: 6003 | |
| 318 | - TESTING_MODE: true | |
| 319 | - run: | | |
| 320 | - python -m api.app \ | |
| 321 | - --host $API_HOST \ | |
| 322 | - --port $API_PORT \ | |
| 323 | - --tenant $TENANT_ID \ | |
| 324 | - --es-host $ES_HOST & | |
| 325 | - echo $! > api.pid | |
| 326 | - | |
| 327 | - # 等待API服务启动 | |
| 328 | - for i in {1..30}; do | |
| 329 | - if curl -s http://$API_HOST:$API_PORT/health > /dev/null; then | |
| 330 | - echo "API service is ready" | |
| 331 | - break | |
| 332 | - fi | |
| 333 | - echo "Attempt $i/30: API service not ready yet" | |
| 334 | - sleep 2 | |
| 335 | - done | |
| 336 | - | |
| 337 | - - name: Run API tests | |
| 338 | - env: | |
| 339 | - ES_HOST: http://localhost:9200 | |
| 340 | - API_HOST: 127.0.0.1 | |
| 341 | - API_PORT: 6003 | |
| 342 | - TENANT_ID: test_tenant | |
| 343 | - TESTING_MODE: true | |
| 344 | - run: | | |
| 345 | - python -m pytest tests/integration/test_api_integration.py \ | |
| 346 | - -v \ | |
| 347 | - --tb=short \ | |
| 348 | - --json-report \ | |
| 349 | - --json-report-file=test_logs/api_test_results.json | |
| 350 | - | |
| 351 | - - name: Stop API service | |
| 352 | - if: always() | |
| 353 | - run: | | |
| 354 | - if [ -f api.pid ]; then | |
| 355 | - kill $(cat api.pid) || true | |
| 356 | - rm api.pid | |
| 357 | - fi | |
| 358 | - | |
| 359 | - - name: Upload API test results | |
| 360 | - uses: actions/upload-artifact@v3 | |
| 361 | - if: always() | |
| 362 | - with: | |
| 363 | - name: api-test-results | |
| 364 | - path: test_logs/api_test_results.json | |
| 365 | - | |
| 366 | - # 性能测试 | |
| 367 | - performance-tests: | |
| 368 | - runs-on: ubuntu-latest | |
| 369 | - name: Performance Tests | |
| 370 | - needs: [code-quality, unit-tests] | |
| 371 | - if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' | |
| 372 | - | |
| 373 | - services: | |
| 374 | - elasticsearch: | |
| 375 | - image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0 | |
| 376 | - env: | |
| 377 | - discovery.type: single-node | |
| 378 | - ES_JAVA_OPTS: -Xms2g -Xmx2g | |
| 379 | - xpack.security.enabled: false | |
| 380 | - ports: | |
| 381 | - - 9200:9200 | |
| 382 | - options: >- | |
| 383 | - --health-cmd "curl http://localhost:9200/_cluster/health" | |
| 384 | - --health-interval 10s | |
| 385 | - --health-timeout 5s | |
| 386 | - --health-retries 10 | |
| 387 | - | |
| 388 | - steps: | |
| 389 | - - name: Checkout code | |
| 390 | - uses: actions/checkout@v4 | |
| 391 | - | |
| 392 | - - name: Set up Python | |
| 393 | - uses: actions/setup-python@v4 | |
| 394 | - with: | |
| 395 | - python-version: ${{ env.PYTHON_VERSION }} | |
| 396 | - | |
| 397 | - - name: Install dependencies | |
| 398 | - run: | | |
| 399 | - python -m pip install --upgrade pip | |
| 400 | - pip install pytest locust | |
| 401 | - pip install -r requirements.txt | |
| 402 | - | |
| 403 | - - name: Wait for Elasticsearch | |
| 404 | - run: | | |
| 405 | - echo "Waiting for Elasticsearch to be ready..." | |
| 406 | - for i in {1..30}; do | |
| 407 | - if curl -s http://localhost:9200/_cluster/health | grep -q '"status":"green\|yellow"'; then | |
| 408 | - echo "Elasticsearch is ready" | |
| 409 | - break | |
| 410 | - fi | |
| 411 | - sleep 2 | |
| 412 | - done | |
| 413 | - | |
| 414 | - - name: Setup test data | |
| 415 | - run: | | |
| 416 | - # 创建并填充测试索引 | |
| 417 | - python scripts/create_test_data.py --count 1000 | |
| 418 | - | |
| 419 | - - name: Run performance tests | |
| 420 | - env: | |
| 421 | - ES_HOST: http://localhost:9200 | |
| 422 | - TESTING_MODE: true | |
| 423 | - run: | | |
| 424 | - python scripts/run_performance_tests.py | |
| 425 | - | |
| 426 | - - name: Upload performance results | |
| 427 | - uses: actions/upload-artifact@v3 | |
| 428 | - if: always() | |
| 429 | - with: | |
| 430 | - name: performance-test-results | |
| 431 | - path: performance_results/ | |
| 432 | - | |
| 433 | - # 安全扫描 | |
| 434 | - security-scan: | |
| 11 | + service-contract-tests: | |
| 435 | 12 | runs-on: ubuntu-latest |
| 436 | - name: Security Scan | |
| 437 | - needs: [code-quality] | |
| 13 | + name: Service Contract Tests | |
| 438 | 14 | |
| 439 | 15 | steps: |
| 440 | - - name: Checkout code | |
| 441 | - uses: actions/checkout@v4 | |
| 442 | - | |
| 443 | - - name: Set up Python | |
| 444 | - uses: actions/setup-python@v4 | |
| 445 | - with: | |
| 446 | - python-version: ${{ env.PYTHON_VERSION }} | |
| 447 | - | |
| 448 | - - name: Install security scanning tools | |
| 449 | - run: | | |
| 450 | - python -m pip install --upgrade pip | |
| 451 | - pip install safety bandit | |
| 452 | - | |
| 453 | - - name: Run Safety (dependency check) | |
| 454 | - run: | | |
| 455 | - safety check --json --output safety_report.json || true | |
| 456 | - | |
| 457 | - - name: Run Bandit (security linter) | |
| 458 | - run: | | |
| 459 | - bandit -r . -f json -o bandit_report.json || true | |
| 460 | - | |
| 461 | - - name: Upload security reports | |
| 462 | - uses: actions/upload-artifact@v3 | |
| 463 | - if: always() | |
| 464 | - with: | |
| 465 | - name: security-reports | |
| 466 | - path: | | |
| 467 | - safety_report.json | |
| 468 | - bandit_report.json | |
| 469 | - | |
| 470 | - # 测试结果汇总 | |
| 471 | - test-summary: | |
| 472 | - runs-on: ubuntu-latest | |
| 473 | - name: Test Summary | |
| 474 | - needs: [unit-tests, integration-tests, api-tests, security-scan] | |
| 475 | - if: always() | |
| 476 | - | |
| 477 | - steps: | |
| 478 | - - name: Checkout code | |
| 479 | - uses: actions/checkout@v4 | |
| 480 | - | |
| 481 | - - name: Download all test artifacts | |
| 482 | - uses: actions/download-artifact@v3 | |
| 483 | - | |
| 484 | - - name: Generate test summary | |
| 485 | - run: | | |
| 486 | - python scripts/generate_test_summary.py | |
| 487 | - | |
| 488 | - - name: Upload final report | |
| 489 | - uses: actions/upload-artifact@v3 | |
| 490 | - with: | |
| 491 | - name: final-test-report | |
| 492 | - path: final_test_report.* | |
| 493 | - | |
| 494 | - - name: Comment PR with results | |
| 495 | - if: github.event_name == 'pull_request' | |
| 496 | - uses: actions/github-script@v6 | |
| 497 | - with: | |
| 498 | - script: | | |
| 499 | - const fs = require('fs'); | |
| 500 | - | |
| 501 | - // 读取测试报告 | |
| 502 | - let reportContent = ''; | |
| 503 | - try { | |
| 504 | - reportContent = fs.readFileSync('final_test_report.txt', 'utf8'); | |
| 505 | - } catch (e) { | |
| 506 | - console.log('Could not read report file'); | |
| 507 | - return; | |
| 508 | - } | |
| 509 | - | |
| 510 | - // 提取摘要信息 | |
| 511 | - const lines = reportContent.split('\n'); | |
| 512 | - let summary = ''; | |
| 513 | - let inSummary = false; | |
| 514 | - | |
| 515 | - for (const line of lines) { | |
| 516 | - if (line.includes('测试摘要')) { | |
| 517 | - inSummary = true; | |
| 518 | - continue; | |
| 519 | - } | |
| 520 | - if (inSummary && line.includes('测试套件详情')) { | |
| 521 | - break; | |
| 522 | - } | |
| 523 | - if (inSummary && line.trim()) { | |
| 524 | - summary += line + '\n'; | |
| 525 | - } | |
| 526 | - } | |
| 527 | - | |
| 528 | - // 构建评论内容 | |
| 529 | - const comment = `## 🧪 测试报告\n\n${summary}\n\n详细的测试报告请查看 [Artifacts](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) 部分。`; | |
| 530 | - | |
| 531 | - // 发送评论 | |
| 532 | - github.rest.issues.createComment({ | |
| 533 | - issue_number: context.issue.number, | |
| 534 | - owner: context.repo.owner, | |
| 535 | - repo: context.repo.repo, | |
| 536 | - body: comment | |
| 537 | - }); | |
| 538 | 16 | \ No newline at end of file |
| 17 | + - name: Checkout code | |
| 18 | + uses: actions/checkout@v4 | |
| 19 | + | |
| 20 | + - name: Set up Python | |
| 21 | + uses: actions/setup-python@v5 | |
| 22 | + with: | |
| 23 | + python-version: "3.11" | |
| 24 | + | |
| 25 | + - name: Install dependencies | |
| 26 | + run: | | |
| 27 | + python -m pip install --upgrade pip | |
| 28 | + pip install -r requirements.txt | |
| 29 | + | |
| 30 | + - name: Run CI contract tests | |
| 31 | + run: | | |
| 32 | + python -m pytest tests/ci -q | |
| 539 | 33 | \ No newline at end of file | ... | ... |
README.md
| 1 | -# TODO | |
| 2 | - | |
| 3 | -**多语言索引**:已改为可配置的 `index_languages`(默认为 `["en", "zh"]`),商家可勾选主市场语言。支持语言见 `config.tenant_config_loader.SUPPORTED_INDEX_LANGUAGES`(含 en, zh, zh_tw, ru, ja, ko, es, fr, pt, de, it, th, vi, id, ms, ar, hi, he, my, ta, ur, bn, pl, nl, ro, tr, km, lo, yue, cs, el, sv, hu, da, fi, uk, bg 等)。 | |
| 4 | - | |
| 5 | -前端: | |
| 6 | -搜索模态框 | |
| 7 | -点击搜索的时候,弹出 搜索模态框,参考 react、AJAX等技术来实现,搜索模态框的页面宽度和原始页面相同(占满),左侧是suggestions,右侧是即时刷新的搜索结果(每输入一个字母都刷新一次结果)。 | |
| 8 | -但是要注意:搜索过程中,后端不要触发翻译(因为输入过程中的query翻译结果会有问题),因此需要增加一个参数:搜索类型,默认为当前的回车后发起的搜索,如果是输入过程中的结果刷新则类型为typing。 | |
| 9 | - | |
| 10 | - | |
| 11 | - | |
| 12 | -多语言: | |
| 13 | -语义: | |
| 14 | -多语言: | |
| 15 | -1. dis_max的方式 | |
| 16 | -"query" : { | |
| 17 | - "dis_max" : { | |
| 18 | - "queries" : [ | |
| 19 | - {"match" : { "title.en" : xxx }}, | |
| 20 | - {"match" : { "title.zh" : xxx }}, | |
| 21 | - {"match" : { "title_xx" : xxx }} | |
| 22 | - ], | |
| 23 | - "tie_breaker" : 0.8 | |
| 24 | - } | |
| 25 | -} | |
| 26 | - | |
| 27 | - | |
| 28 | - | |
| 29 | -"cross_fields": | |
| 30 | -"multi_match" : { | |
| 31 | - "query" : | |
| 32 | - "fields" : [...], | |
| 33 | - "type": "cross_fields", | |
| 34 | - "operator" : "and" | |
| 35 | -} | |
| 36 | - | |
| 37 | - | |
| 38 | -支持英文的拼写纠错: | |
| 39 | -title: multi_field | |
| 40 | -"query" : { | |
| 41 | - "query_string" : { | |
| 42 | - "query": "xxx", | |
| 43 | - "default_field": "title.ngram", | |
| 44 | - "minimum_should_match": "85%" | |
| 45 | - } | |
| 46 | -} | |
| 47 | - | |
| 48 | - | |
| 49 | - | |
| 50 | -业务提权: | |
| 51 | -rescore: window_size query_weight rescore_query_weight | |
| 52 | - | |
| 53 | -function_score: | |
| 54 | -boost | |
| 55 | -以及 可以把每个子查询用function_score包一下 | |
| 56 | - | |
| 57 | - | |
| 58 | - | |
| 59 | - | |
| 60 | -query anchor | |
| 61 | -我想给elasticsearch 增加字段 query anchor ,即哪些query点击到了这个doc,一个doc下面有多个query anchor,每个query anchor又有这两个属性:weight、dweight,分别代表 query在doc下的点击分布权重、doc在query下的点击分布权重。请问该如何设计这两个ES字段。 | |
| 62 | - | |
| 63 | -需要有zh en两套query anchor,因为他们的解析器不一样。 | |
| 64 | - | |
| 65 | -他的功能是辅助召回和排序。我搜索一个query,那么每个query跟 doc中的query anchor的相关性,也就是,除了将query到title 和keywords / brief等文本字段中搜索,也到 query anchor中搜索,从而辅助召回和相关性的计算。 | |
| 66 | - | |
| 67 | - | |
| 1 | +# 电商搜索引擎 SaaS | |
| 68 | 2 | |
| 3 | +多租户、可配置、可扩展的电商搜索平台(Shoplazza 等独立站场景)。 | |
| 69 | 4 | |
| 5 | +README 用于给后续开发者建立统一认知:**系统框架、模块边界、设计原则、研发流程与 CI 测试入口**,帮助持续迭代时避免分叉设计与冗余代码。 | |
| 70 | 6 | |
| 71 | -# 电商搜索引擎 SaaS | |
| 7 | +--- | |
| 72 | 8 | |
| 73 | -一个针对跨境独立站(店匠 Shoplazza 等)的多租户可配置搜索平台。README 作为项目导航入口,帮助你在不同阶段定位到更详细的文档。 | |
| 9 | +## 1) 项目目标与边界 | |
| 74 | 10 | |
| 11 | +- **目标**:在统一架构下支持关键词检索、语义检索、分面过滤、多语言、重排、图片检索。 | |
| 12 | +- **边界**:本仓库负责搜索核心能力与服务编排;业务方通过标准 HTTP API 对接。 | |
| 13 | +- **核心约束**: | |
| 14 | + - 调用方稳定(API/Provider 契约优先) | |
| 15 | + - 配置单一来源(`config/config.yaml` + `.env` 覆盖) | |
| 16 | + - 扩展优先走插件化(provider/backend),避免散落式分叉实现 | |
| 75 | 17 | |
| 76 | -## 项目环境 | |
| 18 | +--- | |
| 77 | 19 | |
| 78 | -以项目根目录的 **`activate.sh`** 为准(**优先激活 venv:`./.venv`,并加载 `.env`;兼容 Conda 回退**): | |
| 20 | +## 2) 快速开始 | |
| 79 | 21 | |
| 80 | 22 | ```bash |
| 81 | -# 推荐:首次创建 venv(默认安装基础依赖) | |
| 23 | +# 首次创建环境(默认基础依赖) | |
| 82 | 24 | ./scripts/create_venv.sh |
| 83 | - | |
| 84 | -# 如需本地向量/图片编码(会安装 torch/transformers 等重依赖) | |
| 85 | -# INSTALL_ML=1 ./scripts/create_venv.sh | |
| 86 | 25 | source activate.sh |
| 87 | -``` | |
| 88 | 26 | |
| 89 | -新机器首次需创建环境,见 `docs/环境配置说明.md`(推荐 venv;Conda 为兼容旧流程)。 | |
| 27 | +# 启动核心服务(backend/indexer/frontend) | |
| 28 | +./run.sh | |
| 90 | 29 | |
| 91 | -## 测试pipeline | |
| 30 | +# 可选:附加能力服务 | |
| 31 | +START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh | |
| 92 | 32 | |
| 93 | -1. | |
| 94 | -店铺1 tenant_id=162: | |
| 95 | -fake数据 生成商品导入数据 提交到店匠的店铺: | |
| 96 | -cd /data/saas-search && source activate.sh && python scripts/csv_to_excel_multi_variant.py --output with_colors.xlsx | |
| 33 | +# 查看状态 | |
| 34 | +./scripts/service_ctl.sh status | |
| 35 | +``` | |
| 97 | 36 | |
| 98 | -店铺2 tenant_id= | |
| 37 | +核心端口: | |
| 99 | 38 | |
| 39 | +- `6002` backend(`/search/*`, `/admin/*`) | |
| 40 | +- `6004` indexer(`/indexer/*`) | |
| 41 | +- `6003` frontend | |
| 42 | +- `6005` embedding(可选) | |
| 43 | +- `6006` translator(可选) | |
| 44 | +- `6007` reranker(可选) | |
| 100 | 45 | |
| 101 | -2. 后端:自动同步到mysql | |
| 46 | +更完整示例见 `docs/QUICKSTART.md`。 | |
| 102 | 47 | |
| 103 | -3. mysql到ES: | |
| 48 | +--- | |
| 104 | 49 | |
| 105 | -python scripts/recreate_and_import.py \ | |
| 106 | - --tenant-id 162 \ | |
| 107 | - --db-host <mysql_host> \ | |
| 108 | - --db-database saas \ | |
| 109 | - --db-username saas \ | |
| 110 | - --db-password <password> \ | |
| 111 | - --es-host http://localhost:9200 | |
| 50 | +## 3) 总体架构(开发者视角) | |
| 112 | 51 | |
| 113 | -构造查询: | |
| 114 | -参考 @常用查询 - ES.md | |
| 52 | +- `api/`:统一 API 入口(search/admin/indexer app) | |
| 53 | +- `search/`:召回、排序、结果组织 | |
| 54 | +- `query/`:查询解析、多语言处理、改写 | |
| 55 | +- `indexer/`:MySQL 行数据 -> ES 文档的转换与索引流程 | |
| 56 | +- `providers/`:能力调用抽象(translation/embedding/rerank) | |
| 57 | +- `reranker/`:重排服务及后端实现 | |
| 58 | +- `embeddings/`:向量服务(文本/图像) | |
| 59 | +- `config/`:配置加载与服务配置解析 | |
| 115 | 60 | |
| 61 | +关键设计:**Provider(调用方式)与 Backend(推理实现)分离**,新增能力优先在协议与工厂注册,不改调用方主流程。 | |
| 116 | 62 | |
| 117 | -## 核心能力速览 | |
| 63 | +--- | |
| 118 | 64 | |
| 119 | -- **多语言 + 自动翻译**:中文、英文、俄文等语言检测与路由(BGE-M3、DeepL) | |
| 120 | -- **语义 + 关键词混排**:BM25、dense vector(BGE-M3/CN-CLIP)融合 | |
| 121 | -- **布尔与分面**:AND / OR / ANDNOT / RANK、Terms & Range facets | |
| 122 | -- **多租户隔离**:共享 `search_products` 索引,通过 `tenant_id` 严格隔离 | |
| 123 | -- **可配置化**:字段/索引域/排序表达式/查询改写全部配置驱动 | |
| 124 | -- **脚本化流水线**:Mock/CSV 数据 → MySQL → Elasticsearch → API/前端 | |
| 65 | +## 4) 设计原则(避免后续分叉) | |
| 125 | 66 | |
| 126 | -## 新人入口 | |
| 67 | +- **单一配置源**:服务地址、provider 选择、后端参数统一在 `config/config.yaml`,环境变量仅做覆盖。 | |
| 68 | +- **接口契约优先**:外部 API 契约与 provider 契约稳定,内部重构不影响调用方。 | |
| 69 | +- **扩展走工厂**:新增 provider/backend 必须在工厂函数中显式注册,禁止旁路分支。 | |
| 70 | +- **可观测性优先**:健康检查、关键日志、请求上下文必须可追踪。 | |
| 71 | +- **测试优先保障契约**:CI 首先保证接口契约和核心路径可用,再逐步扩展性能与业务测试。 | |
| 127 | 72 | |
| 128 | -**→ 开发者必读**:[docs/DEVELOPER_GUIDE.md](docs/DEVELOPER_GUIDE.md) — 项目全貌、设计原则、扩展规范与迭代检查清单,保证后续开发在统一框架内进行。 | |
| 73 | +--- | |
| 129 | 74 | |
| 130 | -**→ 快速上手**:[docs/QUICKSTART.md](docs/QUICKSTART.md) — 环境、服务、模块、请求示例一页搞定。 | |
| 75 | +## 5) 文档入口(建议阅读顺序) | |
| 131 | 76 | |
| 132 | 77 | | 步骤 | 文档 | |
| 133 | 78 | |------|------| |
| 134 | -| 0. 框架与规范(推荐首读) | `docs/DEVELOPER_GUIDE.md` | | |
| 135 | -| 1. 环境与启动 | `docs/QUICKSTART.md` | | |
| 136 | -| 2. 搜索/索引 API | `docs/QUICKSTART.md` §3、`docs/搜索API速查表.md` | | |
| 137 | -| 3. 运维与故障 | `docs/Usage-Guide.md` | | |
| 138 | -| 4. 架构与扩展 | `docs/PROVIDER_ARCHITECTURE.md`、`docs/MODULE_EXTENSION_SPEC.md`、`docs/系统设计文档.md` | | |
| 139 | - | |
| 140 | -### Runtimes & 命令示例 | |
| 79 | +| 0. 全局规范(首读) | `docs/DEVELOPER_GUIDE.md` | | |
| 80 | +| 1. 开发与配置 | `docs/QUICKSTART.md` | | |
| 81 | +| 2. 运行与排障 | `docs/Usage-Guide.md` | | |
| 82 | +| 3. API 详细说明 | `docs/搜索API对接指南.md` | | |
| 83 | +| 4. 快速参数速查 | `docs/搜索API速查表.md` | | |
| 84 | +| 5. 首次环境搭建 | `docs/环境配置说明.md` | | |
| 141 | 85 | |
| 142 | -```bash | |
| 143 | -# 1. 安装依赖与准备服务(环境创建见 docs/环境配置说明.md) | |
| 144 | -source activate.sh # 或先 export CONDA_ROOT=你的conda路径 | |
| 145 | -pip install -r requirements.txt # 若用 environment.yml 创建环境可省略 | |
| 146 | -docker run -d --name es -p 9200:9200 elasticsearch:8.11.0 | |
| 86 | +--- | |
| 147 | 87 | |
| 148 | -# 2. 构造测试数据并导入 MySQL | |
| 149 | -./scripts/mock_data.sh # 详见 TEST_DATA_GUIDE.md | |
| 88 | +## 6) 持续集成测试(推荐最小集) | |
| 150 | 89 | |
| 151 | -# 3. 创建租户索引结构并导入数据(推荐) | |
| 152 | -./scripts/create_tenant_index.sh 162 | |
| 153 | -curl -X POST "http://localhost:6004/indexer/reindex" \ | |
| 154 | - -H "Content-Type: application/json" \ | |
| 155 | - -d '{"tenant_id":"162","batch_size":500}' | |
| 90 | +本仓库提供一套轻量、稳定、易维护的 CI 测试入口,覆盖以下服务契约: | |
| 156 | 91 | |
| 157 | -# 4. 启动核心服务(backend/indexer/frontend) | |
| 158 | -./run.sh | |
| 92 | +- 搜索接口(search API) | |
| 93 | +- 索引接口(indexer API) | |
| 94 | +- 向量服务(embedding service) | |
| 95 | +- 翻译服务(translator service) | |
| 96 | +- 重排服务(reranker service) | |
| 159 | 97 | |
| 160 | -# (可选)附加启动 embedding / translator / reranker | |
| 161 | -START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh | |
| 162 | -# | |
| 163 | -# 查看服务状态 / 停止 | |
| 164 | -./scripts/service_ctl.sh status | |
| 165 | -./scripts/stop.sh | |
| 98 | +本地运行: | |
| 166 | 99 | |
| 167 | -# 5. 调用文本搜索 API | |
| 168 | -curl -X POST http://localhost:6002/search/ \ | |
| 169 | - -H "Content-Type: application/json" \ | |
| 170 | - -H "X-Tenant-ID: 1" \ | |
| 171 | - -d '{"query": "玩具", "size": 10}' | |
| 100 | +```bash | |
| 101 | +source activate.sh | |
| 102 | +python -m pytest tests/ci -q | |
| 172 | 103 | ``` |
| 173 | 104 | |
| 174 | -## 文档索引 | |
| 105 | +该测试集采用 mock/stub,**不依赖真实 ES/MySQL/大模型服务**,适合作为 PR 级快速回归门禁。 | |
| 175 | 106 | |
| 176 | -| 文档 | 用途 | | |
| 177 | -|------|------| | |
| 178 | -| `docs/DEVELOPER_GUIDE.md` | **开发者开放指南**:全貌、原则、规范、检查清单 | | |
| 179 | -| `docs/QUICKSTART.md` | 新人上手:环境、服务、模块、请求 | | |
| 180 | -| `docs/Usage-Guide.md` | 运维:日志、多环境、故障排查 | | |
| 181 | -| `docs/搜索API速查表.md` | 搜索 API 参数速查 | | |
| 182 | -| `docs/搜索API对接指南.md` | 搜索 API 完整说明 | | |
| 183 | -| `docs/PROVIDER_ARCHITECTURE.md` | 翻译/向量/重排 provider 扩展 | | |
| 184 | -| `docs/MODULE_EXTENSION_SPEC.md` | 向量/重排后端可插拔规范 | | |
| 185 | -| `docs/环境配置说明.md` | 首次部署、新机器环境 | | |
| 186 | -| `docs/系统设计文档.md` | 架构与模块细节 | | |
| 187 | - | |
| 188 | -## 关键工作流指引 | |
| 189 | - | |
| 190 | -- **数据构建 → MySQL → Elasticsearch** | |
| 191 | - - `scripts/mock_data.sh`:Tenant1 Mock + Tenant2 CSV 一条龙 | |
| 192 | - - `scripts/create_tenant_index.sh <tenant_id>` + `POST /indexer/reindex`:推荐导入链路 | |
| 193 | - - 详解:`测试数据指南.md` | |
| 194 | - | |
| 195 | -- **索引富化 & Java 对接** | |
| 196 | - - Java 索引程序负责:全量/增量调度 + 从 MySQL 查询 `shoplazza_product_spu/sku/option/...` | |
| 197 | - - Python `indexer` 模块负责:**MySQL 行 → ES doc** 的全部逻辑(多语言、翻译、向量、规格聚合等) | |
| 198 | - - 正式对接接口(推荐): | |
| 199 | - - `POST http://<indexer_host>:6004/indexer/build-docs` | |
| 200 | - - 入参:`tenant_id + items[{spu, skus, options}]` | |
| 201 | - - 出参:与 `mappings/search_products.json` 完全一致的 `docs` 列表,上游自行写入 ES | |
| 202 | - - 调试/自测接口(内部使用): | |
| 203 | - - `POST http://127.0.0.1:6004/indexer/build-docs-from-db`,只需要 `tenant_id + spu_ids`,由服务内部查库并返回 ES doc | |
| 204 | - - 详解:`indexer/README.md`、`docs/索引字段说明v2.md` | |
| 205 | - | |
| 206 | -- **搜索服务 & API** | |
| 207 | - - `api/`(FastAPI)承载 REST API,`search/` + `query/` 负责查询解析与下发 | |
| 208 | - - API、分页、过滤、Facet、KNN 等:`搜索API对接指南.md` | |
| 209 | - - 对接案例、示例与错误码:`搜索API对接指南.md`、`Search-API-Examples.md` | |
| 210 | - | |
| 211 | -- **统一配置** | |
| 212 | - - 所有租户共享统一的索引结构和查询配置(硬编码) | |
| 213 | - - 索引 mapping: `mappings/search_products.json` | |
| 214 | - - 查询配置: `search/query_config.py` | |
| 215 | - - 详解:`基础配置指南.md`、`索引字段说明v2.md` | |
| 216 | - | |
| 217 | -## 仓库结构(概览) | |
| 107 | +--- | |
| 218 | 108 | |
| 219 | -``` | |
| 220 | -api/ FastAPI 服务与路由 | |
| 221 | -config/ 字段/索引/查询配置体系 | |
| 222 | -indexer/ MySQL → ES 管道(mapping / transformer / bulk) | |
| 223 | -query/ 查询解析、改写、翻译、embedding | |
| 224 | -search/ 多语言构建、布尔解析、排序引擎 | |
| 225 | -scripts/ 数据/服务脚本(mock_data, ingest, run 等) | |
| 226 | -frontend/ 简易调试页面 | |
| 227 | -docs/ 运营及中文资料 | |
| 228 | -``` | |
| 109 | +## 7) 代码质量与持续集成要求 | |
| 110 | + | |
| 111 | +- 新增功能必须补最小测试(至少覆盖 1 条成功路径 + 1 条参数异常路径) | |
| 112 | +- 修改公共协议时必须同步更新: | |
| 113 | + - `docs/QUICKSTART.md` | |
| 114 | + - 对应服务 README / API 文档 | |
| 115 | + - `tests/ci` 契约用例 | |
| 116 | +- 禁止新增“临时分支逻辑”绕过 provider/backend 工厂 | |
| 117 | +- 优先减少重复实现,复用现有转换链路与配置解析入口 | ... | ... |
docs/QUICKSTART.md
| ... | ... | @@ -27,6 +27,7 @@ |
| 27 | 27 | 4. [模块扩展规范(Embedding / Rerank)](#4-模块扩展规范embedding--rerank) |
| 28 | 28 | 5. [验证、日志与常见排障入口](#5-验证日志与常见排障入口) |
| 29 | 29 | 6. [相关文档](#6-相关文档) |
| 30 | +7. [持续集成测试(最小可维护方案)](#7-持续集成测试最小可维护方案) | |
| 30 | 31 | |
| 31 | 32 | --- |
| 32 | 33 | |
| ... | ... | @@ -374,3 +375,32 @@ lsof -i :6004 |
| 374 | 375 | | `indexer/README.md` | 索引模块职责与接口 | |
| 375 | 376 | | `embeddings/README.md` | 向量化服务说明 | |
| 376 | 377 | | `reranker/README.md` | 重排服务说明 | |
| 378 | + | |
| 379 | +--- | |
| 380 | + | |
| 381 | +## 7. 持续集成测试(最小可维护方案) | |
| 382 | + | |
| 383 | +目标:让后续开发者在不依赖真实 ES/MySQL/模型服务的前提下,快速验证核心服务契约不被破坏。 | |
| 384 | + | |
| 385 | +### 7.1 测试范围 | |
| 386 | + | |
| 387 | +`tests/ci/test_service_api_contracts.py` 覆盖: | |
| 388 | + | |
| 389 | +- 搜索接口:`/search/`、`/search/image`、`/search/suggestions` | |
| 390 | +- 索引接口:`/indexer/reindex`、`/indexer/index`、`/indexer/build-docs` | |
| 391 | +- 向量服务:`/embed/text`、`/embed/image` | |
| 392 | +- 翻译服务:`/translate`、`/health` | |
| 393 | +- 重排服务:`/rerank`、`/health` | |
| 394 | + | |
| 395 | +### 7.2 运行方式 | |
| 396 | + | |
| 397 | +```bash | |
| 398 | +source activate.sh | |
| 399 | +python -m pytest tests/ci -q | |
| 400 | +``` | |
| 401 | + | |
| 402 | +### 7.3 设计取舍 | |
| 403 | + | |
| 404 | +- 使用 mock/stub 注入依赖,确保测试快且稳定 | |
| 405 | +- 重点测“接口契约与参数行为”,而不是底层模型质量 | |
| 406 | +- 作为 PR 级门禁;真实环境联调放在运维/预发布流程 | ... | ... |
tests/ci/test_service_api_contracts.py
| ... | ... | @@ -0,0 +1,281 @@ |
| 1 | +from __future__ import annotations | |
| 2 | + | |
| 3 | +from types import SimpleNamespace | |
| 4 | +from typing import Any, Dict, List | |
| 5 | + | |
| 6 | +import numpy as np | |
| 7 | +import pytest | |
| 8 | +from fastapi.testclient import TestClient | |
| 9 | + | |
| 10 | + | |
| 11 | +class _FakeSearcher: | |
| 12 | + def search(self, **kwargs): | |
| 13 | + return SimpleNamespace( | |
| 14 | + results=[ | |
| 15 | + { | |
| 16 | + "spu_id": "spu-1", | |
| 17 | + "title": "测试商品", | |
| 18 | + "price": 99.0, | |
| 19 | + "currency": "USD", | |
| 20 | + "in_stock": True, | |
| 21 | + "skus": [], | |
| 22 | + "relevance_score": 1.2, | |
| 23 | + } | |
| 24 | + ], | |
| 25 | + total=1, | |
| 26 | + max_score=1.2, | |
| 27 | + took_ms=8, | |
| 28 | + facets=[], | |
| 29 | + query_info={"normalized_query": kwargs.get("query", "")}, | |
| 30 | + suggestions=[], | |
| 31 | + related_searches=[], | |
| 32 | + debug_info=None, | |
| 33 | + ) | |
| 34 | + | |
| 35 | + def search_by_image(self, **kwargs): | |
| 36 | + return self.search(**kwargs) | |
| 37 | + | |
| 38 | + | |
| 39 | +class _FakeSuggestionService: | |
| 40 | + def search(self, **kwargs): | |
| 41 | + return { | |
| 42 | + "query": kwargs["query"], | |
| 43 | + "language": kwargs.get("language", "en"), | |
| 44 | + "resolved_language": kwargs.get("language", "en"), | |
| 45 | + "suggestions": [{"text": "iphone 15", "score": 1.0}], | |
| 46 | + "took_ms": 3, | |
| 47 | + } | |
| 48 | + | |
| 49 | + | |
| 50 | +@pytest.fixture | |
| 51 | +def search_client(monkeypatch): | |
| 52 | + import api.app as search_app | |
| 53 | + | |
| 54 | + monkeypatch.setattr(search_app, "init_service", lambda es_host="": None) | |
| 55 | + monkeypatch.setattr(search_app, "get_searcher", lambda: _FakeSearcher()) | |
| 56 | + monkeypatch.setattr(search_app, "get_suggestion_service", lambda: _FakeSuggestionService()) | |
| 57 | + | |
| 58 | + with TestClient(search_app.app) as client: | |
| 59 | + yield client | |
| 60 | + | |
| 61 | + | |
| 62 | +def test_search_api_contract(search_client: TestClient): | |
| 63 | + response = search_client.post( | |
| 64 | + "/search/", | |
| 65 | + headers={"X-Tenant-ID": "162"}, | |
| 66 | + json={"query": "toy", "size": 5}, | |
| 67 | + ) | |
| 68 | + assert response.status_code == 200 | |
| 69 | + data = response.json() | |
| 70 | + assert data["total"] == 1 | |
| 71 | + assert data["results"][0]["spu_id"] == "spu-1" | |
| 72 | + | |
| 73 | + | |
| 74 | +def test_image_search_api_contract(search_client: TestClient): | |
| 75 | + response = search_client.post( | |
| 76 | + "/search/image", | |
| 77 | + headers={"X-Tenant-ID": "162"}, | |
| 78 | + json={"image_url": "https://example.com/a.jpg", "size": 3}, | |
| 79 | + ) | |
| 80 | + assert response.status_code == 200 | |
| 81 | + assert response.json()["results"][0]["spu_id"] == "spu-1" | |
| 82 | + | |
| 83 | + | |
| 84 | +def test_suggestion_api_contract(search_client: TestClient): | |
| 85 | + response = search_client.get( | |
| 86 | + "/search/suggestions?q=iph&size=5&language=en", | |
| 87 | + headers={"X-Tenant-ID": "162"}, | |
| 88 | + ) | |
| 89 | + assert response.status_code == 200 | |
| 90 | + data = response.json() | |
| 91 | + assert data["query"] == "iph" | |
| 92 | + assert len(data["suggestions"]) == 1 | |
| 93 | + | |
| 94 | + | |
| 95 | +class _FakeBulkService: | |
| 96 | + def bulk_index(self, tenant_id: str, recreate_index: bool, batch_size: int): | |
| 97 | + return { | |
| 98 | + "tenant_id": tenant_id, | |
| 99 | + "recreate_index": recreate_index, | |
| 100 | + "batch_size": batch_size, | |
| 101 | + "success": True, | |
| 102 | + } | |
| 103 | + | |
| 104 | + | |
| 105 | +class _FakeTransformer: | |
| 106 | + def transform_spu_to_doc(self, tenant_id: str, spu_row, skus, options): | |
| 107 | + return { | |
| 108 | + "tenant_id": tenant_id, | |
| 109 | + "spu_id": str(spu_row.get("id", "0")), | |
| 110 | + "title": {"zh": str(spu_row.get("title", ""))}, | |
| 111 | + } | |
| 112 | + | |
| 113 | + | |
| 114 | +class _FakeIncrementalService: | |
| 115 | + def index_spus_to_es(self, es_client, tenant_id: str, spu_ids: List[str], delete_spu_ids=None): | |
| 116 | + return { | |
| 117 | + "tenant_id": tenant_id, | |
| 118 | + "spu_ids": [{"spu_id": s, "status": "indexed"} for s in spu_ids], | |
| 119 | + "delete_spu_ids": [], | |
| 120 | + "total": len(spu_ids), | |
| 121 | + "success_count": len(spu_ids), | |
| 122 | + "failed_count": 0, | |
| 123 | + } | |
| 124 | + | |
| 125 | + def _get_transformer_bundle(self, tenant_id: str): | |
| 126 | + return _FakeTransformer(), None, False | |
| 127 | + | |
| 128 | + | |
| 129 | +@pytest.fixture | |
| 130 | +def indexer_client(monkeypatch): | |
| 131 | + import api.indexer_app as indexer_app | |
| 132 | + import api.routes.indexer as indexer_routes | |
| 133 | + | |
| 134 | + monkeypatch.setattr(indexer_app, "init_indexer_service", lambda es_host="": None) | |
| 135 | + monkeypatch.setattr(indexer_routes, "get_bulk_indexing_service", lambda: _FakeBulkService()) | |
| 136 | + monkeypatch.setattr(indexer_routes, "get_incremental_service", lambda: _FakeIncrementalService()) | |
| 137 | + monkeypatch.setattr(indexer_routes, "get_es_client", lambda: object()) | |
| 138 | + | |
| 139 | + with TestClient(indexer_app.app) as client: | |
| 140 | + yield client | |
| 141 | + | |
| 142 | + | |
| 143 | +def test_indexer_reindex_contract(indexer_client: TestClient): | |
| 144 | + response = indexer_client.post( | |
| 145 | + "/indexer/reindex", | |
| 146 | + json={"tenant_id": "162", "batch_size": 100}, | |
| 147 | + ) | |
| 148 | + assert response.status_code == 200 | |
| 149 | + assert response.json()["success"] is True | |
| 150 | + | |
| 151 | + | |
| 152 | +def test_indexer_incremental_contract(indexer_client: TestClient): | |
| 153 | + response = indexer_client.post( | |
| 154 | + "/indexer/index", | |
| 155 | + json={"tenant_id": "162", "spu_ids": ["1001", "1002"]}, | |
| 156 | + ) | |
| 157 | + assert response.status_code == 200 | |
| 158 | + data = response.json() | |
| 159 | + assert data["success_count"] == 2 | |
| 160 | + | |
| 161 | + | |
| 162 | +def test_indexer_build_docs_contract(indexer_client: TestClient): | |
| 163 | + response = indexer_client.post( | |
| 164 | + "/indexer/build-docs", | |
| 165 | + json={ | |
| 166 | + "tenant_id": "162", | |
| 167 | + "items": [{"spu": {"id": 1, "title": "T-shirt"}, "skus": [], "options": []}], | |
| 168 | + }, | |
| 169 | + ) | |
| 170 | + assert response.status_code == 200 | |
| 171 | + data = response.json() | |
| 172 | + assert data["success_count"] == 1 | |
| 173 | + assert data["docs"][0]["spu_id"] == "1" | |
| 174 | + | |
| 175 | + | |
| 176 | +class _FakeTextModel: | |
| 177 | + def encode_batch(self, texts, batch_size=32, device="cpu"): | |
| 178 | + return [np.array([0.1, 0.2, 0.3], dtype=np.float32) for _ in texts] | |
| 179 | + | |
| 180 | + | |
| 181 | +class _FakeImageModel: | |
| 182 | + def encode_image_urls(self, urls, batch_size=8): | |
| 183 | + return [np.array([0.3, 0.2, 0.1], dtype=np.float32) for _ in urls] | |
| 184 | + | |
| 185 | + | |
| 186 | +@pytest.fixture | |
| 187 | +def embedding_client(): | |
| 188 | + import embeddings.server as emb_server | |
| 189 | + | |
| 190 | + emb_server.app.router.on_startup.clear() | |
| 191 | + emb_server._text_model = _FakeTextModel() | |
| 192 | + emb_server._image_model = _FakeImageModel() | |
| 193 | + | |
| 194 | + with TestClient(emb_server.app) as client: | |
| 195 | + yield client | |
| 196 | + | |
| 197 | + | |
| 198 | +def test_embedding_text_contract(embedding_client: TestClient): | |
| 199 | + response = embedding_client.post("/embed/text", json=["hello", "world"]) | |
| 200 | + assert response.status_code == 200 | |
| 201 | + data = response.json() | |
| 202 | + assert len(data) == 2 | |
| 203 | + assert len(data[0]) == 3 | |
| 204 | + | |
| 205 | + | |
| 206 | +def test_embedding_image_contract(embedding_client: TestClient): | |
| 207 | + response = embedding_client.post("/embed/image", json=["https://example.com/a.jpg"]) | |
| 208 | + assert response.status_code == 200 | |
| 209 | + assert len(response.json()[0]) == 3 | |
| 210 | + | |
| 211 | + | |
| 212 | +class _FakeTranslator: | |
| 213 | + model = "qwen" | |
| 214 | + use_cache = True | |
| 215 | + | |
| 216 | + def translate(self, text: str, target_lang: str, source_lang: str | None = None, prompt: str | None = None): | |
| 217 | + return f"{text}-{target_lang}" | |
| 218 | + | |
| 219 | + | |
| 220 | +@pytest.fixture | |
| 221 | +def translator_client(monkeypatch): | |
| 222 | + import api.translator_app as translator_app | |
| 223 | + | |
| 224 | + translator_app.app.router.on_startup.clear() | |
| 225 | + monkeypatch.setattr(translator_app, "get_translator", lambda model="qwen": _FakeTranslator()) | |
| 226 | + | |
| 227 | + with TestClient(translator_app.app) as client: | |
| 228 | + yield client | |
| 229 | + | |
| 230 | + | |
| 231 | +def test_translator_api_contract(translator_client: TestClient): | |
| 232 | + response = translator_client.post( | |
| 233 | + "/translate", | |
| 234 | + json={"text": "商品名称", "target_lang": "en", "source_lang": "zh"}, | |
| 235 | + ) | |
| 236 | + assert response.status_code == 200 | |
| 237 | + assert response.json()["translated_text"] == "商品名称-en" | |
| 238 | + | |
| 239 | + | |
| 240 | +def test_translator_health_contract(translator_client: TestClient): | |
| 241 | + response = translator_client.get("/health") | |
| 242 | + assert response.status_code == 200 | |
| 243 | + assert response.json()["status"] == "healthy" | |
| 244 | + | |
| 245 | + | |
| 246 | +class _FakeReranker: | |
| 247 | + _model_name = "fake-reranker" | |
| 248 | + | |
| 249 | + def score_with_meta(self, query: str, docs: List[str], normalize: bool = True): | |
| 250 | + scores = [float(i + 1) for i in range(len(docs))] | |
| 251 | + meta: Dict[str, Any] = {"input_docs": len(docs), "unique_docs": len(set(docs))} | |
| 252 | + return scores, meta | |
| 253 | + | |
| 254 | + | |
| 255 | +@pytest.fixture | |
| 256 | +def reranker_client(): | |
| 257 | + import reranker.server as reranker_server | |
| 258 | + | |
| 259 | + reranker_server.app.router.on_startup.clear() | |
| 260 | + reranker_server._reranker = _FakeReranker() | |
| 261 | + reranker_server._backend_name = "fake" | |
| 262 | + | |
| 263 | + with TestClient(reranker_server.app) as client: | |
| 264 | + yield client | |
| 265 | + | |
| 266 | + | |
| 267 | +def test_reranker_api_contract(reranker_client: TestClient): | |
| 268 | + response = reranker_client.post( | |
| 269 | + "/rerank", | |
| 270 | + json={"query": "wireless mouse", "docs": ["doc-a", "doc-b"]}, | |
| 271 | + ) | |
| 272 | + assert response.status_code == 200 | |
| 273 | + data = response.json() | |
| 274 | + assert data["scores"] == [1.0, 2.0] | |
| 275 | + assert data["meta"]["input_docs"] == 2 | |
| 276 | + | |
| 277 | + | |
| 278 | +def test_reranker_health_contract(reranker_client: TestClient): | |
| 279 | + response = reranker_client.get("/health") | |
| 280 | + assert response.status_code == 200 | |
| 281 | + assert response.json()["status"] == "ok" | ... | ... |
tests/test_cloud_embedding.py
| ... | ... | @@ -11,6 +11,8 @@ import time |
| 11 | 11 | from datetime import datetime |
| 12 | 12 | from pathlib import Path |
| 13 | 13 | |
| 14 | +import pytest | |
| 15 | + | |
| 14 | 16 | # Add parent directory to path |
| 15 | 17 | sys.path.insert(0, str(Path(__file__).parent.parent)) |
| 16 | 18 | |
| ... | ... | @@ -44,6 +46,7 @@ def read_queries(file_path: str, limit: int = 100) -> list: |
| 44 | 46 | return queries |
| 45 | 47 | |
| 46 | 48 | |
| 49 | +@pytest.mark.skip(reason="Requires data file and DASHSCOPE_API_KEY; run manually when needed") | |
| 47 | 50 | def test_cloud_embedding(queries_file: str, num_queries: int = 100): |
| 48 | 51 | """ |
| 49 | 52 | Test cloud embedding with queries from file. | ... | ... |
tests/test_cnclip_service.py
| ... | ... | @@ -13,11 +13,17 @@ CN-CLIP 服务测试脚本 |
| 13 | 13 | """ |
| 14 | 14 | |
| 15 | 15 | import sys |
| 16 | -import numpy as np | |
| 17 | -from clip_client import Client | |
| 18 | 16 | |
| 17 | +import pytest | |
| 19 | 18 | |
| 20 | -def test_encoding(client, test_name, inputs): | |
| 19 | +try: | |
| 20 | + import numpy as np | |
| 21 | + from clip_client import Client | |
| 22 | +except ImportError: | |
| 23 | + pytest.skip("clip_client not installed (optional clip-as-service client)", allow_module_level=True) | |
| 24 | + | |
| 25 | + | |
| 26 | +def _test_encoding(client, test_name, inputs): | |
| 21 | 27 | """测试编码功能""" |
| 22 | 28 | print(f"\n{test_name}...") |
| 23 | 29 | try: |
| ... | ... | @@ -74,21 +80,21 @@ def main(): |
| 74 | 80 | results = [] |
| 75 | 81 | |
| 76 | 82 | # 测试1: 文本编码 |
| 77 | - results.append(test_encoding( | |
| 83 | + results.append(_test_encoding( | |
| 78 | 84 | client, |
| 79 | 85 | "测试1: 编码文本", |
| 80 | 86 | ['这是一个测试文本', '另一个测试文本'] |
| 81 | 87 | )) |
| 82 | 88 | |
| 83 | 89 | # 测试2: 图像编码 |
| 84 | - results.append(test_encoding( | |
| 90 | + results.append(_test_encoding( | |
| 85 | 91 | client, |
| 86 | 92 | "测试2: 编码图像(远程 URL)", |
| 87 | 93 | ['https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg'] |
| 88 | 94 | )) |
| 89 | 95 | |
| 90 | 96 | # 测试3: 混合编码 |
| 91 | - results.append(test_encoding( | |
| 97 | + results.append(_test_encoding( | |
| 92 | 98 | client, |
| 93 | 99 | "测试3: 混合编码(文本和图像)", |
| 94 | 100 | ['这是一段文本', 'https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg'] | ... | ... |