Commit 7299bae6396856b85f415a780200216d2d97fede
1 parent
bc54124c
tests
Showing
7 changed files
with
435 additions
and
723 deletions
Show diff stats
.github/workflows/test.yml
| 1 | -name: saas-search Test Pipeline | 1 | +name: CI - Service Contracts |
| 2 | 2 | ||
| 3 | on: | 3 | on: |
| 4 | push: | 4 | push: |
| 5 | - branches: [ main, master, develop ] | 5 | + branches: [main, master, develop] |
| 6 | pull_request: | 6 | pull_request: |
| 7 | - branches: [ main, master, develop ] | ||
| 8 | - workflow_dispatch: # 允许手动触发 | ||
| 9 | - | ||
| 10 | -env: | ||
| 11 | - PYTHON_VERSION: '3.9' | ||
| 12 | - NODE_VERSION: '16' | 7 | + branches: [main, master, develop] |
| 8 | + workflow_dispatch: | ||
| 13 | 9 | ||
| 14 | jobs: | 10 | jobs: |
| 15 | - # 代码质量检查 | ||
| 16 | - code-quality: | ||
| 17 | - runs-on: ubuntu-latest | ||
| 18 | - name: Code Quality Check | ||
| 19 | - | ||
| 20 | - steps: | ||
| 21 | - - name: Checkout code | ||
| 22 | - uses: actions/checkout@v4 | ||
| 23 | - | ||
| 24 | - - name: Set up Python | ||
| 25 | - uses: actions/setup-python@v4 | ||
| 26 | - with: | ||
| 27 | - python-version: ${{ env.PYTHON_VERSION }} | ||
| 28 | - | ||
| 29 | - - name: Install dependencies | ||
| 30 | - run: | | ||
| 31 | - python -m pip install --upgrade pip | ||
| 32 | - pip install flake8 black isort mypy pylint | ||
| 33 | - pip install -r requirements.txt | ||
| 34 | - | ||
| 35 | - - name: Run Black (code formatting) | ||
| 36 | - run: | | ||
| 37 | - black --check --diff . | ||
| 38 | - | ||
| 39 | - - name: Run isort (import sorting) | ||
| 40 | - run: | | ||
| 41 | - isort --check-only --diff . | ||
| 42 | - | ||
| 43 | - - name: Run Flake8 (linting) | ||
| 44 | - run: | | ||
| 45 | - flake8 --max-line-length=100 --ignore=E203,W503 . | ||
| 46 | - | ||
| 47 | - - name: Run MyPy (type checking) | ||
| 48 | - run: | | ||
| 49 | - mypy --ignore-missing-imports --no-strict-optional . | ||
| 50 | - | ||
| 51 | - - name: Run Pylint | ||
| 52 | - run: | | ||
| 53 | - pylint --disable=C0114,C0115,C0116 --errors-only . | ||
| 54 | - | ||
| 55 | - # 单元测试 | ||
| 56 | - unit-tests: | ||
| 57 | - runs-on: ubuntu-latest | ||
| 58 | - name: Unit Tests | ||
| 59 | - | ||
| 60 | - strategy: | ||
| 61 | - matrix: | ||
| 62 | - python-version: ['3.8', '3.9', '3.10', '3.11'] | ||
| 63 | - | ||
| 64 | - steps: | ||
| 65 | - - name: Checkout code | ||
| 66 | - uses: actions/checkout@v4 | ||
| 67 | - | ||
| 68 | - - name: Set up Python ${{ matrix.python-version }} | ||
| 69 | - uses: actions/setup-python@v4 | ||
| 70 | - with: | ||
| 71 | - python-version: ${{ matrix.python-version }} | ||
| 72 | - | ||
| 73 | - - name: Cache pip dependencies | ||
| 74 | - uses: actions/cache@v3 | ||
| 75 | - with: | ||
| 76 | - path: ~/.cache/pip | ||
| 77 | - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements*.txt') }} | ||
| 78 | - restore-keys: | | ||
| 79 | - ${{ runner.os }}-pip- | ||
| 80 | - | ||
| 81 | - - name: Install dependencies | ||
| 82 | - run: | | ||
| 83 | - python -m pip install --upgrade pip | ||
| 84 | - pip install pytest pytest-cov pytest-json-report | ||
| 85 | - pip install -r requirements.txt | ||
| 86 | - | ||
| 87 | - - name: Create test logs directory | ||
| 88 | - run: mkdir -p test_logs | ||
| 89 | - | ||
| 90 | - - name: Run unit tests | ||
| 91 | - run: | | ||
| 92 | - python -m pytest tests/unit/ \ | ||
| 93 | - -v \ | ||
| 94 | - --tb=short \ | ||
| 95 | - --cov=. \ | ||
| 96 | - --cov-report=xml \ | ||
| 97 | - --cov-report=html \ | ||
| 98 | - --cov-report=term-missing \ | ||
| 99 | - --json-report \ | ||
| 100 | - --json-report-file=test_logs/unit_test_results.json | ||
| 101 | - | ||
| 102 | - - name: Upload coverage to Codecov | ||
| 103 | - uses: codecov/codecov-action@v3 | ||
| 104 | - with: | ||
| 105 | - file: ./coverage.xml | ||
| 106 | - flags: unittests | ||
| 107 | - name: codecov-umbrella | ||
| 108 | - | ||
| 109 | - - name: Upload unit test results | ||
| 110 | - uses: actions/upload-artifact@v3 | ||
| 111 | - if: always() | ||
| 112 | - with: | ||
| 113 | - name: unit-test-results-${{ matrix.python-version }} | ||
| 114 | - path: | | ||
| 115 | - test_logs/unit_test_results.json | ||
| 116 | - htmlcov/ | ||
| 117 | - | ||
| 118 | - # 集成测试 | ||
| 119 | - integration-tests: | ||
| 120 | - runs-on: ubuntu-latest | ||
| 121 | - name: Integration Tests | ||
| 122 | - needs: [code-quality, unit-tests] | ||
| 123 | - | ||
| 124 | - services: | ||
| 125 | - elasticsearch: | ||
| 126 | - image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0 | ||
| 127 | - env: | ||
| 128 | - discovery.type: single-node | ||
| 129 | - ES_JAVA_OPTS: -Xms1g -Xmx1g | ||
| 130 | - xpack.security.enabled: false | ||
| 131 | - ports: | ||
| 132 | - - 9200:9200 | ||
| 133 | - options: >- | ||
| 134 | - --health-cmd "curl http://localhost:9200/_cluster/health" | ||
| 135 | - --health-interval 10s | ||
| 136 | - --health-timeout 5s | ||
| 137 | - --health-retries 10 | ||
| 138 | - | ||
| 139 | - steps: | ||
| 140 | - - name: Checkout code | ||
| 141 | - uses: actions/checkout@v4 | ||
| 142 | - | ||
| 143 | - - name: Set up Python | ||
| 144 | - uses: actions/setup-python@v4 | ||
| 145 | - with: | ||
| 146 | - python-version: ${{ env.PYTHON_VERSION }} | ||
| 147 | - | ||
| 148 | - - name: Install system dependencies | ||
| 149 | - run: | | ||
| 150 | - sudo apt-get update | ||
| 151 | - sudo apt-get install -y curl | ||
| 152 | - | ||
| 153 | - - name: Install Python dependencies | ||
| 154 | - run: | | ||
| 155 | - python -m pip install --upgrade pip | ||
| 156 | - pip install pytest pytest-json-report httpx | ||
| 157 | - pip install -r requirements.txt | ||
| 158 | - | ||
| 159 | - - name: Create test logs directory | ||
| 160 | - run: mkdir -p test_logs | ||
| 161 | - | ||
| 162 | - - name: Wait for Elasticsearch | ||
| 163 | - run: | | ||
| 164 | - echo "Waiting for Elasticsearch to be ready..." | ||
| 165 | - for i in {1..30}; do | ||
| 166 | - if curl -s http://localhost:9200/_cluster/health | grep -q '"status":"green\|yellow"'; then | ||
| 167 | - echo "Elasticsearch is ready" | ||
| 168 | - break | ||
| 169 | - fi | ||
| 170 | - echo "Attempt $i/30: Elasticsearch not ready yet" | ||
| 171 | - sleep 2 | ||
| 172 | - done | ||
| 173 | - | ||
| 174 | - - name: Setup test index | ||
| 175 | - run: | | ||
| 176 | - curl -X PUT http://localhost:9200/test_products \ | ||
| 177 | - -H 'Content-Type: application/json' \ | ||
| 178 | - -d '{ | ||
| 179 | - "settings": { | ||
| 180 | - "number_of_shards": 1, | ||
| 181 | - "number_of_replicas": 0 | ||
| 182 | - }, | ||
| 183 | - "mappings": { | ||
| 184 | - "properties": { | ||
| 185 | - "name": {"type": "text"}, | ||
| 186 | - "brand_name": {"type": "text"}, | ||
| 187 | - "tags": {"type": "text"}, | ||
| 188 | - "price": {"type": "double"}, | ||
| 189 | - "category_id": {"type": "integer"}, | ||
| 190 | - "spu_id": {"type": "keyword"}, | ||
| 191 | - "text_embedding": {"type": "dense_vector", "dims": 1024} | ||
| 192 | - } | ||
| 193 | - } | ||
| 194 | - }' | ||
| 195 | - | ||
| 196 | - - name: Insert test data | ||
| 197 | - run: | | ||
| 198 | - curl -X POST http://localhost:9200/test_products/_bulk \ | ||
| 199 | - -H 'Content-Type: application/json' \ | ||
| 200 | - --data-binary @- << 'EOF' | ||
| 201 | -{"index": {"_id": "1"}} | ||
| 202 | -{"name": "红色连衣裙", "brand_name": "测试品牌", "tags": ["红色", "连衣裙", "女装"], "price": 299.0, "category_id": 1, "spu_id": "dress_001"} | ||
| 203 | -{"index": {"_id": "2"}} | ||
| 204 | -{"name": "蓝色连衣裙", "brand_name": "测试品牌", "tags": ["蓝色", "连衣裙", "女装"], "price": 399.0, "category_id": 1, "spu_id": "dress_002"} | ||
| 205 | -{"index": {"_id": "3"}} | ||
| 206 | -{"name": "智能手机", "brand_name": "科技品牌", "tags": ["智能", "手机", "数码"], "price": 2999.0, "category_id": 2, "spu_id": "phone_001"} | ||
| 207 | -EOF | ||
| 208 | - | ||
| 209 | - - name: Run integration tests | ||
| 210 | - env: | ||
| 211 | - ES_HOST: http://localhost:9200 | ||
| 212 | - TENANT_ID: test_tenant | ||
| 213 | - TESTING_MODE: true | ||
| 214 | - run: | | ||
| 215 | - python -m pytest tests/integration/ \ | ||
| 216 | - -v \ | ||
| 217 | - --tb=short \ | ||
| 218 | - -m "not slow" \ | ||
| 219 | - --json-report \ | ||
| 220 | - --json-report-file=test_logs/integration_test_results.json | ||
| 221 | - | ||
| 222 | - - name: Upload integration test results | ||
| 223 | - uses: actions/upload-artifact@v3 | ||
| 224 | - if: always() | ||
| 225 | - with: | ||
| 226 | - name: integration-test-results | ||
| 227 | - path: test_logs/integration_test_results.json | ||
| 228 | - | ||
| 229 | - # API测试 | ||
| 230 | - api-tests: | ||
| 231 | - runs-on: ubuntu-latest | ||
| 232 | - name: API Tests | ||
| 233 | - needs: [code-quality, unit-tests] | ||
| 234 | - | ||
| 235 | - services: | ||
| 236 | - elasticsearch: | ||
| 237 | - image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0 | ||
| 238 | - env: | ||
| 239 | - discovery.type: single-node | ||
| 240 | - ES_JAVA_OPTS: -Xms1g -Xmx1g | ||
| 241 | - xpack.security.enabled: false | ||
| 242 | - ports: | ||
| 243 | - - 9200:9200 | ||
| 244 | - options: >- | ||
| 245 | - --health-cmd "curl http://localhost:9200/_cluster/health" | ||
| 246 | - --health-interval 10s | ||
| 247 | - --health-timeout 5s | ||
| 248 | - --health-retries 10 | ||
| 249 | - | ||
| 250 | - steps: | ||
| 251 | - - name: Checkout code | ||
| 252 | - uses: actions/checkout@v4 | ||
| 253 | - | ||
| 254 | - - name: Set up Python | ||
| 255 | - uses: actions/setup-python@v4 | ||
| 256 | - with: | ||
| 257 | - python-version: ${{ env.PYTHON_VERSION }} | ||
| 258 | - | ||
| 259 | - - name: Install system dependencies | ||
| 260 | - run: | | ||
| 261 | - sudo apt-get update | ||
| 262 | - sudo apt-get install -y curl | ||
| 263 | - | ||
| 264 | - - name: Install Python dependencies | ||
| 265 | - run: | | ||
| 266 | - python -m pip install --upgrade pip | ||
| 267 | - pip install pytest pytest-json-report httpx | ||
| 268 | - pip install -r requirements.txt | ||
| 269 | - | ||
| 270 | - - name: Create test logs directory | ||
| 271 | - run: mkdir -p test_logs | ||
| 272 | - | ||
| 273 | - - name: Wait for Elasticsearch | ||
| 274 | - run: | | ||
| 275 | - echo "Waiting for Elasticsearch to be ready..." | ||
| 276 | - for i in {1..30}; do | ||
| 277 | - if curl -s http://localhost:9200/_cluster/health | grep -q '"status":"green\|yellow"'; then | ||
| 278 | - echo "Elasticsearch is ready" | ||
| 279 | - break | ||
| 280 | - fi | ||
| 281 | - echo "Attempt $i/30: Elasticsearch not ready yet" | ||
| 282 | - sleep 2 | ||
| 283 | - done | ||
| 284 | - | ||
| 285 | - - name: Setup test index and data | ||
| 286 | - run: | | ||
| 287 | - # 创建索引 | ||
| 288 | - curl -X PUT http://localhost:9200/test_products \ | ||
| 289 | - -H 'Content-Type: application/json' \ | ||
| 290 | - -d '{ | ||
| 291 | - "settings": {"number_of_shards": 1, "number_of_replicas": 0}, | ||
| 292 | - "mappings": { | ||
| 293 | - "properties": { | ||
| 294 | - "name": {"type": "text"}, "brand_name": {"type": "text"}, | ||
| 295 | - "tags": {"type": "text"}, "price": {"type": "double"}, | ||
| 296 | - "category_id": {"type": "integer"}, "spu_id": {"type": "keyword"}, | ||
| 297 | - "text_embedding": {"type": "dense_vector", "dims": 1024} | ||
| 298 | - } | ||
| 299 | - } | ||
| 300 | - }' | ||
| 301 | - | ||
| 302 | - # 插入测试数据 | ||
| 303 | - curl -X POST http://localhost:9200/test_products/_bulk \ | ||
| 304 | - -H 'Content-Type: application/json' \ | ||
| 305 | - --data-binary @- << 'EOF' | ||
| 306 | -{"index": {"_id": "1"}} | ||
| 307 | -{"name": "红色连衣裙", "brand_name": "测试品牌", "tags": ["红色", "连衣裙", "女装"], "price": 299.0, "category_id": 1, "spu_id": "dress_001"} | ||
| 308 | -{"index": {"_id": "2"}} | ||
| 309 | -{"name": "蓝色连衣裙", "brand_name": "测试品牌", "tags": ["蓝色", "连衣裙", "女装"], "price": 399.0, "category_id": 1, "spu_id": "dress_002"} | ||
| 310 | -EOF | ||
| 311 | - | ||
| 312 | - - name: Start API service | ||
| 313 | - env: | ||
| 314 | - ES_HOST: http://localhost:9200 | ||
| 315 | - TENANT_ID: test_tenant | ||
| 316 | - API_HOST: 127.0.0.1 | ||
| 317 | - API_PORT: 6003 | ||
| 318 | - TESTING_MODE: true | ||
| 319 | - run: | | ||
| 320 | - python -m api.app \ | ||
| 321 | - --host $API_HOST \ | ||
| 322 | - --port $API_PORT \ | ||
| 323 | - --tenant $TENANT_ID \ | ||
| 324 | - --es-host $ES_HOST & | ||
| 325 | - echo $! > api.pid | ||
| 326 | - | ||
| 327 | - # 等待API服务启动 | ||
| 328 | - for i in {1..30}; do | ||
| 329 | - if curl -s http://$API_HOST:$API_PORT/health > /dev/null; then | ||
| 330 | - echo "API service is ready" | ||
| 331 | - break | ||
| 332 | - fi | ||
| 333 | - echo "Attempt $i/30: API service not ready yet" | ||
| 334 | - sleep 2 | ||
| 335 | - done | ||
| 336 | - | ||
| 337 | - - name: Run API tests | ||
| 338 | - env: | ||
| 339 | - ES_HOST: http://localhost:9200 | ||
| 340 | - API_HOST: 127.0.0.1 | ||
| 341 | - API_PORT: 6003 | ||
| 342 | - TENANT_ID: test_tenant | ||
| 343 | - TESTING_MODE: true | ||
| 344 | - run: | | ||
| 345 | - python -m pytest tests/integration/test_api_integration.py \ | ||
| 346 | - -v \ | ||
| 347 | - --tb=short \ | ||
| 348 | - --json-report \ | ||
| 349 | - --json-report-file=test_logs/api_test_results.json | ||
| 350 | - | ||
| 351 | - - name: Stop API service | ||
| 352 | - if: always() | ||
| 353 | - run: | | ||
| 354 | - if [ -f api.pid ]; then | ||
| 355 | - kill $(cat api.pid) || true | ||
| 356 | - rm api.pid | ||
| 357 | - fi | ||
| 358 | - | ||
| 359 | - - name: Upload API test results | ||
| 360 | - uses: actions/upload-artifact@v3 | ||
| 361 | - if: always() | ||
| 362 | - with: | ||
| 363 | - name: api-test-results | ||
| 364 | - path: test_logs/api_test_results.json | ||
| 365 | - | ||
| 366 | - # 性能测试 | ||
| 367 | - performance-tests: | ||
| 368 | - runs-on: ubuntu-latest | ||
| 369 | - name: Performance Tests | ||
| 370 | - needs: [code-quality, unit-tests] | ||
| 371 | - if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' | ||
| 372 | - | ||
| 373 | - services: | ||
| 374 | - elasticsearch: | ||
| 375 | - image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0 | ||
| 376 | - env: | ||
| 377 | - discovery.type: single-node | ||
| 378 | - ES_JAVA_OPTS: -Xms2g -Xmx2g | ||
| 379 | - xpack.security.enabled: false | ||
| 380 | - ports: | ||
| 381 | - - 9200:9200 | ||
| 382 | - options: >- | ||
| 383 | - --health-cmd "curl http://localhost:9200/_cluster/health" | ||
| 384 | - --health-interval 10s | ||
| 385 | - --health-timeout 5s | ||
| 386 | - --health-retries 10 | ||
| 387 | - | ||
| 388 | - steps: | ||
| 389 | - - name: Checkout code | ||
| 390 | - uses: actions/checkout@v4 | ||
| 391 | - | ||
| 392 | - - name: Set up Python | ||
| 393 | - uses: actions/setup-python@v4 | ||
| 394 | - with: | ||
| 395 | - python-version: ${{ env.PYTHON_VERSION }} | ||
| 396 | - | ||
| 397 | - - name: Install dependencies | ||
| 398 | - run: | | ||
| 399 | - python -m pip install --upgrade pip | ||
| 400 | - pip install pytest locust | ||
| 401 | - pip install -r requirements.txt | ||
| 402 | - | ||
| 403 | - - name: Wait for Elasticsearch | ||
| 404 | - run: | | ||
| 405 | - echo "Waiting for Elasticsearch to be ready..." | ||
| 406 | - for i in {1..30}; do | ||
| 407 | - if curl -s http://localhost:9200/_cluster/health | grep -q '"status":"green\|yellow"'; then | ||
| 408 | - echo "Elasticsearch is ready" | ||
| 409 | - break | ||
| 410 | - fi | ||
| 411 | - sleep 2 | ||
| 412 | - done | ||
| 413 | - | ||
| 414 | - - name: Setup test data | ||
| 415 | - run: | | ||
| 416 | - # 创建并填充测试索引 | ||
| 417 | - python scripts/create_test_data.py --count 1000 | ||
| 418 | - | ||
| 419 | - - name: Run performance tests | ||
| 420 | - env: | ||
| 421 | - ES_HOST: http://localhost:9200 | ||
| 422 | - TESTING_MODE: true | ||
| 423 | - run: | | ||
| 424 | - python scripts/run_performance_tests.py | ||
| 425 | - | ||
| 426 | - - name: Upload performance results | ||
| 427 | - uses: actions/upload-artifact@v3 | ||
| 428 | - if: always() | ||
| 429 | - with: | ||
| 430 | - name: performance-test-results | ||
| 431 | - path: performance_results/ | ||
| 432 | - | ||
| 433 | - # 安全扫描 | ||
| 434 | - security-scan: | 11 | + service-contract-tests: |
| 435 | runs-on: ubuntu-latest | 12 | runs-on: ubuntu-latest |
| 436 | - name: Security Scan | ||
| 437 | - needs: [code-quality] | 13 | + name: Service Contract Tests |
| 438 | 14 | ||
| 439 | steps: | 15 | steps: |
| 440 | - - name: Checkout code | ||
| 441 | - uses: actions/checkout@v4 | ||
| 442 | - | ||
| 443 | - - name: Set up Python | ||
| 444 | - uses: actions/setup-python@v4 | ||
| 445 | - with: | ||
| 446 | - python-version: ${{ env.PYTHON_VERSION }} | ||
| 447 | - | ||
| 448 | - - name: Install security scanning tools | ||
| 449 | - run: | | ||
| 450 | - python -m pip install --upgrade pip | ||
| 451 | - pip install safety bandit | ||
| 452 | - | ||
| 453 | - - name: Run Safety (dependency check) | ||
| 454 | - run: | | ||
| 455 | - safety check --json --output safety_report.json || true | ||
| 456 | - | ||
| 457 | - - name: Run Bandit (security linter) | ||
| 458 | - run: | | ||
| 459 | - bandit -r . -f json -o bandit_report.json || true | ||
| 460 | - | ||
| 461 | - - name: Upload security reports | ||
| 462 | - uses: actions/upload-artifact@v3 | ||
| 463 | - if: always() | ||
| 464 | - with: | ||
| 465 | - name: security-reports | ||
| 466 | - path: | | ||
| 467 | - safety_report.json | ||
| 468 | - bandit_report.json | ||
| 469 | - | ||
| 470 | - # 测试结果汇总 | ||
| 471 | - test-summary: | ||
| 472 | - runs-on: ubuntu-latest | ||
| 473 | - name: Test Summary | ||
| 474 | - needs: [unit-tests, integration-tests, api-tests, security-scan] | ||
| 475 | - if: always() | ||
| 476 | - | ||
| 477 | - steps: | ||
| 478 | - - name: Checkout code | ||
| 479 | - uses: actions/checkout@v4 | ||
| 480 | - | ||
| 481 | - - name: Download all test artifacts | ||
| 482 | - uses: actions/download-artifact@v3 | ||
| 483 | - | ||
| 484 | - - name: Generate test summary | ||
| 485 | - run: | | ||
| 486 | - python scripts/generate_test_summary.py | ||
| 487 | - | ||
| 488 | - - name: Upload final report | ||
| 489 | - uses: actions/upload-artifact@v3 | ||
| 490 | - with: | ||
| 491 | - name: final-test-report | ||
| 492 | - path: final_test_report.* | ||
| 493 | - | ||
| 494 | - - name: Comment PR with results | ||
| 495 | - if: github.event_name == 'pull_request' | ||
| 496 | - uses: actions/github-script@v6 | ||
| 497 | - with: | ||
| 498 | - script: | | ||
| 499 | - const fs = require('fs'); | ||
| 500 | - | ||
| 501 | - // 读取测试报告 | ||
| 502 | - let reportContent = ''; | ||
| 503 | - try { | ||
| 504 | - reportContent = fs.readFileSync('final_test_report.txt', 'utf8'); | ||
| 505 | - } catch (e) { | ||
| 506 | - console.log('Could not read report file'); | ||
| 507 | - return; | ||
| 508 | - } | ||
| 509 | - | ||
| 510 | - // 提取摘要信息 | ||
| 511 | - const lines = reportContent.split('\n'); | ||
| 512 | - let summary = ''; | ||
| 513 | - let inSummary = false; | ||
| 514 | - | ||
| 515 | - for (const line of lines) { | ||
| 516 | - if (line.includes('测试摘要')) { | ||
| 517 | - inSummary = true; | ||
| 518 | - continue; | ||
| 519 | - } | ||
| 520 | - if (inSummary && line.includes('测试套件详情')) { | ||
| 521 | - break; | ||
| 522 | - } | ||
| 523 | - if (inSummary && line.trim()) { | ||
| 524 | - summary += line + '\n'; | ||
| 525 | - } | ||
| 526 | - } | ||
| 527 | - | ||
| 528 | - // 构建评论内容 | ||
| 529 | - const comment = `## 🧪 测试报告\n\n${summary}\n\n详细的测试报告请查看 [Artifacts](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) 部分。`; | ||
| 530 | - | ||
| 531 | - // 发送评论 | ||
| 532 | - github.rest.issues.createComment({ | ||
| 533 | - issue_number: context.issue.number, | ||
| 534 | - owner: context.repo.owner, | ||
| 535 | - repo: context.repo.repo, | ||
| 536 | - body: comment | ||
| 537 | - }); | ||
| 538 | \ No newline at end of file | 16 | \ No newline at end of file |
| 17 | + - name: Checkout code | ||
| 18 | + uses: actions/checkout@v4 | ||
| 19 | + | ||
| 20 | + - name: Set up Python | ||
| 21 | + uses: actions/setup-python@v5 | ||
| 22 | + with: | ||
| 23 | + python-version: "3.11" | ||
| 24 | + | ||
| 25 | + - name: Install dependencies | ||
| 26 | + run: | | ||
| 27 | + python -m pip install --upgrade pip | ||
| 28 | + pip install -r requirements.txt | ||
| 29 | + | ||
| 30 | + - name: Run CI contract tests | ||
| 31 | + run: | | ||
| 32 | + python -m pytest tests/ci -q | ||
| 539 | \ No newline at end of file | 33 | \ No newline at end of file |
README.md
| 1 | -# TODO | ||
| 2 | - | ||
| 3 | -**多语言索引**:已改为可配置的 `index_languages`(默认为 `["en", "zh"]`),商家可勾选主市场语言。支持语言见 `config.tenant_config_loader.SUPPORTED_INDEX_LANGUAGES`(含 en, zh, zh_tw, ru, ja, ko, es, fr, pt, de, it, th, vi, id, ms, ar, hi, he, my, ta, ur, bn, pl, nl, ro, tr, km, lo, yue, cs, el, sv, hu, da, fi, uk, bg 等)。 | ||
| 4 | - | ||
| 5 | -前端: | ||
| 6 | -搜索模态框 | ||
| 7 | -点击搜索的时候,弹出 搜索模态框,参考 react、AJAX等技术来实现,搜索模态框的页面宽度和原始页面相同(占满),左侧是suggestions,右侧是即使刷新的搜索结果(每输入一个字母都刷新一次结果)。 | ||
| 8 | -但是要注意:搜索过程中,后端不要触发翻译(因为输入过程中的query翻译结果会有问题),因此需要增加一个参数:搜索类型,默认为当前的回车后发起的搜索,如果是输入过程中的结果刷新则类型为typing。 | ||
| 9 | - | ||
| 10 | - | ||
| 11 | - | ||
| 12 | -多语言: | ||
| 13 | -语义: | ||
| 14 | -多语言: | ||
| 15 | -1. dis_max的方式 | ||
| 16 | -"query" : { | ||
| 17 | - "dis_max" : { | ||
| 18 | - "queries" : [ | ||
| 19 | - {"match" : { "title.en" : xxx }}, | ||
| 20 | - {"match" : { "title.zh" : xxx }}, | ||
| 21 | - {"match" : { "title_xx" : xxx }} | ||
| 22 | - ], | ||
| 23 | - "tie_breakler" : 0.8 | ||
| 24 | - } | ||
| 25 | -} | ||
| 26 | - | ||
| 27 | - | ||
| 28 | - | ||
| 29 | -"corss_field": | ||
| 30 | -"multi_match" : { | ||
| 31 | - "query" : | ||
| 32 | - "fields" : [...], | ||
| 33 | - "type": "cross_fields", | ||
| 34 | - "operator" : "and" | ||
| 35 | -} | ||
| 36 | - | ||
| 37 | - | ||
| 38 | -支持英文的拼写接错: | ||
| 39 | -title: multi_field | ||
| 40 | -"query" : { | ||
| 41 | - "query_string" : { | ||
| 42 | - "query": "xxx", | ||
| 43 | - "default_field": "title.ngram", | ||
| 44 | - "minimum_should_match": "85%" | ||
| 45 | - } | ||
| 46 | -} | ||
| 47 | - | ||
| 48 | - | ||
| 49 | - | ||
| 50 | -业务提权: | ||
| 51 | -rescore: window_size query_weight rescore_query_weight | ||
| 52 | - | ||
| 53 | -function_score: | ||
| 54 | -boost | ||
| 55 | -以及 可以把每个子查询用function_score包一下 | ||
| 56 | - | ||
| 57 | - | ||
| 58 | - | ||
| 59 | - | ||
| 60 | -query anchor | ||
| 61 | -我想给elasticsearch 增加字段 query anchor ,即哪些query点击到了这个doc,一个doc下面有多个query anchor,每个query anchor又有这两个属性:weight、dweight,分别代表 query在doc下的点击分布权重、doc在query下的点击分布权重。请问该如何设计这两个ES字段。 | ||
| 62 | - | ||
| 63 | -需要有zh en两套query anchor,因为他们的解析器不一样。 | ||
| 64 | - | ||
| 65 | -他的功能是辅助召回和排序。我搜索一个query,那么每个query跟 doc中的query anchor的相关性,也就是,除了将query到title 和keywords / brief等文本字段中搜索,也到 query anchor中搜索,从而辅助召回和相关性的计算。 | ||
| 66 | - | ||
| 67 | - | 1 | +# 电商搜索引擎 SaaS |
| 68 | 2 | ||
| 3 | +多租户、可配置、可扩展的电商搜索平台(Shoplazza 等独立站场景)。 | ||
| 69 | 4 | ||
| 5 | +README 用于给后续开发者建立统一认知:**系统框架、模块边界、设计原则、研发流程与 CI 测试入口**,帮助持续迭代时避免分叉设计与冗余代码。 | ||
| 70 | 6 | ||
| 71 | -# 电商搜索引擎 SaaS | 7 | +--- |
| 72 | 8 | ||
| 73 | -一个针对跨境独立站(店匠 Shoplazza 等)的多租户可配置搜索平台。README 作为项目导航入口,帮助你在不同阶段定位到更详细的文档。 | 9 | +## 1) 项目目标与边界 |
| 74 | 10 | ||
| 11 | +- **目标**:在统一架构下支持关键词检索、语义检索、分面过滤、多语言、重排、图片检索。 | ||
| 12 | +- **边界**:本仓库负责搜索核心能力与服务编排;业务方通过标准 HTTP API 对接。 | ||
| 13 | +- **核心约束**: | ||
| 14 | + - 调用方稳定(API/Provider 契约优先) | ||
| 15 | + - 配置单一来源(`config/config.yaml` + `.env` 覆盖) | ||
| 16 | + - 扩展优先走插件化(provider/backend),避免散落式分叉实现 | ||
| 75 | 17 | ||
| 76 | -## 项目环境 | 18 | +--- |
| 77 | 19 | ||
| 78 | -以项目根目录的 **`activate.sh`** 为准(**优先激活 venv:`./.venv`,并加载 `.env`;兼容 Conda 回退**): | 20 | +## 2) 快速开始 |
| 79 | 21 | ||
| 80 | ```bash | 22 | ```bash |
| 81 | -# 推荐:首次创建 venv(默认安装基础依赖) | 23 | +# 首次创建环境(默认基础依赖) |
| 82 | ./scripts/create_venv.sh | 24 | ./scripts/create_venv.sh |
| 83 | - | ||
| 84 | -# 如需本地向量/图片编码(会安装 torch/transformers 等重依赖) | ||
| 85 | -# INSTALL_ML=1 ./scripts/create_venv.sh | ||
| 86 | source activate.sh | 25 | source activate.sh |
| 87 | -``` | ||
| 88 | 26 | ||
| 89 | -新机器首次需创建环境,见 `docs/环境配置说明.md`(推荐 venv;Conda 为兼容旧流程)。 | 27 | +# 启动核心服务(backend/indexer/frontend) |
| 28 | +./run.sh | ||
| 90 | 29 | ||
| 91 | -## 测试pipeline | 30 | +# 可选:附加能力服务 |
| 31 | +START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh | ||
| 92 | 32 | ||
| 93 | -1. | ||
| 94 | -店铺1 tenant_id=162: | ||
| 95 | -fake数据 生成商品导入数据 提交到店匠的店铺: | ||
| 96 | -cd /data/saas-search && source activate.sh && python scripts/csv_to_excel_multi_variant.py --output with_colors.xlsx | 33 | +# 查看状态 |
| 34 | +./scripts/service_ctl.sh status | ||
| 35 | +``` | ||
| 97 | 36 | ||
| 98 | -店铺2 tenant_id= | 37 | +核心端口: |
| 99 | 38 | ||
| 39 | +- `6002` backend(`/search/*`, `/admin/*`) | ||
| 40 | +- `6004` indexer(`/indexer/*`) | ||
| 41 | +- `6003` frontend | ||
| 42 | +- `6005` embedding(可选) | ||
| 43 | +- `6006` translator(可选) | ||
| 44 | +- `6007` reranker(可选) | ||
| 100 | 45 | ||
| 101 | -2. 后端:自动同步到mysql | 46 | +更完整示例见 `docs/QUICKSTART.md`。 |
| 102 | 47 | ||
| 103 | -3. mysql到ES: | 48 | +--- |
| 104 | 49 | ||
| 105 | -python scripts/recreate_and_import.py \ | ||
| 106 | - --tenant-id 162 \ | ||
| 107 | - --db-host <mysql_host> \ | ||
| 108 | - --db-database saas \ | ||
| 109 | - --db-username saas \ | ||
| 110 | - --db-password <password> \ | ||
| 111 | - --es-host http://localhost:9200 | 50 | +## 3) 总体架构(开发者视角) |
| 112 | 51 | ||
| 113 | -构造查询: | ||
| 114 | -参考 @常用查询 - ES.md | 52 | +- `api/`:统一 API 入口(search/admin/indexer app) |
| 53 | +- `search/`:召回、排序、结果组织 | ||
| 54 | +- `query/`:查询解析、多语言处理、改写 | ||
| 55 | +- `indexer/`:MySQL 行数据 -> ES 文档的转换与索引流程 | ||
| 56 | +- `providers/`:能力调用抽象(translation/embedding/rerank) | ||
| 57 | +- `reranker/`:重排服务及后端实现 | ||
| 58 | +- `embeddings/`:向量服务(文本/图像) | ||
| 59 | +- `config/`:配置加载与服务配置解析 | ||
| 115 | 60 | ||
| 61 | +关键设计:**Provider(调用方式)与 Backend(推理实现)分离**,新增能力优先在协议与工厂注册,不改调用方主流程。 | ||
| 116 | 62 | ||
| 117 | -## 核心能力速览 | 63 | +--- |
| 118 | 64 | ||
| 119 | -- **多语言 + 自动翻译**:中文、英文、俄文等语言检测与路由(BGE-M3、DeepL) | ||
| 120 | -- **语义 + 关键词混排**:BM25、dense vector(BGE-M3/CN-CLIP)融合 | ||
| 121 | -- **布尔与分面**:AND / OR / ANDNOT / RANK、Terms & Range facets | ||
| 122 | -- **多租户隔离**:共享 `search_products` 索引,通过 `tenant_id` 严格隔离 | ||
| 123 | -- **可配置化**:字段/索引域/排序表达式/查询改写全部配置驱动 | ||
| 124 | -- **脚本化流水线**:Mock/CSV 数据 → MySQL → Elasticsearch → API/前端 | 65 | +## 4) 设计原则(避免后续分叉) |
| 125 | 66 | ||
| 126 | -## 新人入口 | 67 | +- **单一配置源**:服务地址、provider 选择、后端参数统一在 `config/config.yaml`,环境变量仅做覆盖。 |
| 68 | +- **接口契约优先**:外部 API 契约与 provider 契约稳定,内部重构不影响调用方。 | ||
| 69 | +- **扩展走工厂**:新增 provider/backend 必须在工厂函数中显式注册,禁止旁路分支。 | ||
| 70 | +- **可观测性优先**:健康检查、关键日志、请求上下文必须可追踪。 | ||
| 71 | +- **测试优先保障契约**:CI 首先保证接口契约和核心路径可用,再逐步扩展性能与业务测试。 | ||
| 127 | 72 | ||
| 128 | -**→ 开发者必读**:[docs/DEVELOPER_GUIDE.md](docs/DEVELOPER_GUIDE.md) — 项目全貌、设计原则、扩展规范与迭代检查清单,保证后续开发在统一框架内进行。 | 73 | +--- |
| 129 | 74 | ||
| 130 | -**→ 快速上手**:[docs/QUICKSTART.md](docs/QUICKSTART.md) — 环境、服务、模块、请求示例一页搞定。 | 75 | +## 5) 文档入口(建议阅读顺序) |
| 131 | 76 | ||
| 132 | | 步骤 | 文档 | | 77 | | 步骤 | 文档 | |
| 133 | |------|------| | 78 | |------|------| |
| 134 | -| 0. 框架与规范(推荐首读) | `docs/DEVELOPER_GUIDE.md` | | ||
| 135 | -| 1. 环境与启动 | `docs/QUICKSTART.md` | | ||
| 136 | -| 2. 搜索/索引 API | `docs/QUICKSTART.md` §3、`docs/搜索API速查表.md` | | ||
| 137 | -| 3. 运维与故障 | `docs/Usage-Guide.md` | | ||
| 138 | -| 4. 架构与扩展 | `docs/PROVIDER_ARCHITECTURE.md`、`docs/MODULE_EXTENSION_SPEC.md`、`docs/系统设计文档.md` | | ||
| 139 | - | ||
| 140 | -### Runtimes & 命令示例 | 79 | +| 0. 全局规范(首读) | `docs/DEVELOPER_GUIDE.md` | |
| 80 | +| 1. 开发与配置 | `docs/QUICKSTART.md` | | ||
| 81 | +| 2. 运行与排障 | `docs/Usage-Guide.md` | | ||
| 82 | +| 3. API 详细说明 | `docs/搜索API对接指南.md` | | ||
| 83 | +| 4. 快速参数速查 | `docs/搜索API速查表.md` | | ||
| 84 | +| 5. 首次环境搭建 | `docs/环境配置说明.md` | | ||
| 141 | 85 | ||
| 142 | -```bash | ||
| 143 | -# 1. 安装依赖与准备服务(环境创建见 docs/环境配置说明.md) | ||
| 144 | -source activate.sh # 或先 export CONDA_ROOT=你的conda路径 | ||
| 145 | -pip install -r requirements.txt # 若用 environment.yml 创建环境可省略 | ||
| 146 | -docker run -d --name es -p 9200:9200 elasticsearch:8.11.0 | 86 | +--- |
| 147 | 87 | ||
| 148 | -# 2. 构造测试数据并导入 MySQL | ||
| 149 | -./scripts/mock_data.sh # 详见 TEST_DATA_GUIDE.md | 88 | +## 6) 持续集成测试(推荐最小集) |
| 150 | 89 | ||
| 151 | -# 3. 创建租户索引结构并导入数据(推荐) | ||
| 152 | -./scripts/create_tenant_index.sh 162 | ||
| 153 | -curl -X POST "http://localhost:6004/indexer/reindex" \ | ||
| 154 | - -H "Content-Type: application/json" \ | ||
| 155 | - -d '{"tenant_id":"162","batch_size":500}' | 90 | +本仓库提供一套轻量、稳定、易维护的 CI 测试入口,覆盖以下服务契约: |
| 156 | 91 | ||
| 157 | -# 4. 启动核心服务(backend/indexer/frontend) | ||
| 158 | -./run.sh | 92 | +- 搜索接口(search API) |
| 93 | +- 索引接口(indexer API) | ||
| 94 | +- 向量服务(embedding service) | ||
| 95 | +- 翻译服务(translator service) | ||
| 96 | +- 重排服务(reranker service) | ||
| 159 | 97 | ||
| 160 | -# (可选)附加启动 embedding / translator / reranker | ||
| 161 | -START_EMBEDDING=1 START_TRANSLATOR=1 START_RERANKER=1 ./run.sh | ||
| 162 | -# | ||
| 163 | -# 查看服务状态 / 停止 | ||
| 164 | -./scripts/service_ctl.sh status | ||
| 165 | -./scripts/stop.sh | 98 | +本地运行: |
| 166 | 99 | ||
| 167 | -# 5. 调用文本搜索 API | ||
| 168 | -curl -X POST http://localhost:6002/search/ \ | ||
| 169 | - -H "Content-Type: application/json" \ | ||
| 170 | - -H "X-Tenant-ID: 1" \ | ||
| 171 | - -d '{"query": "玩具", "size": 10}' | 100 | +```bash |
| 101 | +source activate.sh | ||
| 102 | +python -m pytest tests/ci -q | ||
| 172 | ``` | 103 | ``` |
| 173 | 104 | ||
| 174 | -## 文档索引 | 105 | +该测试集采用 mock/stub,**不依赖真实 ES/MySQL/大模型服务**,适合作为 PR 级快速回归门禁。 |
| 175 | 106 | ||
| 176 | -| 文档 | 用途 | | ||
| 177 | -|------|------| | ||
| 178 | -| `docs/DEVELOPER_GUIDE.md` | **开发者开放指南**:全貌、原则、规范、检查清单 | | ||
| 179 | -| `docs/QUICKSTART.md` | 新人上手:环境、服务、模块、请求 | | ||
| 180 | -| `docs/Usage-Guide.md` | 运维:日志、多环境、故障排查 | | ||
| 181 | -| `docs/搜索API速查表.md` | 搜索 API 参数速查 | | ||
| 182 | -| `docs/搜索API对接指南.md` | 搜索 API 完整说明 | | ||
| 183 | -| `docs/PROVIDER_ARCHITECTURE.md` | 翻译/向量/重排 provider 扩展 | | ||
| 184 | -| `docs/MODULE_EXTENSION_SPEC.md` | 向量/重排后端可插拔规范 | | ||
| 185 | -| `docs/环境配置说明.md` | 首次部署、新机器环境 | | ||
| 186 | -| `docs/系统设计文档.md` | 架构与模块细节 | | ||
| 187 | - | ||
| 188 | -## 关键工作流指引 | ||
| 189 | - | ||
| 190 | -- **数据构建 → MySQL → Elasticsearch** | ||
| 191 | - - `scripts/mock_data.sh`:Tenant1 Mock + Tenant2 CSV 一条龙 | ||
| 192 | - - `scripts/create_tenant_index.sh <tenant_id>` + `POST /indexer/reindex`:推荐导入链路 | ||
| 193 | - - 详解:`测试数据指南.md` | ||
| 194 | - | ||
| 195 | -- **索引富化 & Java 对接** | ||
| 196 | - - Java 索引程序负责:全量/增量调度 + 从 MySQL 查询 `shoplazza_product_spu/sku/option/...` | ||
| 197 | - - Python `indexer` 模块负责:**MySQL 行 → ES doc** 的全部逻辑(多语言、翻译、向量、规格聚合等) | ||
| 198 | - - 正式对接接口(推荐): | ||
| 199 | - - `POST http://<indexer_host>:6004/indexer/build-docs` | ||
| 200 | - - 入参:`tenant_id + items[{spu, skus, options}]` | ||
| 201 | - - 出参:与 `mappings/search_products.json` 完全一致的 `docs` 列表,上游自行写入 ES | ||
| 202 | - - 调试/自测接口(内部使用): | ||
| 203 | - - `POST http://127.0.0.1:6004/indexer/build-docs-from-db`,只需要 `tenant_id + spu_ids`,由服务内部查库并返回 ES doc | ||
| 204 | - - 详解:`indexer/README.md`、`docs/索引字段说明v2.md` | ||
| 205 | - | ||
| 206 | -- **搜索服务 & API** | ||
| 207 | - - `api/`(FastAPI)承载 REST API,`search/` + `query/` 负责查询解析与下发 | ||
| 208 | - - API、分页、过滤、Facet、KNN 等:`搜索API对接指南.md` | ||
| 209 | - - 对接案例、示例与错误码:`搜索API对接指南.md`、`Search-API-Examples.md` | ||
| 210 | - | ||
| 211 | -- **统一配置** | ||
| 212 | - - 所有租户共享统一的索引结构和查询配置(硬编码) | ||
| 213 | - - 索引 mapping: `mappings/search_products.json` | ||
| 214 | - - 查询配置: `search/query_config.py` | ||
| 215 | - - 详解:`基础配置指南.md`、`索引字段说明v2.md` | ||
| 216 | - | ||
| 217 | -## 仓库结构(概览) | 107 | +--- |
| 218 | 108 | ||
| 219 | -``` | ||
| 220 | -api/ FastAPI 服务与路由 | ||
| 221 | -config/ 字段/索引/查询配置体系 | ||
| 222 | -indexer/ MySQL → ES 管道(mapping / transformer / bulk) | ||
| 223 | -query/ 查询解析、改写、翻译、embedding | ||
| 224 | -search/ 多语言构建、布尔解析、排序引擎 | ||
| 225 | -scripts/ 数据/服务脚本(mock_data, ingest, run 等) | ||
| 226 | -frontend/ 简易调试页面 | ||
| 227 | -docs/ 运营及中文资料 | ||
| 228 | -``` | 109 | +## 7) 代码质量与持续继承要求 |
| 110 | + | ||
| 111 | +- 新增功能必须补最小测试(至少覆盖 1 条成功路径 + 1 条参数异常路径) | ||
| 112 | +- 修改公共协议时必须同步更新: | ||
| 113 | + - `docs/QUICKSTART.md` | ||
| 114 | + - 对应服务 README / API 文档 | ||
| 115 | + - `tests/ci` 契约用例 | ||
| 116 | +- 禁止新增“临时分支逻辑”绕过 provider/backend 工厂 | ||
| 117 | +- 优先减少重复实现,复用现有转换链路与配置解析入口 |
docs/QUICKSTART.md
| @@ -27,6 +27,7 @@ | @@ -27,6 +27,7 @@ | ||
| 27 | 4. [模块扩展规范(Embedding / Rerank)](#4-模块扩展规范embedding--rerank) | 27 | 4. [模块扩展规范(Embedding / Rerank)](#4-模块扩展规范embedding--rerank) |
| 28 | 5. [验证、日志与常见排障入口](#5-验证日志与常见排障入口) | 28 | 5. [验证、日志与常见排障入口](#5-验证日志与常见排障入口) |
| 29 | 6. [相关文档](#6-相关文档) | 29 | 6. [相关文档](#6-相关文档) |
| 30 | +7. [持续集成测试(最小可维护方案)](#7-持续集成测试最小可维护方案) | ||
| 30 | 31 | ||
| 31 | --- | 32 | --- |
| 32 | 33 | ||
| @@ -374,3 +375,32 @@ lsof -i :6004 | @@ -374,3 +375,32 @@ lsof -i :6004 | ||
| 374 | | `indexer/README.md` | 索引模块职责与接口 | | 375 | | `indexer/README.md` | 索引模块职责与接口 | |
| 375 | | `embeddings/README.md` | 向量化服务说明 | | 376 | | `embeddings/README.md` | 向量化服务说明 | |
| 376 | | `reranker/README.md` | 重排服务说明 | | 377 | | `reranker/README.md` | 重排服务说明 | |
| 378 | + | ||
| 379 | +--- | ||
| 380 | + | ||
| 381 | +## 7. 持续集成测试(最小可维护方案) | ||
| 382 | + | ||
| 383 | +目标:让后续开发者在不依赖真实 ES/MySQL/模型服务的前提下,快速验证核心服务契约不被破坏。 | ||
| 384 | + | ||
| 385 | +### 7.1 测试范围 | ||
| 386 | + | ||
| 387 | +`tests/ci/test_service_api_contracts.py` 覆盖: | ||
| 388 | + | ||
| 389 | +- 搜索接口:`/search/`、`/search/image`、`/search/suggestions` | ||
| 390 | +- 索引接口:`/indexer/reindex`、`/indexer/index`、`/indexer/build-docs` | ||
| 391 | +- 向量服务:`/embed/text`、`/embed/image` | ||
| 392 | +- 翻译服务:`/translate`、`/health` | ||
| 393 | +- 重排服务:`/rerank`、`/health` | ||
| 394 | + | ||
| 395 | +### 7.2 运行方式 | ||
| 396 | + | ||
| 397 | +```bash | ||
| 398 | +source activate.sh | ||
| 399 | +python -m pytest tests/ci -q | ||
| 400 | +``` | ||
| 401 | + | ||
| 402 | +### 7.3 设计取舍 | ||
| 403 | + | ||
| 404 | +- 使用 mock/stub 注入依赖,确保测试快且稳定 | ||
| 405 | +- 重点测“接口契约与参数行为”,而不是底层模型质量 | ||
| 406 | +- 作为 PR 级门禁;真实环境联调放在运维/预发布流程 |
tests/ci/test_service_api_contracts.py
| @@ -0,0 +1,281 @@ | @@ -0,0 +1,281 @@ | ||
| 1 | +from __future__ import annotations | ||
| 2 | + | ||
| 3 | +from types import SimpleNamespace | ||
| 4 | +from typing import Any, Dict, List | ||
| 5 | + | ||
| 6 | +import numpy as np | ||
| 7 | +import pytest | ||
| 8 | +from fastapi.testclient import TestClient | ||
| 9 | + | ||
| 10 | + | ||
| 11 | +class _FakeSearcher: | ||
| 12 | + def search(self, **kwargs): | ||
| 13 | + return SimpleNamespace( | ||
| 14 | + results=[ | ||
| 15 | + { | ||
| 16 | + "spu_id": "spu-1", | ||
| 17 | + "title": "测试商品", | ||
| 18 | + "price": 99.0, | ||
| 19 | + "currency": "USD", | ||
| 20 | + "in_stock": True, | ||
| 21 | + "skus": [], | ||
| 22 | + "relevance_score": 1.2, | ||
| 23 | + } | ||
| 24 | + ], | ||
| 25 | + total=1, | ||
| 26 | + max_score=1.2, | ||
| 27 | + took_ms=8, | ||
| 28 | + facets=[], | ||
| 29 | + query_info={"normalized_query": kwargs.get("query", "")}, | ||
| 30 | + suggestions=[], | ||
| 31 | + related_searches=[], | ||
| 32 | + debug_info=None, | ||
| 33 | + ) | ||
| 34 | + | ||
| 35 | + def search_by_image(self, **kwargs): | ||
| 36 | + return self.search(**kwargs) | ||
| 37 | + | ||
| 38 | + | ||
| 39 | +class _FakeSuggestionService: | ||
| 40 | + def search(self, **kwargs): | ||
| 41 | + return { | ||
| 42 | + "query": kwargs["query"], | ||
| 43 | + "language": kwargs.get("language", "en"), | ||
| 44 | + "resolved_language": kwargs.get("language", "en"), | ||
| 45 | + "suggestions": [{"text": "iphone 15", "score": 1.0}], | ||
| 46 | + "took_ms": 3, | ||
| 47 | + } | ||
| 48 | + | ||
| 49 | + | ||
| 50 | +@pytest.fixture | ||
| 51 | +def search_client(monkeypatch): | ||
| 52 | + import api.app as search_app | ||
| 53 | + | ||
| 54 | + monkeypatch.setattr(search_app, "init_service", lambda es_host="": None) | ||
| 55 | + monkeypatch.setattr(search_app, "get_searcher", lambda: _FakeSearcher()) | ||
| 56 | + monkeypatch.setattr(search_app, "get_suggestion_service", lambda: _FakeSuggestionService()) | ||
| 57 | + | ||
| 58 | + with TestClient(search_app.app) as client: | ||
| 59 | + yield client | ||
| 60 | + | ||
| 61 | + | ||
| 62 | +def test_search_api_contract(search_client: TestClient): | ||
| 63 | + response = search_client.post( | ||
| 64 | + "/search/", | ||
| 65 | + headers={"X-Tenant-ID": "162"}, | ||
| 66 | + json={"query": "toy", "size": 5}, | ||
| 67 | + ) | ||
| 68 | + assert response.status_code == 200 | ||
| 69 | + data = response.json() | ||
| 70 | + assert data["total"] == 1 | ||
| 71 | + assert data["results"][0]["spu_id"] == "spu-1" | ||
| 72 | + | ||
| 73 | + | ||
| 74 | +def test_image_search_api_contract(search_client: TestClient): | ||
| 75 | + response = search_client.post( | ||
| 76 | + "/search/image", | ||
| 77 | + headers={"X-Tenant-ID": "162"}, | ||
| 78 | + json={"image_url": "https://example.com/a.jpg", "size": 3}, | ||
| 79 | + ) | ||
| 80 | + assert response.status_code == 200 | ||
| 81 | + assert response.json()["results"][0]["spu_id"] == "spu-1" | ||
| 82 | + | ||
| 83 | + | ||
| 84 | +def test_suggestion_api_contract(search_client: TestClient): | ||
| 85 | + response = search_client.get( | ||
| 86 | + "/search/suggestions?q=iph&size=5&language=en", | ||
| 87 | + headers={"X-Tenant-ID": "162"}, | ||
| 88 | + ) | ||
| 89 | + assert response.status_code == 200 | ||
| 90 | + data = response.json() | ||
| 91 | + assert data["query"] == "iph" | ||
| 92 | + assert len(data["suggestions"]) == 1 | ||
| 93 | + | ||
| 94 | + | ||
| 95 | +class _FakeBulkService: | ||
| 96 | + def bulk_index(self, tenant_id: str, recreate_index: bool, batch_size: int): | ||
| 97 | + return { | ||
| 98 | + "tenant_id": tenant_id, | ||
| 99 | + "recreate_index": recreate_index, | ||
| 100 | + "batch_size": batch_size, | ||
| 101 | + "success": True, | ||
| 102 | + } | ||
| 103 | + | ||
| 104 | + | ||
| 105 | +class _FakeTransformer: | ||
| 106 | + def transform_spu_to_doc(self, tenant_id: str, spu_row, skus, options): | ||
| 107 | + return { | ||
| 108 | + "tenant_id": tenant_id, | ||
| 109 | + "spu_id": str(spu_row.get("id", "0")), | ||
| 110 | + "title": {"zh": str(spu_row.get("title", ""))}, | ||
| 111 | + } | ||
| 112 | + | ||
| 113 | + | ||
| 114 | +class _FakeIncrementalService: | ||
| 115 | + def index_spus_to_es(self, es_client, tenant_id: str, spu_ids: List[str], delete_spu_ids=None): | ||
| 116 | + return { | ||
| 117 | + "tenant_id": tenant_id, | ||
| 118 | + "spu_ids": [{"spu_id": s, "status": "indexed"} for s in spu_ids], | ||
| 119 | + "delete_spu_ids": [], | ||
| 120 | + "total": len(spu_ids), | ||
| 121 | + "success_count": len(spu_ids), | ||
| 122 | + "failed_count": 0, | ||
| 123 | + } | ||
| 124 | + | ||
| 125 | + def _get_transformer_bundle(self, tenant_id: str): | ||
| 126 | + return _FakeTransformer(), None, False | ||
| 127 | + | ||
| 128 | + | ||
| 129 | +@pytest.fixture | ||
| 130 | +def indexer_client(monkeypatch): | ||
| 131 | + import api.indexer_app as indexer_app | ||
| 132 | + import api.routes.indexer as indexer_routes | ||
| 133 | + | ||
| 134 | + monkeypatch.setattr(indexer_app, "init_indexer_service", lambda es_host="": None) | ||
| 135 | + monkeypatch.setattr(indexer_routes, "get_bulk_indexing_service", lambda: _FakeBulkService()) | ||
| 136 | + monkeypatch.setattr(indexer_routes, "get_incremental_service", lambda: _FakeIncrementalService()) | ||
| 137 | + monkeypatch.setattr(indexer_routes, "get_es_client", lambda: object()) | ||
| 138 | + | ||
| 139 | + with TestClient(indexer_app.app) as client: | ||
| 140 | + yield client | ||
| 141 | + | ||
| 142 | + | ||
| 143 | +def test_indexer_reindex_contract(indexer_client: TestClient): | ||
| 144 | + response = indexer_client.post( | ||
| 145 | + "/indexer/reindex", | ||
| 146 | + json={"tenant_id": "162", "batch_size": 100}, | ||
| 147 | + ) | ||
| 148 | + assert response.status_code == 200 | ||
| 149 | + assert response.json()["success"] is True | ||
| 150 | + | ||
| 151 | + | ||
| 152 | +def test_indexer_incremental_contract(indexer_client: TestClient): | ||
| 153 | + response = indexer_client.post( | ||
| 154 | + "/indexer/index", | ||
| 155 | + json={"tenant_id": "162", "spu_ids": ["1001", "1002"]}, | ||
| 156 | + ) | ||
| 157 | + assert response.status_code == 200 | ||
| 158 | + data = response.json() | ||
| 159 | + assert data["success_count"] == 2 | ||
| 160 | + | ||
| 161 | + | ||
| 162 | +def test_indexer_build_docs_contract(indexer_client: TestClient): | ||
| 163 | + response = indexer_client.post( | ||
| 164 | + "/indexer/build-docs", | ||
| 165 | + json={ | ||
| 166 | + "tenant_id": "162", | ||
| 167 | + "items": [{"spu": {"id": 1, "title": "T-shirt"}, "skus": [], "options": []}], | ||
| 168 | + }, | ||
| 169 | + ) | ||
| 170 | + assert response.status_code == 200 | ||
| 171 | + data = response.json() | ||
| 172 | + assert data["success_count"] == 1 | ||
| 173 | + assert data["docs"][0]["spu_id"] == "1" | ||
| 174 | + | ||
| 175 | + | ||
| 176 | +class _FakeTextModel: | ||
| 177 | + def encode_batch(self, texts, batch_size=32, device="cpu"): | ||
| 178 | + return [np.array([0.1, 0.2, 0.3], dtype=np.float32) for _ in texts] | ||
| 179 | + | ||
| 180 | + | ||
| 181 | +class _FakeImageModel: | ||
| 182 | + def encode_image_urls(self, urls, batch_size=8): | ||
| 183 | + return [np.array([0.3, 0.2, 0.1], dtype=np.float32) for _ in urls] | ||
| 184 | + | ||
| 185 | + | ||
| 186 | +@pytest.fixture | ||
| 187 | +def embedding_client(): | ||
| 188 | + import embeddings.server as emb_server | ||
| 189 | + | ||
| 190 | + emb_server.app.router.on_startup.clear() | ||
| 191 | + emb_server._text_model = _FakeTextModel() | ||
| 192 | + emb_server._image_model = _FakeImageModel() | ||
| 193 | + | ||
| 194 | + with TestClient(emb_server.app) as client: | ||
| 195 | + yield client | ||
| 196 | + | ||
| 197 | + | ||
| 198 | +def test_embedding_text_contract(embedding_client: TestClient): | ||
| 199 | + response = embedding_client.post("/embed/text", json=["hello", "world"]) | ||
| 200 | + assert response.status_code == 200 | ||
| 201 | + data = response.json() | ||
| 202 | + assert len(data) == 2 | ||
| 203 | + assert len(data[0]) == 3 | ||
| 204 | + | ||
| 205 | + | ||
| 206 | +def test_embedding_image_contract(embedding_client: TestClient): | ||
| 207 | + response = embedding_client.post("/embed/image", json=["https://example.com/a.jpg"]) | ||
| 208 | + assert response.status_code == 200 | ||
| 209 | + assert len(response.json()[0]) == 3 | ||
| 210 | + | ||
| 211 | + | ||
| 212 | +class _FakeTranslator: | ||
| 213 | + model = "qwen" | ||
| 214 | + use_cache = True | ||
| 215 | + | ||
| 216 | + def translate(self, text: str, target_lang: str, source_lang: str | None = None, prompt: str | None = None): | ||
| 217 | + return f"{text}-{target_lang}" | ||
| 218 | + | ||
| 219 | + | ||
| 220 | +@pytest.fixture | ||
| 221 | +def translator_client(monkeypatch): | ||
| 222 | + import api.translator_app as translator_app | ||
| 223 | + | ||
| 224 | + translator_app.app.router.on_startup.clear() | ||
| 225 | + monkeypatch.setattr(translator_app, "get_translator", lambda model="qwen": _FakeTranslator()) | ||
| 226 | + | ||
| 227 | + with TestClient(translator_app.app) as client: | ||
| 228 | + yield client | ||
| 229 | + | ||
| 230 | + | ||
| 231 | +def test_translator_api_contract(translator_client: TestClient): | ||
| 232 | + response = translator_client.post( | ||
| 233 | + "/translate", | ||
| 234 | + json={"text": "商品名称", "target_lang": "en", "source_lang": "zh"}, | ||
| 235 | + ) | ||
| 236 | + assert response.status_code == 200 | ||
| 237 | + assert response.json()["translated_text"] == "商品名称-en" | ||
| 238 | + | ||
| 239 | + | ||
| 240 | +def test_translator_health_contract(translator_client: TestClient): | ||
| 241 | + response = translator_client.get("/health") | ||
| 242 | + assert response.status_code == 200 | ||
| 243 | + assert response.json()["status"] == "healthy" | ||
| 244 | + | ||
| 245 | + | ||
| 246 | +class _FakeReranker: | ||
| 247 | + _model_name = "fake-reranker" | ||
| 248 | + | ||
| 249 | + def score_with_meta(self, query: str, docs: List[str], normalize: bool = True): | ||
| 250 | + scores = [float(i + 1) for i in range(len(docs))] | ||
| 251 | + meta: Dict[str, Any] = {"input_docs": len(docs), "unique_docs": len(set(docs))} | ||
| 252 | + return scores, meta | ||
| 253 | + | ||
| 254 | + | ||
| 255 | +@pytest.fixture | ||
| 256 | +def reranker_client(): | ||
| 257 | + import reranker.server as reranker_server | ||
| 258 | + | ||
| 259 | + reranker_server.app.router.on_startup.clear() | ||
| 260 | + reranker_server._reranker = _FakeReranker() | ||
| 261 | + reranker_server._backend_name = "fake" | ||
| 262 | + | ||
| 263 | + with TestClient(reranker_server.app) as client: | ||
| 264 | + yield client | ||
| 265 | + | ||
| 266 | + | ||
| 267 | +def test_reranker_api_contract(reranker_client: TestClient): | ||
| 268 | + response = reranker_client.post( | ||
| 269 | + "/rerank", | ||
| 270 | + json={"query": "wireless mouse", "docs": ["doc-a", "doc-b"]}, | ||
| 271 | + ) | ||
| 272 | + assert response.status_code == 200 | ||
| 273 | + data = response.json() | ||
| 274 | + assert data["scores"] == [1.0, 2.0] | ||
| 275 | + assert data["meta"]["input_docs"] == 2 | ||
| 276 | + | ||
| 277 | + | ||
| 278 | +def test_reranker_health_contract(reranker_client: TestClient): | ||
| 279 | + response = reranker_client.get("/health") | ||
| 280 | + assert response.status_code == 200 | ||
| 281 | + assert response.json()["status"] == "ok" |
tests/test_cloud_embedding.py
| @@ -11,6 +11,8 @@ import time | @@ -11,6 +11,8 @@ import time | ||
| 11 | from datetime import datetime | 11 | from datetime import datetime |
| 12 | from pathlib import Path | 12 | from pathlib import Path |
| 13 | 13 | ||
| 14 | +import pytest | ||
| 15 | + | ||
| 14 | # Add parent directory to path | 16 | # Add parent directory to path |
| 15 | sys.path.insert(0, str(Path(__file__).parent.parent)) | 17 | sys.path.insert(0, str(Path(__file__).parent.parent)) |
| 16 | 18 | ||
| @@ -44,6 +46,7 @@ def read_queries(file_path: str, limit: int = 100) -> list: | @@ -44,6 +46,7 @@ def read_queries(file_path: str, limit: int = 100) -> list: | ||
| 44 | return queries | 46 | return queries |
| 45 | 47 | ||
| 46 | 48 | ||
| 49 | +@pytest.mark.skip(reason="Requires data file and DASHSCOPE_API_KEY; run manually when needed") | ||
| 47 | def test_cloud_embedding(queries_file: str, num_queries: int = 100): | 50 | def test_cloud_embedding(queries_file: str, num_queries: int = 100): |
| 48 | """ | 51 | """ |
| 49 | Test cloud embedding with queries from file. | 52 | Test cloud embedding with queries from file. |
tests/test_cnclip_service.py
| @@ -13,11 +13,17 @@ CN-CLIP 服务测试脚本 | @@ -13,11 +13,17 @@ CN-CLIP 服务测试脚本 | ||
| 13 | """ | 13 | """ |
| 14 | 14 | ||
| 15 | import sys | 15 | import sys |
| 16 | -import numpy as np | ||
| 17 | -from clip_client import Client | ||
| 18 | 16 | ||
| 17 | +import pytest | ||
| 19 | 18 | ||
| 20 | -def test_encoding(client, test_name, inputs): | 19 | +try: |
| 20 | + import numpy as np | ||
| 21 | + from clip_client import Client | ||
| 22 | +except ImportError: | ||
| 23 | + pytest.skip("clip_client not installed (optional clip-as-service client)", allow_module_level=True) | ||
| 24 | + | ||
| 25 | + | ||
| 26 | +def _test_encoding(client, test_name, inputs): | ||
| 21 | """测试编码功能""" | 27 | """测试编码功能""" |
| 22 | print(f"\n{test_name}...") | 28 | print(f"\n{test_name}...") |
| 23 | try: | 29 | try: |
| @@ -74,21 +80,21 @@ def main(): | @@ -74,21 +80,21 @@ def main(): | ||
| 74 | results = [] | 80 | results = [] |
| 75 | 81 | ||
| 76 | # 测试1: 文本编码 | 82 | # 测试1: 文本编码 |
| 77 | - results.append(test_encoding( | 83 | + results.append(_test_encoding( |
| 78 | client, | 84 | client, |
| 79 | "测试1: 编码文本", | 85 | "测试1: 编码文本", |
| 80 | ['这是一个测试文本', '另一个测试文本'] | 86 | ['这是一个测试文本', '另一个测试文本'] |
| 81 | )) | 87 | )) |
| 82 | 88 | ||
| 83 | # 测试2: 图像编码 | 89 | # 测试2: 图像编码 |
| 84 | - results.append(test_encoding( | 90 | + results.append(_test_encoding( |
| 85 | client, | 91 | client, |
| 86 | "测试2: 编码图像(远程 URL)", | 92 | "测试2: 编码图像(远程 URL)", |
| 87 | ['https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg'] | 93 | ['https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg'] |
| 88 | )) | 94 | )) |
| 89 | 95 | ||
| 90 | # 测试3: 混合编码 | 96 | # 测试3: 混合编码 |
| 91 | - results.append(test_encoding( | 97 | + results.append(_test_encoding( |
| 92 | client, | 98 | client, |
| 93 | "测试3: 混合编码(文本和图像)", | 99 | "测试3: 混合编码(文本和图像)", |
| 94 | ['这是一段文本', 'https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg'] | 100 | ['这是一段文本', 'https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg'] |