From 72e7256ae49c8422d6008aed15677168cb705cc9 Mon Sep 17 00:00:00 2001
From: tangwang
Date: Wed, 31 Dec 2025 17:27:12 +0800
Subject: [PATCH] Clean up files

---
 scripts/generate_test_data.py                       | 421 -----------------------
 scripts/generate_test_summary.py                    | 179 ----------
 scripts/import_tenant2_csv.py                       | 495 ---------------------------
 scripts/import_test_data.py                         | 277 ---------------
 scripts/indexer__old_2025_11/import_tenant2_csv.py  | 495 +++++++++++++++++++++++++++
 scripts/indexer__old_2025_11/import_test_data.py    | 277 +++++++++++++++
 scripts/indexer__old_2025_11/ingest.sh              |  87 +++++
 scripts/indexer__old_2025_11/ingest_shoplazza.py    | 146 ++++++++
 scripts/indexer__old_2025_11/recreate_and_import.py | 184 ++++++++++
 scripts/ingest.sh                                   |  87 -----
 scripts/ingest_shoplazza.py                         | 146 --------
 scripts/recreate_and_import.py                      | 184 ----------
 scripts/run_tests.py                                | 705 ---------------------------------------
 scripts/start_test_environment.sh                   | 275 ---------------
 scripts/stop_test_environment.sh                    |  82 -----
 scripts/test_base.py                                | 242 -------------
 scripts/test_cloud_embedding.py                     | 183 ----------
 scripts/test_cnclip_service.py                      | 115 ------
 scripts/test_facet_api.py                           | 131 -------
 scripts/test_frontend.sh                            |  94 -----
 tests/test_cloud_embedding.py                       | 183 ++++++++++
 tests/test_cnclip_service.py                        | 115 ++++++
 tests/test_facet_api.py                             | 131 +++++++
 23 files changed, 1618 insertions(+), 3616 deletions(-)
 delete mode 100644 scripts/generate_test_data.py
 delete mode 100644 scripts/generate_test_summary.py
 delete mode 100755 scripts/import_tenant2_csv.py
 delete mode 100644 scripts/import_test_data.py
 create mode 100755 scripts/indexer__old_2025_11/import_tenant2_csv.py
 create mode 100644 scripts/indexer__old_2025_11/import_test_data.py
 create mode 100755 scripts/indexer__old_2025_11/ingest.sh
 create mode 100644 scripts/indexer__old_2025_11/ingest_shoplazza.py
 create mode 100755 scripts/indexer__old_2025_11/recreate_and_import.py
 delete mode 100755 scripts/ingest.sh
 delete mode 100644 scripts/ingest_shoplazza.py
 delete mode 100755 scripts/recreate_and_import.py
 delete
mode 100755 scripts/run_tests.py delete mode 100755 scripts/start_test_environment.sh delete mode 100755 scripts/stop_test_environment.sh delete mode 100644 scripts/test_base.py delete mode 100644 scripts/test_cloud_embedding.py delete mode 100755 scripts/test_cnclip_service.py delete mode 100755 scripts/test_facet_api.py delete mode 100755 scripts/test_frontend.sh create mode 100644 tests/test_cloud_embedding.py create mode 100755 tests/test_cnclip_service.py create mode 100755 tests/test_facet_api.py diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py deleted file mode 100644 index ecf1067..0000000 --- a/scripts/generate_test_data.py +++ /dev/null @@ -1,421 +0,0 @@ -#!/usr/bin/env python3 -""" -Generate test data for Shoplazza SPU and SKU tables. - -Generates 100 SPU records with 1-5 SKUs each. -""" - -import sys -import os -import random -import argparse -from pathlib import Path -from datetime import datetime, timedelta - -# Add parent directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - - -def generate_spu_data(num_spus: int = 100, tenant_id: str = "1", start_id: int = 1): - """ - Generate SPU test data. - - Args: - num_spus: Number of SPUs to generate - tenant_id: Tenant ID - start_id: Starting ID for SPUs - - Returns: - List of SPU data dictionaries - """ - categories = ["电子产品", "服装", "家居用品", "美妆", "食品", "运动用品", "图书", "玩具"] - vendors = ["Sony", "Nike", "Apple", "Samsung", "华为", "小米", "美的", "海尔"] - - products = [ - ("蓝牙耳机", "Bluetooth Headphone", "高品质无线蓝牙耳机", "High-quality wireless Bluetooth headphone"), - ("运动鞋", "Running Shoes", "舒适透气的运动鞋", "Comfortable and breathable running shoes"), - ("智能手机", "Smartphone", "高性能智能手机", "High-performance smartphone"), - ("笔记本电脑", "Laptop", "轻薄便携笔记本电脑", "Lightweight and portable laptop"), - ("智能手表", "Smart Watch", "多功能智能手表", "Multi-function smart watch"), - ("平板电脑", "Tablet", "高清平板电脑", "High-definition tablet"), - ("无线鼠标", "Wireless Mouse", "人体工学无线鼠标", "Ergonomic wireless mouse"), - ("机械键盘", "Mechanical Keyboard", "RGB背光机械键盘", "RGB backlit mechanical keyboard"), - ("显示器", "Monitor", "4K高清显示器", "4K high-definition monitor"), - ("音响", "Speaker", "蓝牙无线音响", "Bluetooth wireless speaker"), - ] - - spus = [] - for i in range(num_spus): - spu_id = start_id + i - product = random.choice(products) - category = random.choice(categories) - vendor = random.choice(vendors) - - # Generate handle - handle = f"product-{spu_id}" - - # Generate title (Chinese) - title_zh = f"{product[0]} {vendor}" - - # Generate brief - brief_zh = product[2] - - # Generate description - description_zh = f"
{product[2]},来自{vendor}品牌。{product[3]}
" - - # Generate SEO fields - seo_title = f"{title_zh} - {category}" - seo_description = f"购买{vendor}{product[0]},{product[2]}" - seo_keywords = f"{product[0]},{vendor},{category}" - - # Generate tags - tags = f"{category},{vendor},{product[0]}" - - # Generate image - image_src = f"//cdn.example.com/products/{spu_id}.jpg" - - # Generate dates - created_at = datetime.now() - timedelta(days=random.randint(1, 365)) - updated_at = created_at + timedelta(days=random.randint(0, 30)) - - spu = { - 'id': spu_id, - 'shop_id': 1, - 'shoplazza_id': f"spu-{spu_id}", - 'handle': handle, - 'title': title_zh, - 'brief': brief_zh, - 'description': description_zh, - 'spu': '', - 'vendor': vendor, - 'vendor_url': f"https://{vendor.lower()}.com", - 'seo_title': seo_title, - 'seo_description': seo_description, - 'seo_keywords': seo_keywords, - 'image_src': image_src, - 'image_width': 800, - 'image_height': 600, - 'image_path': f"products/{spu_id}.jpg", - 'image_alt': title_zh, - 'inventory_policy': '', - 'inventory_quantity': 0, - 'inventory_tracking': '0', - 'published': 1, - 'published_at': created_at.strftime('%Y-%m-%d %H:%M:%S'), - 'requires_shipping': 1, - 'taxable': 0, - 'fake_sales': 0, - 'display_fake_sales': 0, - 'mixed_wholesale': 0, - 'need_variant_image': 0, - 'has_only_default_variant': 0, - 'tags': tags, - 'note': '', - 'category': category, - 'shoplazza_created_at': created_at.strftime('%Y-%m-%d %H:%M:%S'), - 'shoplazza_updated_at': updated_at.strftime('%Y-%m-%d %H:%M:%S'), - 'tenant_id': tenant_id, - 'creator': '1', - 'create_time': created_at.strftime('%Y-%m-%d %H:%M:%S'), - 'updater': '1', - 'update_time': updated_at.strftime('%Y-%m-%d %H:%M:%S'), - 'deleted': 0 - } - spus.append(spu) - - return spus - - -def generate_sku_data(spus: list, start_sku_id: int = 1): - """ - Generate SKU test data for SPUs. 
- - Args: - spus: List of SPU data - start_sku_id: Starting ID for SKUs - - Returns: - List of SKU data dictionaries - """ - colors = ["黑色", "白色", "红色", "蓝色", "绿色", "灰色"] - sizes = ["S", "M", "L", "XL", "XXL"] - - skus = [] - sku_id = start_sku_id - - for spu in spus: - spu_id = spu['id'] - num_skus = random.randint(1, 5) - - # Base price - base_price = random.uniform(50, 500) - - for i in range(num_skus): - # Generate variant options - color = random.choice(colors) if num_skus > 1 else None - size = random.choice(sizes) if num_skus > 2 else None - - # Generate title - title_parts = [] - if color: - title_parts.append(color) - if size: - title_parts.append(size) - title = " / ".join(title_parts) if title_parts else "" - - # Generate SKU - sku_code = f"SKU-{spu_id}-{i+1}" - - # Generate price (variation from base) - price = base_price + random.uniform(-20, 50) - compare_at_price = price * random.uniform(1.2, 1.5) - - # Generate stock - stock = random.randint(0, 100) - - # Generate dates - created_at = datetime.now() - timedelta(days=random.randint(1, 365)) - updated_at = created_at + timedelta(days=random.randint(0, 30)) - - sku = { - 'id': sku_id, - 'spu_id': spu_id, - 'shop_id': 1, - 'shoplazza_id': f"sku-{sku_id}", - 'shoplazza_product_id': spu['shoplazza_id'], - 'shoplazza_image_id': '', - 'title': title, - 'sku': sku_code, - 'barcode': f"BAR{sku_id:08d}", - 'position': i + 1, - 'price': round(price, 2), - 'compare_at_price': round(compare_at_price, 2), - 'cost_price': round(price * 0.6, 2), - 'option1': color if color else '', - 'option2': size if size else '', - 'option3': '', - 'inventory_quantity': stock, - 'weight': round(random.uniform(0.1, 5.0), 2), - 'weight_unit': 'kg', - 'image_src': '', - 'wholesale_price': '[{"price": ' + str(round(price * 0.8, 2)) + ', "minQuantity": 10}]', - 'note': '', - 'extend': None, # JSON field, use NULL instead of empty string - 'shoplazza_created_at': created_at.strftime('%Y-%m-%d %H:%M:%S'), - 'shoplazza_updated_at': updated_at.strftime('%Y-%m-%d %H:%M:%S'), - 'tenant_id': spu['tenant_id'], - 'creator': '1', - 'create_time': created_at.strftime('%Y-%m-%d %H:%M:%S'), - 'updater': '1', - 'update_time': updated_at.strftime('%Y-%m-%d %H:%M:%S'), - 'deleted': 0 - } - skus.append(sku) - sku_id += 1 - - return skus - - -def escape_sql_string(value: str) -> str: - """ - Escape SQL string value (replace single quotes with doubled quotes). - - Args: - value: String value to escape - - Returns: - Escaped string - """ - if value is None: - return '' - return str(value).replace("'", "''").replace("\\", "\\\\") - - -def generate_sql_inserts(spus: list, skus: list, output_file: str): - """ - Generate SQL INSERT statements. 
- - Args: - spus: List of SPU data - skus: List of SKU data - output_file: Output file path - """ - with open(output_file, 'w', encoding='utf-8') as f: - f.write("-- SPU Test Data\n") - f.write("INSERT INTO shoplazza_product_spu (\n") - f.write(" id, shop_id, shoplazza_id, handle, title, brief, description, spu,\n") - f.write(" vendor, vendor_url, seo_title, seo_description, seo_keywords,\n") - f.write(" image_src, image_width, image_height, image_path, image_alt,\n") - f.write(" inventory_policy, inventory_quantity, inventory_tracking,\n") - f.write(" published, published_at, requires_shipping, taxable,\n") - f.write(" fake_sales, display_fake_sales, mixed_wholesale, need_variant_image,\n") - f.write(" has_only_default_variant, tags, note, category,\n") - f.write(" shoplazza_created_at, shoplazza_updated_at, tenant_id,\n") - f.write(" creator, create_time, updater, update_time, deleted\n") - f.write(") VALUES\n") - - for i, spu in enumerate(spus): - values = ( - f"({spu['id']}, {spu['shop_id']}, '{escape_sql_string(spu['shoplazza_id'])}', " - f"'{escape_sql_string(spu['handle'])}', '{escape_sql_string(spu['title'])}', " - f"'{escape_sql_string(spu['brief'])}', '{escape_sql_string(spu['description'])}', " - f"'{escape_sql_string(spu['spu'])}', '{escape_sql_string(spu['vendor'])}', " - f"'{escape_sql_string(spu['vendor_url'])}', '{escape_sql_string(spu['seo_title'])}', " - f"'{escape_sql_string(spu['seo_description'])}', '{escape_sql_string(spu['seo_keywords'])}', " - f"'{escape_sql_string(spu['image_src'])}', {spu['image_width']}, " - f"{spu['image_height']}, '{escape_sql_string(spu['image_path'])}', " - f"'{escape_sql_string(spu['image_alt'])}', '{escape_sql_string(spu['inventory_policy'])}', " - f"{spu['inventory_quantity']}, '{escape_sql_string(spu['inventory_tracking'])}', " - f"{spu['published']}, '{escape_sql_string(spu['published_at'])}', " - f"{spu['requires_shipping']}, {spu['taxable']}, " - f"{spu['fake_sales']}, {spu['display_fake_sales']}, {spu['mixed_wholesale']}, " - f"{spu['need_variant_image']}, {spu['has_only_default_variant']}, " - f"'{escape_sql_string(spu['tags'])}', '{escape_sql_string(spu['note'])}', " - f"'{escape_sql_string(spu['category'])}', '{escape_sql_string(spu['shoplazza_created_at'])}', " - f"'{escape_sql_string(spu['shoplazza_updated_at'])}', '{escape_sql_string(spu['tenant_id'])}', " - f"'{escape_sql_string(spu['creator'])}', '{escape_sql_string(spu['create_time'])}', " - f"'{escape_sql_string(spu['updater'])}', '{escape_sql_string(spu['update_time'])}', " - f"{spu['deleted']})" - ) - f.write(values) - if i < len(spus) - 1: - f.write(",\n") - else: - f.write(";\n\n") - - f.write("-- SKU Test Data\n") - f.write("INSERT INTO shoplazza_product_sku (\n") - f.write(" id, spu_id, shop_id, shoplazza_id, shoplazza_product_id, shoplazza_image_id,\n") - f.write(" title, sku, barcode, position, price, compare_at_price, cost_price,\n") - f.write(" option1, option2, option3, inventory_quantity, weight, weight_unit,\n") - f.write(" image_src, wholesale_price, note, extend,\n") - f.write(" shoplazza_created_at, shoplazza_updated_at, tenant_id,\n") - f.write(" creator, create_time, updater, update_time, deleted\n") - f.write(") VALUES\n") - - for i, sku in enumerate(skus): - # Handle extend field (JSON, can be NULL) - extend_value = 'NULL' if sku['extend'] is None else f"'{escape_sql_string(sku['extend'])}'" - - values = ( - f"({sku['id']}, {sku['spu_id']}, {sku['shop_id']}, '{escape_sql_string(sku['shoplazza_id'])}', " - 
f"'{escape_sql_string(sku['shoplazza_product_id'])}', '{escape_sql_string(sku['shoplazza_image_id'])}', " - f"'{escape_sql_string(sku['title'])}', '{escape_sql_string(sku['sku'])}', " - f"'{escape_sql_string(sku['barcode'])}', {sku['position']}, " - f"{sku['price']}, {sku['compare_at_price']}, {sku['cost_price']}, " - f"'{escape_sql_string(sku['option1'])}', '{escape_sql_string(sku['option2'])}', " - f"'{escape_sql_string(sku['option3'])}', {sku['inventory_quantity']}, {sku['weight']}, " - f"'{escape_sql_string(sku['weight_unit'])}', '{escape_sql_string(sku['image_src'])}', " - f"'{escape_sql_string(sku['wholesale_price'])}', '{escape_sql_string(sku['note'])}', " - f"{extend_value}, '{escape_sql_string(sku['shoplazza_created_at'])}', " - f"'{escape_sql_string(sku['shoplazza_updated_at'])}', '{escape_sql_string(sku['tenant_id'])}', " - f"'{escape_sql_string(sku['creator'])}', '{escape_sql_string(sku['create_time'])}', " - f"'{escape_sql_string(sku['updater'])}', '{escape_sql_string(sku['update_time'])}', " - f"{sku['deleted']})" - ) - f.write(values) - if i < len(skus) - 1: - f.write(",\n") - else: - f.write(";\n") - - -def get_max_ids_from_db(db_config=None): - """ - Get maximum IDs from database to avoid primary key conflicts. - - Args: - db_config: Optional database config dict with keys: host, port, database, username, password - - Returns: - tuple: (max_spu_id, max_sku_id) or (0, 0) if cannot connect - """ - if not db_config: - return 0, 0 - - try: - from utils.db_connector import create_db_connection - from sqlalchemy import text - - db_engine = create_db_connection( - host=db_config['host'], - port=db_config['port'], - database=db_config['database'], - username=db_config['username'], - password=db_config['password'] - ) - - with db_engine.connect() as conn: - result = conn.execute(text('SELECT MAX(id) FROM shoplazza_product_spu')) - max_spu_id = result.scalar() or 0 - - result = conn.execute(text('SELECT MAX(id) FROM shoplazza_product_sku')) - max_sku_id = result.scalar() or 0 - - return max_spu_id, max_sku_id - except Exception as e: - print(f"Warning: Could not get max IDs from database: {e}") - return 0, 0 - - -def main(): - parser = argparse.ArgumentParser(description='Generate test data for Shoplazza tables') - parser.add_argument('--num-spus', type=int, default=100, help='Number of SPUs to generate') - parser.add_argument('--tenant-id', default='1', help='Tenant ID') - parser.add_argument('--start-spu-id', type=int, default=None, help='Starting SPU ID (default: auto-calculate from DB)') - parser.add_argument('--start-sku-id', type=int, default=None, help='Starting SKU ID (default: auto-calculate from DB)') - parser.add_argument('--output', default='test_data.sql', help='Output SQL file') - parser.add_argument('--db-host', help='Database host (for auto-calculating start IDs)') - parser.add_argument('--db-port', type=int, default=3306, help='Database port (default: 3306)') - parser.add_argument('--db-database', help='Database name (for auto-calculating start IDs)') - parser.add_argument('--db-username', help='Database username (for auto-calculating start IDs)') - parser.add_argument('--db-password', help='Database password (for auto-calculating start IDs)') - - args = parser.parse_args() - - # Auto-calculate start IDs if not provided and DB config available - start_spu_id = args.start_spu_id - start_sku_id = args.start_sku_id - - if (start_spu_id is None or start_sku_id is None) and args.db_host and args.db_database and args.db_username and args.db_password: - 
print("Auto-calculating start IDs from database...") - db_config = { - 'host': args.db_host, - 'port': args.db_port, - 'database': args.db_database, - 'username': args.db_username, - 'password': args.db_password - } - max_spu_id, max_sku_id = get_max_ids_from_db(db_config) - if start_spu_id is None: - start_spu_id = max_spu_id + 1 - if start_sku_id is None: - start_sku_id = max_sku_id + 1 - print(f" Max SPU ID in DB: {max_spu_id}, using start SPU ID: {start_spu_id}") - print(f" Max SKU ID in DB: {max_sku_id}, using start SKU ID: {start_sku_id}") - else: - if start_spu_id is None: - start_spu_id = 1 - if start_sku_id is None: - start_sku_id = 1 - print(f"Using start SPU ID: {start_spu_id}, start SKU ID: {start_sku_id}") - - print(f"Generating {args.num_spus} SPUs with skus...") - - # Generate SPU data - spus = generate_spu_data(args.num_spus, args.tenant_id, start_spu_id) - print(f"Generated {len(spus)} SPUs") - - # Generate SKU data - skus = generate_sku_data(spus, start_sku_id) - print(f"Generated {len(skus)} SKUs") - - # Generate SQL file - generate_sql_inserts(spus, skus, args.output) - print(f"SQL file generated: {args.output}") - - -if __name__ == '__main__': - import json - main() - diff --git a/scripts/generate_test_summary.py b/scripts/generate_test_summary.py deleted file mode 100644 index f85bf35..0000000 --- a/scripts/generate_test_summary.py +++ /dev/null @@ -1,179 +0,0 @@ -#!/usr/bin/env python3 -""" -生成测试摘要脚本 - -用于CI/CD流水线中汇总所有测试结果 -""" - -import json -import os -import sys -import glob -from pathlib import Path -from datetime import datetime -from typing import Dict, Any, List - - -def collect_test_results() -> Dict[str, Any]: - """收集所有测试结果""" - results = { - 'timestamp': datetime.now().isoformat(), - 'suites': {}, - 'summary': { - 'total_tests': 0, - 'passed': 0, - 'failed': 0, - 'skipped': 0, - 'errors': 0, - 'total_duration': 0.0 - } - } - - # 查找所有测试结果文件 - test_files = glob.glob('*_test_results.json') - - for test_file in test_files: - try: - with open(test_file, 'r', encoding='utf-8') as f: - test_data = json.load(f) - - suite_name = test_file.replace('_test_results.json', '') - - if 'summary' in test_data: - summary = test_data['summary'] - results['suites'][suite_name] = { - 'total': summary.get('total', 0), - 'passed': summary.get('passed', 0), - 'failed': summary.get('failed', 0), - 'skipped': summary.get('skipped', 0), - 'errors': summary.get('error', 0), - 'duration': summary.get('duration', 0.0) - } - - # 更新总体统计 - results['summary']['total_tests'] += summary.get('total', 0) - results['summary']['passed'] += summary.get('passed', 0) - results['summary']['failed'] += summary.get('failed', 0) - results['summary']['skipped'] += summary.get('skipped', 0) - results['summary']['errors'] += summary.get('error', 0) - results['summary']['total_duration'] += summary.get('duration', 0.0) - - except Exception as e: - print(f"Error reading {test_file}: {e}") - continue - - # 计算成功率 - if results['summary']['total_tests'] > 0: - results['summary']['success_rate'] = ( - results['summary']['passed'] / results['summary']['total_tests'] * 100 - ) - else: - results['summary']['success_rate'] = 0.0 - - return results - - -def generate_text_report(results: Dict[str, Any]) -> str: - """生成文本格式的测试报告""" - lines = [] - - # 标题 - lines.append("=" * 60) - lines.append("搜索引擎自动化测试报告") - lines.append("=" * 60) - lines.append(f"时间: {results['timestamp']}") - lines.append("") - - # 摘要 - summary = results['summary'] - lines.append("📊 测试摘要") - lines.append("-" * 30) - lines.append(f"总测试数: 
{summary['total_tests']}") - lines.append(f"✅ 通过: {summary['passed']}") - lines.append(f"❌ 失败: {summary['failed']}") - lines.append(f"⏭️ 跳过: {summary['skipped']}") - lines.append(f"🚨 错误: {summary['errors']}") - lines.append(f"📈 成功率: {summary['success_rate']:.1f}%") - lines.append(f"⏱️ 总耗时: {summary['total_duration']:.2f}秒") - lines.append("") - - # 状态判断 - if summary['failed'] == 0 and summary['errors'] == 0: - lines.append("🎉 所有测试都通过了!") - else: - lines.append("⚠️ 存在失败的测试,请查看详细日志。") - lines.append("") - - # 各测试套件详情 - if results['suites']: - lines.append("📋 测试套件详情") - lines.append("-" * 30) - - for suite_name, suite_data in results['suites'].items(): - lines.append(f"\n{suite_name.upper()}:") - lines.append(f" 总数: {suite_data['total']}") - lines.append(f" ✅ 通过: {suite_data['passed']}") - lines.append(f" ❌ 失败: {suite_data['failed']}") - lines.append(f" ⏭️ 跳过: {suite_data['skipped']}") - lines.append(f" 🚨 错误: {suite_data['errors']}") - lines.append(f" ⏱️ 耗时: {suite_data['duration']:.2f}秒") - - # 添加状态图标 - if suite_data['failed'] == 0 and suite_data['errors'] == 0: - lines.append(f" 状态: ✅ 全部通过") - else: - lines.append(f" 状态: ❌ 存在问题") - - lines.append("") - lines.append("=" * 60) - - return "\n".join(lines) - - -def generate_json_report(results: Dict[str, Any]) -> str: - """生成JSON格式的测试报告""" - return json.dumps(results, indent=2, ensure_ascii=False) - - -def main(): - """主函数""" - # 收集测试结果 - print("收集测试结果...") - results = collect_test_results() - - # 生成报告 - print("生成测试报告...") - text_report = generate_text_report(results) - json_report = generate_json_report(results) - - # 保存报告 - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - - # 文本报告 - text_file = f"final_test_report.txt" - with open(text_file, 'w', encoding='utf-8') as f: - f.write(text_report) - - # JSON报告 - json_file = f"final_test_report.json" - with open(json_file, 'w', encoding='utf-8') as f: - f.write(json_report) - - print(f"测试报告已生成:") - print(f" 文本报告: {text_file}") - print(f" JSON报告: {json_file}") - - # 输出摘要到控制台 - print("\n" + "=" * 60) - print(text_report) - - # 返回退出码 - summary = results['summary'] - if summary['failed'] > 0 or summary['errors'] > 0: - return 1 - else: - return 0 - - -if __name__ == "__main__": - sys.exit(main()) \ No newline at end of file diff --git a/scripts/import_tenant2_csv.py b/scripts/import_tenant2_csv.py deleted file mode 100755 index 063dd77..0000000 --- a/scripts/import_tenant2_csv.py +++ /dev/null @@ -1,495 +0,0 @@ -#!/usr/bin/env python3 -""" -Import tenant2 CSV data into MySQL Shoplazza tables. - -Reads CSV file and generates SQL INSERT statements for SPU and SKU tables. -Each CSV row corresponds to 1 SPU and 1 SKU. -This script is for generating test data for tenant_id=2 from CSV files. -""" - -import sys -import os -import csv -import random -import argparse -import re -from pathlib import Path -from datetime import datetime, timedelta - -# Add parent directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - - -def escape_sql_string(value: str) -> str: - """ - Escape SQL string value (replace single quotes with doubled quotes and handle special characters). 
- - Args: - value: String value to escape - - Returns: - Escaped string - """ - if value is None: - return '' - - # Convert to string and handle None - s = str(value) - - # Replace single quotes with doubled quotes (SQL standard) - s = s.replace("'", "''") - - # Replace backslashes (MySQL escape) - s = s.replace("\\", "\\\\") - - # Remove or replace control characters that can break SQL - # Replace newlines and carriage returns with spaces - s = s.replace("\n", " ").replace("\r", " ") - - # Remove other control characters (except tab) - s = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]', '', s) - - # Remove null bytes - s = s.replace('\x00', '') - - return s - - -def generate_handle(title: str) -> str: - """ - Generate URL-friendly handle from title. - - Args: - title: Product title - - Returns: - URL-friendly handle - """ - # Remove special characters, convert to lowercase, replace spaces with hyphens - handle = re.sub(r'[^\w\s-]', '', title.lower()) - handle = re.sub(r'[-\s]+', '-', handle) - handle = handle.strip('-') - # Limit length - if len(handle) > 255: - handle = handle[:255] - return handle or 'product' - - -def parse_csv_row(row: dict) -> dict: - """ - Parse CSV row and extract fields. - - Args: - row: CSV row dictionary - - Returns: - Parsed data dictionary - """ - # Remove quotes from values if present - def clean_value(value): - if value is None: - return '' - value = str(value).strip() - # Remove surrounding quotes - if value.startswith('"') and value.endswith('"'): - value = value[1:-1] - return value - - return { - 'skuId': clean_value(row.get('skuId', '')), - 'name': clean_value(row.get('name', '')), - 'name_pinyin': clean_value(row.get('name_pinyin', '')), - 'create_time': clean_value(row.get('create_time', '')), - 'ruSkuName': clean_value(row.get('ruSkuName', '')), - 'enSpuName': clean_value(row.get('enSpuName', '')), - 'categoryName': clean_value(row.get('categoryName', '')), - 'supplierName': clean_value(row.get('supplierName', '')), - 'brandName': clean_value(row.get('brandName', '')), - 'file_id': clean_value(row.get('file_id', '')), - 'days_since_last_update': clean_value(row.get('days_since_last_update', '')), - 'id': clean_value(row.get('id', '')), - 'imageUrl': clean_value(row.get('imageUrl', '')) - } - - -def generate_spu_data(csv_data: dict, spu_id: int, tenant_id: str = "2") -> dict: - """ - Generate SPU data from CSV row. 
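
A short illustrative check of the two helpers above, with hypothetical inputs; the expected outputs follow directly from the definitions:

print(escape_sql_string("it's"))        # -> it''s   (single quotes doubled)
print(escape_sql_string("a\nb"))        # -> a b     (newline replaced by a space)
print(generate_handle("Hello World!"))  # -> hello-world
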
- - Args: - csv_data: Parsed CSV row data - spu_id: SPU ID - tenant_id: Tenant ID (default: "2") - - Returns: - SPU data dictionary - """ - # Parse create_time - try: - created_at = datetime.strptime(csv_data['create_time'], '%Y-%m-%d %H:%M:%S') - except: - created_at = datetime.now() - timedelta(days=random.randint(1, 365)) - - updated_at = created_at + timedelta(days=random.randint(0, 30)) - - # Generate handle from title - title = csv_data['name'] or csv_data['enSpuName'] or 'Product' - handle = generate_handle(title) - - # Generate tags from category and brand - tags_parts = [] - if csv_data['categoryName']: - tags_parts.append(csv_data['categoryName']) - if csv_data['brandName']: - tags_parts.append(csv_data['brandName']) - tags = ','.join(tags_parts) if tags_parts else '' - - # Generate SEO fields - seo_title = f"{title} - {csv_data['categoryName']}" if csv_data['categoryName'] else title - seo_description = f"购买{csv_data['brandName']}{title}" if csv_data['brandName'] else title - seo_keywords = f"{title},{csv_data['categoryName']},{csv_data['brandName']}" if csv_data['categoryName'] else title - - spu = { - 'id': spu_id, - 'shop_id': 1, - 'shoplazza_id': csv_data['id'] or f"spu-{spu_id}", - 'handle': handle, - 'title': title, - 'brief': csv_data['name'] or '', - 'description': f"
{csv_data['name']}
" if csv_data['name'] else '', - 'spu': '', - 'vendor': csv_data['supplierName'] or '', - 'vendor_url': '', - 'seo_title': seo_title, - 'seo_description': seo_description, - 'seo_keywords': seo_keywords, - 'image_src': csv_data['imageUrl'] or '', - 'image_width': 800, - 'image_height': 600, - 'image_path': f"products/{spu_id}.jpg", - 'image_alt': title, - 'inventory_policy': '', - 'inventory_quantity': 0, - 'inventory_tracking': '0', - 'published': 1, - 'published_at': created_at.strftime('%Y-%m-%d %H:%M:%S'), - 'requires_shipping': 1, - 'taxable': 0, - 'fake_sales': 0, - 'display_fake_sales': 0, - 'mixed_wholesale': 0, - 'need_variant_image': 0, - 'has_only_default_variant': 0, - 'tags': tags, - 'note': '', - 'category': csv_data['categoryName'] or '', - 'shoplazza_created_at': created_at.strftime('%Y-%m-%d %H:%M:%S'), - 'shoplazza_updated_at': updated_at.strftime('%Y-%m-%d %H:%M:%S'), - 'tenant_id': tenant_id, - 'creator': '1', - 'create_time': created_at.strftime('%Y-%m-%d %H:%M:%S'), - 'updater': '1', - 'update_time': updated_at.strftime('%Y-%m-%d %H:%M:%S'), - 'deleted': 0 - } - - return spu - - -def generate_sku_data(csv_data: dict, spu_id: int, sku_id: int, tenant_id: str = "2") -> dict: - """ - Generate SKU data from CSV row. - - Args: - csv_data: Parsed CSV row data - spu_id: Associated SPU ID - sku_id: SKU ID (from CSV skuId) - tenant_id: Tenant ID (default: "2") - - Returns: - SKU data dictionary - """ - # Parse create_time - try: - created_at = datetime.strptime(csv_data['create_time'], '%Y-%m-%d %H:%M:%S') - except: - created_at = datetime.now() - timedelta(days=random.randint(1, 365)) - - updated_at = created_at + timedelta(days=random.randint(0, 30)) - - # Generate random price - price = round(random.uniform(50, 500), 2) - compare_at_price = round(price * random.uniform(1.2, 1.5), 2) - cost_price = round(price * 0.6, 2) - - # Generate random stock - inventory_quantity = random.randint(0, 100) - - # Generate random weight - weight = round(random.uniform(0.1, 5.0), 2) - - # Use ruSkuName as title, fallback to name - title = csv_data['ruSkuName'] or csv_data['name'] or 'SKU' - - # Use skuId as SKU code - sku_code = csv_data['skuId'] or f"SKU-{sku_id}" - - sku = { - 'id': sku_id, - 'spu_id': spu_id, - 'shop_id': 1, - 'shoplazza_id': f"sku-{sku_id}", - 'shoplazza_product_id': csv_data['id'] or f"spu-{spu_id}", - 'shoplazza_image_id': '', - 'title': title, - 'sku': sku_code, - 'barcode': f"BAR{sku_id:08d}", - 'position': 1, - 'price': price, - 'compare_at_price': compare_at_price, - 'cost_price': cost_price, - 'option1': '', - 'option2': '', - 'option3': '', - 'inventory_quantity': inventory_quantity, - 'weight': weight, - 'weight_unit': 'kg', - 'image_src': csv_data['imageUrl'] or '', - 'wholesale_price': f'[{{"price": {round(price * 0.8, 2)}, "minQuantity": 10}}]', - 'note': '', - 'extend': None, # JSON field, use NULL - 'shoplazza_created_at': created_at.strftime('%Y-%m-%d %H:%M:%S'), - 'shoplazza_updated_at': updated_at.strftime('%Y-%m-%d %H:%M:%S'), - 'tenant_id': tenant_id, - 'creator': '1', - 'create_time': created_at.strftime('%Y-%m-%d %H:%M:%S'), - 'updater': '1', - 'update_time': updated_at.strftime('%Y-%m-%d %H:%M:%S'), - 'deleted': 0 - } - - return sku - - -def read_csv_file(csv_file: str) -> list: - """ - Read CSV file and return list of parsed rows. 
- - Args: - csv_file: Path to CSV file - - Returns: - List of parsed CSV data dictionaries - """ - csv_data_list = [] - - with open(csv_file, 'r', encoding='utf-8') as f: - # Use csv.DictReader to handle quoted fields properly - reader = csv.DictReader(f) - for row in reader: - parsed = parse_csv_row(row) - csv_data_list.append(parsed) - - return csv_data_list - - -def generate_sql_inserts(spus: list, skus: list, output_file: str): - """ - Generate SQL INSERT statements. - - Args: - spus: List of SPU data - skus: List of SKU data - output_file: Output file path - """ - with open(output_file, 'w', encoding='utf-8') as f: - f.write("-- SPU Data from tenant2 CSV\n") - f.write("INSERT INTO shoplazza_product_spu (\n") - f.write(" id, shop_id, shoplazza_id, handle, title, brief, description, spu,\n") - f.write(" vendor, vendor_url, seo_title, seo_description, seo_keywords,\n") - f.write(" image_src, image_width, image_height, image_path, image_alt,\n") - f.write(" inventory_policy, inventory_quantity, inventory_tracking,\n") - f.write(" published, published_at, requires_shipping, taxable,\n") - f.write(" fake_sales, display_fake_sales, mixed_wholesale, need_variant_image,\n") - f.write(" has_only_default_variant, tags, note, category,\n") - f.write(" shoplazza_created_at, shoplazza_updated_at, tenant_id,\n") - f.write(" creator, create_time, updater, update_time, deleted\n") - f.write(") VALUES\n") - - for i, spu in enumerate(spus): - values = ( - f"({spu['id']}, {spu['shop_id']}, '{escape_sql_string(spu['shoplazza_id'])}', " - f"'{escape_sql_string(spu['handle'])}', '{escape_sql_string(spu['title'])}', " - f"'{escape_sql_string(spu['brief'])}', '{escape_sql_string(spu['description'])}', " - f"'{escape_sql_string(spu['spu'])}', '{escape_sql_string(spu['vendor'])}', " - f"'{escape_sql_string(spu['vendor_url'])}', '{escape_sql_string(spu['seo_title'])}', " - f"'{escape_sql_string(spu['seo_description'])}', '{escape_sql_string(spu['seo_keywords'])}', " - f"'{escape_sql_string(spu['image_src'])}', {spu['image_width']}, " - f"{spu['image_height']}, '{escape_sql_string(spu['image_path'])}', " - f"'{escape_sql_string(spu['image_alt'])}', '{escape_sql_string(spu['inventory_policy'])}', " - f"{spu['inventory_quantity']}, '{escape_sql_string(spu['inventory_tracking'])}', " - f"{spu['published']}, '{escape_sql_string(spu['published_at'])}', " - f"{spu['requires_shipping']}, {spu['taxable']}, " - f"{spu['fake_sales']}, {spu['display_fake_sales']}, {spu['mixed_wholesale']}, " - f"{spu['need_variant_image']}, {spu['has_only_default_variant']}, " - f"'{escape_sql_string(spu['tags'])}', '{escape_sql_string(spu['note'])}', " - f"'{escape_sql_string(spu['category'])}', '{escape_sql_string(spu['shoplazza_created_at'])}', " - f"'{escape_sql_string(spu['shoplazza_updated_at'])}', '{escape_sql_string(spu['tenant_id'])}', " - f"'{escape_sql_string(spu['creator'])}', '{escape_sql_string(spu['create_time'])}', " - f"'{escape_sql_string(spu['updater'])}', '{escape_sql_string(spu['update_time'])}', " - f"{spu['deleted']})" - ) - f.write(values) - if i < len(spus) - 1: - f.write(",\n") - else: - f.write(";\n\n") - - f.write("-- SKU Data from tenant2 CSV\n") - f.write("INSERT INTO shoplazza_product_sku (\n") - f.write(" id, spu_id, shop_id, shoplazza_id, shoplazza_product_id, shoplazza_image_id,\n") - f.write(" title, sku, barcode, position, price, compare_at_price, cost_price,\n") - f.write(" option1, option2, option3, inventory_quantity, weight, weight_unit,\n") - f.write(" image_src, wholesale_price, note, extend,\n") - 
f.write(" shoplazza_created_at, shoplazza_updated_at, tenant_id,\n") - f.write(" creator, create_time, updater, update_time, deleted\n") - f.write(") VALUES\n") - - for i, sku in enumerate(skus): - # Handle extend field (JSON, can be NULL) - extend_value = 'NULL' if sku['extend'] is None else f"'{escape_sql_string(sku['extend'])}'" - - values = ( - f"({sku['id']}, {sku['spu_id']}, {sku['shop_id']}, '{escape_sql_string(sku['shoplazza_id'])}', " - f"'{escape_sql_string(sku['shoplazza_product_id'])}', '{escape_sql_string(sku['shoplazza_image_id'])}', " - f"'{escape_sql_string(sku['title'])}', '{escape_sql_string(sku['sku'])}', " - f"'{escape_sql_string(sku['barcode'])}', {sku['position']}, " - f"{sku['price']}, {sku['compare_at_price']}, {sku['cost_price']}, " - f"'{escape_sql_string(sku['option1'])}', '{escape_sql_string(sku['option2'])}', " - f"'{escape_sql_string(sku['option3'])}', {sku['inventory_quantity']}, {sku['weight']}, " - f"'{escape_sql_string(sku['weight_unit'])}', '{escape_sql_string(sku['image_src'])}', " - f"'{escape_sql_string(sku['wholesale_price'])}', '{escape_sql_string(sku['note'])}', " - f"{extend_value}, '{escape_sql_string(sku['shoplazza_created_at'])}', " - f"'{escape_sql_string(sku['shoplazza_updated_at'])}', '{escape_sql_string(sku['tenant_id'])}', " - f"'{escape_sql_string(sku['creator'])}', '{escape_sql_string(sku['create_time'])}', " - f"'{escape_sql_string(sku['updater'])}', '{escape_sql_string(sku['update_time'])}', " - f"{sku['deleted']})" - ) - f.write(values) - if i < len(skus) - 1: - f.write(",\n") - else: - f.write(";\n") - - -def get_max_ids_from_db(db_config=None): - """ - Get maximum IDs from database to avoid primary key conflicts. - - Args: - db_config: Optional database config dict with keys: host, port, database, username, password - - Returns: - tuple: (max_spu_id, max_sku_id) or (0, 0) if cannot connect - """ - if not db_config: - return 0, 0 - - try: - from utils.db_connector import create_db_connection - from sqlalchemy import text - - db_engine = create_db_connection( - host=db_config['host'], - port=db_config['port'], - database=db_config['database'], - username=db_config['username'], - password=db_config['password'] - ) - - with db_engine.connect() as conn: - result = conn.execute(text('SELECT MAX(id) FROM shoplazza_product_spu')) - max_spu_id = result.scalar() or 0 - - result = conn.execute(text('SELECT MAX(id) FROM shoplazza_product_sku')) - max_sku_id = result.scalar() or 0 - - return max_spu_id, max_sku_id - except Exception as e: - print(f"Warning: Could not get max IDs from database: {e}") - return 0, 0 - - -def main(): - parser = argparse.ArgumentParser(description='Import tenant2 CSV data into MySQL Shoplazza tables') - parser.add_argument('--csv-file', required=True, help='CSV file path') - parser.add_argument('--tenant-id', default='2', help='Tenant ID (default: 2)') - parser.add_argument('--start-spu-id', type=int, default=None, help='Starting SPU ID (default: auto-calculate from DB)') - parser.add_argument('--output', default='tenant2_data.sql', help='Output SQL file (default: tenant2_data.sql)') - parser.add_argument('--db-host', help='Database host (for auto-calculating start IDs)') - parser.add_argument('--db-port', type=int, default=3306, help='Database port (default: 3306)') - parser.add_argument('--db-database', help='Database name (for auto-calculating start IDs)') - parser.add_argument('--db-username', help='Database username (for auto-calculating start IDs)') - parser.add_argument('--db-password', help='Database password 
(for auto-calculating start IDs)') - - args = parser.parse_args() - - print(f"Reading CSV file: {args.csv_file}") - csv_data_list = read_csv_file(args.csv_file) - print(f"Read {len(csv_data_list)} rows from CSV") - - # Auto-calculate start IDs if not provided and DB config available - start_spu_id = args.start_spu_id - if start_spu_id is None and args.db_host and args.db_database and args.db_username and args.db_password: - print("Auto-calculating start IDs from database...") - db_config = { - 'host': args.db_host, - 'port': args.db_port, - 'database': args.db_database, - 'username': args.db_username, - 'password': args.db_password - } - max_spu_id, max_sku_id = get_max_ids_from_db(db_config) - start_spu_id = max_spu_id + 1 - print(f" Max SPU ID in DB: {max_spu_id}") - print(f" Using start SPU ID: {start_spu_id}") - elif start_spu_id is None: - start_spu_id = 1 - print(f"Using default start SPU ID: {start_spu_id}") - - # Generate SPU and SKU data - print(f"Generating SPU and SKU data (tenant_id={args.tenant_id})...") - spus = [] - skus = [] - spu_id = start_spu_id - - for csv_data in csv_data_list: - # Generate SPU - spu = generate_spu_data(csv_data, spu_id, args.tenant_id) - spus.append(spu) - - # Generate SKU - use skuId from CSV as SKU ID - try: - sku_id = int(csv_data['skuId']) - except: - # If skuId is not valid, use a generated ID - sku_id = 1000000 + spu_id - - sku = generate_sku_data(csv_data, spu_id, sku_id, args.tenant_id) - skus.append(sku) - - spu_id += 1 - - print(f"Generated {len(spus)} SPUs and {len(skus)} SKUs") - - # Generate SQL file - print(f"Generating SQL file: {args.output}") - generate_sql_inserts(spus, skus, args.output) - print(f"SQL file generated: {args.output}") - print(f" - SPUs: {len(spus)}") - print(f" - SKUs: {len(skus)}") - - -if __name__ == '__main__': - main() - diff --git a/scripts/import_test_data.py b/scripts/import_test_data.py deleted file mode 100644 index 97ea83d..0000000 --- a/scripts/import_test_data.py +++ /dev/null @@ -1,277 +0,0 @@ -#!/usr/bin/env python3 -""" -Import test data into MySQL Shoplazza tables. - -Reads SQL file generated by generate_test_data.py and imports into MySQL. -""" - -import sys -import os -import argparse -from pathlib import Path - -# Add parent directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from utils.db_connector import create_db_connection, test_connection - - -def import_sql_file(db_engine, sql_file: str): - """ - Import SQL file into database using MySQL client (more reliable for large files). 
- - Args: - db_engine: SQLAlchemy database engine (used to get connection info) - sql_file: Path to SQL file - """ - import subprocess - import os - from pathlib import Path - - # Get connection info from engine URL - engine_url = str(db_engine.url) - # Parse: mysql+pymysql://user:pass@host:port/database - import re - match = re.match(r'mysql\+pymysql://([^:]+):([^@]+)@([^:]+):(\d+)/(.+)', engine_url) - if not match: - raise ValueError(f"Cannot parse database URL: {engine_url}") - - username, password, host, port, database = match.groups() - - # Use MySQL client to execute SQL file (more reliable) - sql_file_path = Path(sql_file).absolute() - - # Build mysql command - mysql_cmd = [ - 'mysql', - f'-h{host}', - f'-P{port}', - f'-u{username}', - f'-p{password}', - database - ] - - print(f"Executing SQL file using MySQL client...") - print(f" File: {sql_file_path}") - print(f" Database: {host}:{port}/{database}") - - try: - with open(sql_file_path, 'r', encoding='utf-8') as f: - result = subprocess.run( - mysql_cmd, - stdin=f, - capture_output=True, - text=True, - timeout=300 # 5 minute timeout - ) - - if result.returncode != 0: - error_msg = result.stderr or result.stdout - print(f"ERROR: MySQL execution failed") - print(f"Error output: {error_msg[:500]}") - raise Exception(f"MySQL execution failed: {error_msg[:200]}") - - print("SQL file executed successfully") - return True - - except FileNotFoundError: - # Fallback to SQLAlchemy if mysql client not available - print("MySQL client not found, falling back to SQLAlchemy...") - return import_sql_file_sqlalchemy(db_engine, sql_file) - except subprocess.TimeoutExpired: - raise Exception("SQL execution timed out after 5 minutes") - except Exception as e: - print(f"Error using MySQL client: {e}") - print("Falling back to SQLAlchemy...") - return import_sql_file_sqlalchemy(db_engine, sql_file) - - -def import_sql_file_sqlalchemy(db_engine, sql_file: str): - """ - Fallback method: Import SQL file using SQLAlchemy (for when mysql client unavailable). 
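
The engine-URL regex in import_sql_file above can be exercised in isolation; a minimal sketch with hypothetical credentials (note the pattern assumes neither the username nor the password contains ':' or '@'):

import re

url = "mysql+pymysql://root:secret@127.0.0.1:3306/shopdb"  # hypothetical
m = re.match(r'mysql\+pymysql://([^:]+):([^@]+)@([^:]+):(\d+)/(.+)', url)
print(m.groups())  # -> ('root', 'secret', '127.0.0.1', '3306', 'shopdb')
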
- """ - from sqlalchemy import text - - with open(sql_file, 'r', encoding='utf-8') as f: - sql_content = f.read() - - # Remove comment lines - lines = sql_content.split('\n') - cleaned_lines = [] - for line in lines: - stripped = line.lstrip() - if stripped.startswith('--'): - continue - cleaned_lines.append(line) - - sql_content = '\n'.join(cleaned_lines) - - # Split by semicolon - but we need to handle strings properly - # Use a state machine to track string boundaries - statements = [] - current = [] - in_string = False - i = 0 - - while i < len(sql_content): - char = sql_content[i] - - if char == "'": - # Check for escaped quote (two single quotes) - if i + 1 < len(sql_content) and sql_content[i+1] == "'": - current.append("''") - i += 1 # Skip next quote - elif not in_string: - in_string = True - current.append(char) - else: - in_string = False - current.append(char) - else: - current.append(char) - - # Split on semicolon only if not in string - if char == ';' and not in_string: - stmt = ''.join(current).strip() - if stmt and stmt.upper().startswith('INSERT INTO'): - statements.append(stmt) - current = [] - - i += 1 - - # Handle last statement - if current: - stmt = ''.join(current).strip() - if stmt and stmt.upper().startswith('INSERT INTO'): - statements.append(stmt) - - print(f"Parsed {len(statements)} SQL statements") - print(f"Executing {len(statements)} SQL statements...") - - # Use raw connection to avoid SQLAlchemy parameter parsing - raw_conn = db_engine.raw_connection() - try: - cursor = raw_conn.cursor() - try: - for i, statement in enumerate(statements, 1): - try: - # Execute raw SQL directly using pymysql cursor - cursor.execute(statement) - raw_conn.commit() - if i % 1000 == 0 or i == len(statements): - print(f" [{i}/{len(statements)}] Executed successfully") - except Exception as e: - print(f" [{i}/{len(statements)}] ERROR: {e}") - error_start = max(0, statement.find('VALUES') - 100) - error_end = min(len(statement), error_start + 500) - print(f" Statement context: ...{statement[error_start:error_end]}...") - raise - finally: - cursor.close() - finally: - raw_conn.close() - - return True - - -def verify_import(db_engine, tenant_id: str): - """ - Verify imported data. 
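
The quote-aware splitter above is the subtle part of this fallback; here is a self-contained sketch of the same state machine on a hypothetical input, showing that a ';' inside a string literal is not treated as a statement boundary:

def split_statements(sql: str) -> list:
    """Split on ';' only outside single-quoted strings; '' is an escaped quote."""
    stmts, cur, in_str = [], [], False
    i = 0
    while i < len(sql):
        ch = sql[i]
        if ch == "'":
            if i + 1 < len(sql) and sql[i + 1] == "'":
                cur.append("''")   # escaped quote: string state unchanged
                i += 1             # skip the second quote of the pair
            else:
                in_str = not in_str
                cur.append(ch)
        else:
            cur.append(ch)
        if ch == ';' and not in_str:
            stmt = ''.join(cur).strip()
            if stmt:
                stmts.append(stmt)
            cur = []
        i += 1
    return stmts

print(split_statements("INSERT INTO t VALUES ('a;b'); INSERT INTO t VALUES ('it''s');"))
# -> ["INSERT INTO t VALUES ('a;b');", "INSERT INTO t VALUES ('it''s');"]
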
- - Args: - db_engine: SQLAlchemy database engine - tenant_id: Tenant ID to verify - """ - from sqlalchemy import text - - with db_engine.connect() as conn: - # Count SPUs - result = conn.execute(text("SELECT COUNT(*) FROM shoplazza_product_spu WHERE tenant_id = :tenant_id"), {"tenant_id": tenant_id}) - spu_count = result.scalar() - - # Count SKUs - result = conn.execute(text("SELECT COUNT(*) FROM shoplazza_product_sku WHERE tenant_id = :tenant_id"), {"tenant_id": tenant_id}) - sku_count = result.scalar() - - print(f"\nVerification:") - print(f" SPUs: {spu_count}") - print(f" SKUs: {sku_count}") - - return spu_count, sku_count - - -def main(): - parser = argparse.ArgumentParser(description='Import test data into MySQL') - - # Database connection - parser.add_argument('--db-host', required=True, help='MySQL host') - parser.add_argument('--db-port', type=int, default=3306, help='MySQL port (default: 3306)') - parser.add_argument('--db-database', required=True, help='MySQL database name') - parser.add_argument('--db-username', required=True, help='MySQL username') - parser.add_argument('--db-password', required=True, help='MySQL password') - - # Import options - parser.add_argument('--sql-file', required=True, help='SQL file to import') - parser.add_argument('--tenant-id', help='Tenant ID to verify (optional)') - - args = parser.parse_args() - - print(f"Connecting to MySQL: {args.db_host}:{args.db_port}/{args.db_database}") - - # Connect to database - try: - db_engine = create_db_connection( - host=args.db_host, - port=args.db_port, - database=args.db_database, - username=args.db_username, - password=args.db_password - ) - except Exception as e: - print(f"ERROR: Failed to connect to MySQL: {e}") - return 1 - - # Test connection - if not test_connection(db_engine): - print("ERROR: Database connection test failed") - return 1 - - print("Database connection successful") - - # Clean existing data if tenant_id provided - if args.tenant_id: - print(f"\nCleaning existing data for tenant_id: {args.tenant_id}") - from sqlalchemy import text - try: - with db_engine.connect() as conn: - # Delete SKUs first (foreign key constraint) - conn.execute(text(f"DELETE FROM shoplazza_product_sku WHERE tenant_id = '{args.tenant_id}'")) - # Delete SPUs - conn.execute(text(f"DELETE FROM shoplazza_product_spu WHERE tenant_id = '{args.tenant_id}'")) - conn.commit() - print("✓ Existing data cleaned") - except Exception as e: - print(f"⚠ Warning: Failed to clean existing data: {e}") - # Continue anyway - - # Import SQL file - print(f"\nImporting SQL file: {args.sql_file}") - try: - import_sql_file(db_engine, args.sql_file) - print("Import completed successfully") - except Exception as e: - print(f"ERROR: Failed to import SQL file: {e}") - import traceback - traceback.print_exc() - return 1 - - # Verify import if tenant_id provided - if args.tenant_id: - verify_import(db_engine, args.tenant_id) - - return 0 - - -if __name__ == '__main__': - sys.exit(main()) - diff --git a/scripts/indexer__old_2025_11/import_tenant2_csv.py b/scripts/indexer__old_2025_11/import_tenant2_csv.py new file mode 100755 index 0000000..063dd77 --- /dev/null +++ b/scripts/indexer__old_2025_11/import_tenant2_csv.py @@ -0,0 +1,495 @@ +#!/usr/bin/env python3 +""" +Import tenant2 CSV data into MySQL Shoplazza tables. + +Reads CSV file and generates SQL INSERT statements for SPU and SKU tables. +Each CSV row corresponds to 1 SPU and 1 SKU. +This script is for generating test data for tenant_id=2 from CSV files. 
+""" + +import sys +import os +import csv +import random +import argparse +import re +from pathlib import Path +from datetime import datetime, timedelta + +# Add parent directory to path +sys.path.insert(0, str(Path(__file__).parent.parent)) + + +def escape_sql_string(value: str) -> str: + """ + Escape SQL string value (replace single quotes with doubled quotes and handle special characters). + + Args: + value: String value to escape + + Returns: + Escaped string + """ + if value is None: + return '' + + # Convert to string and handle None + s = str(value) + + # Replace single quotes with doubled quotes (SQL standard) + s = s.replace("'", "''") + + # Replace backslashes (MySQL escape) + s = s.replace("\\", "\\\\") + + # Remove or replace control characters that can break SQL + # Replace newlines and carriage returns with spaces + s = s.replace("\n", " ").replace("\r", " ") + + # Remove other control characters (except tab) + s = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]', '', s) + + # Remove null bytes + s = s.replace('\x00', '') + + return s + + +def generate_handle(title: str) -> str: + """ + Generate URL-friendly handle from title. + + Args: + title: Product title + + Returns: + URL-friendly handle + """ + # Remove special characters, convert to lowercase, replace spaces with hyphens + handle = re.sub(r'[^\w\s-]', '', title.lower()) + handle = re.sub(r'[-\s]+', '-', handle) + handle = handle.strip('-') + # Limit length + if len(handle) > 255: + handle = handle[:255] + return handle or 'product' + + +def parse_csv_row(row: dict) -> dict: + """ + Parse CSV row and extract fields. + + Args: + row: CSV row dictionary + + Returns: + Parsed data dictionary + """ + # Remove quotes from values if present + def clean_value(value): + if value is None: + return '' + value = str(value).strip() + # Remove surrounding quotes + if value.startswith('"') and value.endswith('"'): + value = value[1:-1] + return value + + return { + 'skuId': clean_value(row.get('skuId', '')), + 'name': clean_value(row.get('name', '')), + 'name_pinyin': clean_value(row.get('name_pinyin', '')), + 'create_time': clean_value(row.get('create_time', '')), + 'ruSkuName': clean_value(row.get('ruSkuName', '')), + 'enSpuName': clean_value(row.get('enSpuName', '')), + 'categoryName': clean_value(row.get('categoryName', '')), + 'supplierName': clean_value(row.get('supplierName', '')), + 'brandName': clean_value(row.get('brandName', '')), + 'file_id': clean_value(row.get('file_id', '')), + 'days_since_last_update': clean_value(row.get('days_since_last_update', '')), + 'id': clean_value(row.get('id', '')), + 'imageUrl': clean_value(row.get('imageUrl', '')) + } + + +def generate_spu_data(csv_data: dict, spu_id: int, tenant_id: str = "2") -> dict: + """ + Generate SPU data from CSV row. 
+ + Args: + csv_data: Parsed CSV row data + spu_id: SPU ID + tenant_id: Tenant ID (default: "2") + + Returns: + SPU data dictionary + """ + # Parse create_time + try: + created_at = datetime.strptime(csv_data['create_time'], '%Y-%m-%d %H:%M:%S') + except: + created_at = datetime.now() - timedelta(days=random.randint(1, 365)) + + updated_at = created_at + timedelta(days=random.randint(0, 30)) + + # Generate handle from title + title = csv_data['name'] or csv_data['enSpuName'] or 'Product' + handle = generate_handle(title) + + # Generate tags from category and brand + tags_parts = [] + if csv_data['categoryName']: + tags_parts.append(csv_data['categoryName']) + if csv_data['brandName']: + tags_parts.append(csv_data['brandName']) + tags = ','.join(tags_parts) if tags_parts else '' + + # Generate SEO fields + seo_title = f"{title} - {csv_data['categoryName']}" if csv_data['categoryName'] else title + seo_description = f"购买{csv_data['brandName']}{title}" if csv_data['brandName'] else title + seo_keywords = f"{title},{csv_data['categoryName']},{csv_data['brandName']}" if csv_data['categoryName'] else title + + spu = { + 'id': spu_id, + 'shop_id': 1, + 'shoplazza_id': csv_data['id'] or f"spu-{spu_id}", + 'handle': handle, + 'title': title, + 'brief': csv_data['name'] or '', + 'description': f"
{csv_data['name']}
" if csv_data['name'] else '', + 'spu': '', + 'vendor': csv_data['supplierName'] or '', + 'vendor_url': '', + 'seo_title': seo_title, + 'seo_description': seo_description, + 'seo_keywords': seo_keywords, + 'image_src': csv_data['imageUrl'] or '', + 'image_width': 800, + 'image_height': 600, + 'image_path': f"products/{spu_id}.jpg", + 'image_alt': title, + 'inventory_policy': '', + 'inventory_quantity': 0, + 'inventory_tracking': '0', + 'published': 1, + 'published_at': created_at.strftime('%Y-%m-%d %H:%M:%S'), + 'requires_shipping': 1, + 'taxable': 0, + 'fake_sales': 0, + 'display_fake_sales': 0, + 'mixed_wholesale': 0, + 'need_variant_image': 0, + 'has_only_default_variant': 0, + 'tags': tags, + 'note': '', + 'category': csv_data['categoryName'] or '', + 'shoplazza_created_at': created_at.strftime('%Y-%m-%d %H:%M:%S'), + 'shoplazza_updated_at': updated_at.strftime('%Y-%m-%d %H:%M:%S'), + 'tenant_id': tenant_id, + 'creator': '1', + 'create_time': created_at.strftime('%Y-%m-%d %H:%M:%S'), + 'updater': '1', + 'update_time': updated_at.strftime('%Y-%m-%d %H:%M:%S'), + 'deleted': 0 + } + + return spu + + +def generate_sku_data(csv_data: dict, spu_id: int, sku_id: int, tenant_id: str = "2") -> dict: + """ + Generate SKU data from CSV row. + + Args: + csv_data: Parsed CSV row data + spu_id: Associated SPU ID + sku_id: SKU ID (from CSV skuId) + tenant_id: Tenant ID (default: "2") + + Returns: + SKU data dictionary + """ + # Parse create_time + try: + created_at = datetime.strptime(csv_data['create_time'], '%Y-%m-%d %H:%M:%S') + except: + created_at = datetime.now() - timedelta(days=random.randint(1, 365)) + + updated_at = created_at + timedelta(days=random.randint(0, 30)) + + # Generate random price + price = round(random.uniform(50, 500), 2) + compare_at_price = round(price * random.uniform(1.2, 1.5), 2) + cost_price = round(price * 0.6, 2) + + # Generate random stock + inventory_quantity = random.randint(0, 100) + + # Generate random weight + weight = round(random.uniform(0.1, 5.0), 2) + + # Use ruSkuName as title, fallback to name + title = csv_data['ruSkuName'] or csv_data['name'] or 'SKU' + + # Use skuId as SKU code + sku_code = csv_data['skuId'] or f"SKU-{sku_id}" + + sku = { + 'id': sku_id, + 'spu_id': spu_id, + 'shop_id': 1, + 'shoplazza_id': f"sku-{sku_id}", + 'shoplazza_product_id': csv_data['id'] or f"spu-{spu_id}", + 'shoplazza_image_id': '', + 'title': title, + 'sku': sku_code, + 'barcode': f"BAR{sku_id:08d}", + 'position': 1, + 'price': price, + 'compare_at_price': compare_at_price, + 'cost_price': cost_price, + 'option1': '', + 'option2': '', + 'option3': '', + 'inventory_quantity': inventory_quantity, + 'weight': weight, + 'weight_unit': 'kg', + 'image_src': csv_data['imageUrl'] or '', + 'wholesale_price': f'[{{"price": {round(price * 0.8, 2)}, "minQuantity": 10}}]', + 'note': '', + 'extend': None, # JSON field, use NULL + 'shoplazza_created_at': created_at.strftime('%Y-%m-%d %H:%M:%S'), + 'shoplazza_updated_at': updated_at.strftime('%Y-%m-%d %H:%M:%S'), + 'tenant_id': tenant_id, + 'creator': '1', + 'create_time': created_at.strftime('%Y-%m-%d %H:%M:%S'), + 'updater': '1', + 'update_time': updated_at.strftime('%Y-%m-%d %H:%M:%S'), + 'deleted': 0 + } + + return sku + + +def read_csv_file(csv_file: str) -> list: + """ + Read CSV file and return list of parsed rows. 
+ + Args: + csv_file: Path to CSV file + + Returns: + List of parsed CSV data dictionaries + """ + csv_data_list = [] + + with open(csv_file, 'r', encoding='utf-8') as f: + # Use csv.DictReader to handle quoted fields properly + reader = csv.DictReader(f) + for row in reader: + parsed = parse_csv_row(row) + csv_data_list.append(parsed) + + return csv_data_list + + +def generate_sql_inserts(spus: list, skus: list, output_file: str): + """ + Generate SQL INSERT statements. + + Args: + spus: List of SPU data + skus: List of SKU data + output_file: Output file path + """ + with open(output_file, 'w', encoding='utf-8') as f: + f.write("-- SPU Data from tenant2 CSV\n") + f.write("INSERT INTO shoplazza_product_spu (\n") + f.write(" id, shop_id, shoplazza_id, handle, title, brief, description, spu,\n") + f.write(" vendor, vendor_url, seo_title, seo_description, seo_keywords,\n") + f.write(" image_src, image_width, image_height, image_path, image_alt,\n") + f.write(" inventory_policy, inventory_quantity, inventory_tracking,\n") + f.write(" published, published_at, requires_shipping, taxable,\n") + f.write(" fake_sales, display_fake_sales, mixed_wholesale, need_variant_image,\n") + f.write(" has_only_default_variant, tags, note, category,\n") + f.write(" shoplazza_created_at, shoplazza_updated_at, tenant_id,\n") + f.write(" creator, create_time, updater, update_time, deleted\n") + f.write(") VALUES\n") + + for i, spu in enumerate(spus): + values = ( + f"({spu['id']}, {spu['shop_id']}, '{escape_sql_string(spu['shoplazza_id'])}', " + f"'{escape_sql_string(spu['handle'])}', '{escape_sql_string(spu['title'])}', " + f"'{escape_sql_string(spu['brief'])}', '{escape_sql_string(spu['description'])}', " + f"'{escape_sql_string(spu['spu'])}', '{escape_sql_string(spu['vendor'])}', " + f"'{escape_sql_string(spu['vendor_url'])}', '{escape_sql_string(spu['seo_title'])}', " + f"'{escape_sql_string(spu['seo_description'])}', '{escape_sql_string(spu['seo_keywords'])}', " + f"'{escape_sql_string(spu['image_src'])}', {spu['image_width']}, " + f"{spu['image_height']}, '{escape_sql_string(spu['image_path'])}', " + f"'{escape_sql_string(spu['image_alt'])}', '{escape_sql_string(spu['inventory_policy'])}', " + f"{spu['inventory_quantity']}, '{escape_sql_string(spu['inventory_tracking'])}', " + f"{spu['published']}, '{escape_sql_string(spu['published_at'])}', " + f"{spu['requires_shipping']}, {spu['taxable']}, " + f"{spu['fake_sales']}, {spu['display_fake_sales']}, {spu['mixed_wholesale']}, " + f"{spu['need_variant_image']}, {spu['has_only_default_variant']}, " + f"'{escape_sql_string(spu['tags'])}', '{escape_sql_string(spu['note'])}', " + f"'{escape_sql_string(spu['category'])}', '{escape_sql_string(spu['shoplazza_created_at'])}', " + f"'{escape_sql_string(spu['shoplazza_updated_at'])}', '{escape_sql_string(spu['tenant_id'])}', " + f"'{escape_sql_string(spu['creator'])}', '{escape_sql_string(spu['create_time'])}', " + f"'{escape_sql_string(spu['updater'])}', '{escape_sql_string(spu['update_time'])}', " + f"{spu['deleted']})" + ) + f.write(values) + if i < len(spus) - 1: + f.write(",\n") + else: + f.write(";\n\n") + + f.write("-- SKU Data from tenant2 CSV\n") + f.write("INSERT INTO shoplazza_product_sku (\n") + f.write(" id, spu_id, shop_id, shoplazza_id, shoplazza_product_id, shoplazza_image_id,\n") + f.write(" title, sku, barcode, position, price, compare_at_price, cost_price,\n") + f.write(" option1, option2, option3, inventory_quantity, weight, weight_unit,\n") + f.write(" image_src, wholesale_price, note, extend,\n") + 
f.write(" shoplazza_created_at, shoplazza_updated_at, tenant_id,\n") + f.write(" creator, create_time, updater, update_time, deleted\n") + f.write(") VALUES\n") + + for i, sku in enumerate(skus): + # Handle extend field (JSON, can be NULL) + extend_value = 'NULL' if sku['extend'] is None else f"'{escape_sql_string(sku['extend'])}'" + + values = ( + f"({sku['id']}, {sku['spu_id']}, {sku['shop_id']}, '{escape_sql_string(sku['shoplazza_id'])}', " + f"'{escape_sql_string(sku['shoplazza_product_id'])}', '{escape_sql_string(sku['shoplazza_image_id'])}', " + f"'{escape_sql_string(sku['title'])}', '{escape_sql_string(sku['sku'])}', " + f"'{escape_sql_string(sku['barcode'])}', {sku['position']}, " + f"{sku['price']}, {sku['compare_at_price']}, {sku['cost_price']}, " + f"'{escape_sql_string(sku['option1'])}', '{escape_sql_string(sku['option2'])}', " + f"'{escape_sql_string(sku['option3'])}', {sku['inventory_quantity']}, {sku['weight']}, " + f"'{escape_sql_string(sku['weight_unit'])}', '{escape_sql_string(sku['image_src'])}', " + f"'{escape_sql_string(sku['wholesale_price'])}', '{escape_sql_string(sku['note'])}', " + f"{extend_value}, '{escape_sql_string(sku['shoplazza_created_at'])}', " + f"'{escape_sql_string(sku['shoplazza_updated_at'])}', '{escape_sql_string(sku['tenant_id'])}', " + f"'{escape_sql_string(sku['creator'])}', '{escape_sql_string(sku['create_time'])}', " + f"'{escape_sql_string(sku['updater'])}', '{escape_sql_string(sku['update_time'])}', " + f"{sku['deleted']})" + ) + f.write(values) + if i < len(skus) - 1: + f.write(",\n") + else: + f.write(";\n") + + +def get_max_ids_from_db(db_config=None): + """ + Get maximum IDs from database to avoid primary key conflicts. + + Args: + db_config: Optional database config dict with keys: host, port, database, username, password + + Returns: + tuple: (max_spu_id, max_sku_id) or (0, 0) if cannot connect + """ + if not db_config: + return 0, 0 + + try: + from utils.db_connector import create_db_connection + from sqlalchemy import text + + db_engine = create_db_connection( + host=db_config['host'], + port=db_config['port'], + database=db_config['database'], + username=db_config['username'], + password=db_config['password'] + ) + + with db_engine.connect() as conn: + result = conn.execute(text('SELECT MAX(id) FROM shoplazza_product_spu')) + max_spu_id = result.scalar() or 0 + + result = conn.execute(text('SELECT MAX(id) FROM shoplazza_product_sku')) + max_sku_id = result.scalar() or 0 + + return max_spu_id, max_sku_id + except Exception as e: + print(f"Warning: Could not get max IDs from database: {e}") + return 0, 0 + + +def main(): + parser = argparse.ArgumentParser(description='Import tenant2 CSV data into MySQL Shoplazza tables') + parser.add_argument('--csv-file', required=True, help='CSV file path') + parser.add_argument('--tenant-id', default='2', help='Tenant ID (default: 2)') + parser.add_argument('--start-spu-id', type=int, default=None, help='Starting SPU ID (default: auto-calculate from DB)') + parser.add_argument('--output', default='tenant2_data.sql', help='Output SQL file (default: tenant2_data.sql)') + parser.add_argument('--db-host', help='Database host (for auto-calculating start IDs)') + parser.add_argument('--db-port', type=int, default=3306, help='Database port (default: 3306)') + parser.add_argument('--db-database', help='Database name (for auto-calculating start IDs)') + parser.add_argument('--db-username', help='Database username (for auto-calculating start IDs)') + parser.add_argument('--db-password', help='Database password 
(for auto-calculating start IDs)')
+
+    args = parser.parse_args()
+
+    print(f"Reading CSV file: {args.csv_file}")
+    csv_data_list = read_csv_file(args.csv_file)
+    print(f"Read {len(csv_data_list)} rows from CSV")
+
+    # Auto-calculate start IDs if not provided and DB config available
+    start_spu_id = args.start_spu_id
+    if start_spu_id is None and args.db_host and args.db_database and args.db_username and args.db_password:
+        print("Auto-calculating start IDs from database...")
+        db_config = {
+            'host': args.db_host,
+            'port': args.db_port,
+            'database': args.db_database,
+            'username': args.db_username,
+            'password': args.db_password
+        }
+        max_spu_id, max_sku_id = get_max_ids_from_db(db_config)
+        start_spu_id = max_spu_id + 1
+        print(f"  Max SPU ID in DB: {max_spu_id}")
+        print(f"  Using start SPU ID: {start_spu_id}")
+    elif start_spu_id is None:
+        start_spu_id = 1
+        print(f"Using default start SPU ID: {start_spu_id}")
+
+    # Generate SPU and SKU data
+    print(f"Generating SPU and SKU data (tenant_id={args.tenant_id})...")
+    spus = []
+    skus = []
+    spu_id = start_spu_id
+
+    for csv_data in csv_data_list:
+        # Generate SPU
+        spu = generate_spu_data(csv_data, spu_id, args.tenant_id)
+        spus.append(spu)
+
+        # Generate SKU - use skuId from CSV as SKU ID
+        try:
+            sku_id = int(csv_data['skuId'])
+        except (TypeError, ValueError):
+            # If skuId is not valid, use a generated ID
+            sku_id = 1000000 + spu_id
+
+        sku = generate_sku_data(csv_data, spu_id, sku_id, args.tenant_id)
+        skus.append(sku)
+
+        spu_id += 1
+
+    print(f"Generated {len(spus)} SPUs and {len(skus)} SKUs")
+
+    # Generate SQL file
+    print(f"Generating SQL file: {args.output}")
+    generate_sql_inserts(spus, skus, args.output)
+    print(f"SQL file generated: {args.output}")
+    print(f"  - SPUs: {len(spus)}")
+    print(f"  - SKUs: {len(skus)}")
+
+
+if __name__ == '__main__':
+    main()
+
diff --git a/scripts/indexer__old_2025_11/import_test_data.py b/scripts/indexer__old_2025_11/import_test_data.py
new file mode 100644
index 0000000..97ea83d
--- /dev/null
+++ b/scripts/indexer__old_2025_11/import_test_data.py
@@ -0,0 +1,277 @@
+#!/usr/bin/env python3
+"""
+Import test data into MySQL Shoplazza tables.
+
+Reads SQL file generated by generate_test_data.py and imports into MySQL.
+"""
+
+import sys
+import os
+import argparse
+from pathlib import Path
+
+# Add parent directory to path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from utils.db_connector import create_db_connection, test_connection
+
+
+def import_sql_file(db_engine, sql_file: str):
+    """
+    Import SQL file into database using MySQL client (more reliable for large files).
+ + Args: + db_engine: SQLAlchemy database engine (used to get connection info) + sql_file: Path to SQL file + """ + import subprocess + import os + from pathlib import Path + + # Get connection info from engine URL + engine_url = str(db_engine.url) + # Parse: mysql+pymysql://user:pass@host:port/database + import re + match = re.match(r'mysql\+pymysql://([^:]+):([^@]+)@([^:]+):(\d+)/(.+)', engine_url) + if not match: + raise ValueError(f"Cannot parse database URL: {engine_url}") + + username, password, host, port, database = match.groups() + + # Use MySQL client to execute SQL file (more reliable) + sql_file_path = Path(sql_file).absolute() + + # Build mysql command + mysql_cmd = [ + 'mysql', + f'-h{host}', + f'-P{port}', + f'-u{username}', + f'-p{password}', + database + ] + + print(f"Executing SQL file using MySQL client...") + print(f" File: {sql_file_path}") + print(f" Database: {host}:{port}/{database}") + + try: + with open(sql_file_path, 'r', encoding='utf-8') as f: + result = subprocess.run( + mysql_cmd, + stdin=f, + capture_output=True, + text=True, + timeout=300 # 5 minute timeout + ) + + if result.returncode != 0: + error_msg = result.stderr or result.stdout + print(f"ERROR: MySQL execution failed") + print(f"Error output: {error_msg[:500]}") + raise Exception(f"MySQL execution failed: {error_msg[:200]}") + + print("SQL file executed successfully") + return True + + except FileNotFoundError: + # Fallback to SQLAlchemy if mysql client not available + print("MySQL client not found, falling back to SQLAlchemy...") + return import_sql_file_sqlalchemy(db_engine, sql_file) + except subprocess.TimeoutExpired: + raise Exception("SQL execution timed out after 5 minutes") + except Exception as e: + print(f"Error using MySQL client: {e}") + print("Falling back to SQLAlchemy...") + return import_sql_file_sqlalchemy(db_engine, sql_file) + + +def import_sql_file_sqlalchemy(db_engine, sql_file: str): + """ + Fallback method: Import SQL file using SQLAlchemy (for when mysql client unavailable). 
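[Note: one fragility in import_sql_file above is that it regex-parses str(db_engine.url) and will mis-split passwords containing ':' or '@'. SQLAlchemy already exposes the parsed parts on the URL object, so a regex-free sketch of the same extraction (same variable names as above):]

# Sketch: take connection parts from the already-parsed SQLAlchemy URL
# object rather than regex-matching its string form.
url = db_engine.url
username = url.username
password = url.password
host = url.host
port = url.port or 3306
database = url.database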
+ """ + from sqlalchemy import text + + with open(sql_file, 'r', encoding='utf-8') as f: + sql_content = f.read() + + # Remove comment lines + lines = sql_content.split('\n') + cleaned_lines = [] + for line in lines: + stripped = line.lstrip() + if stripped.startswith('--'): + continue + cleaned_lines.append(line) + + sql_content = '\n'.join(cleaned_lines) + + # Split by semicolon - but we need to handle strings properly + # Use a state machine to track string boundaries + statements = [] + current = [] + in_string = False + i = 0 + + while i < len(sql_content): + char = sql_content[i] + + if char == "'": + # Check for escaped quote (two single quotes) + if i + 1 < len(sql_content) and sql_content[i+1] == "'": + current.append("''") + i += 1 # Skip next quote + elif not in_string: + in_string = True + current.append(char) + else: + in_string = False + current.append(char) + else: + current.append(char) + + # Split on semicolon only if not in string + if char == ';' and not in_string: + stmt = ''.join(current).strip() + if stmt and stmt.upper().startswith('INSERT INTO'): + statements.append(stmt) + current = [] + + i += 1 + + # Handle last statement + if current: + stmt = ''.join(current).strip() + if stmt and stmt.upper().startswith('INSERT INTO'): + statements.append(stmt) + + print(f"Parsed {len(statements)} SQL statements") + print(f"Executing {len(statements)} SQL statements...") + + # Use raw connection to avoid SQLAlchemy parameter parsing + raw_conn = db_engine.raw_connection() + try: + cursor = raw_conn.cursor() + try: + for i, statement in enumerate(statements, 1): + try: + # Execute raw SQL directly using pymysql cursor + cursor.execute(statement) + raw_conn.commit() + if i % 1000 == 0 or i == len(statements): + print(f" [{i}/{len(statements)}] Executed successfully") + except Exception as e: + print(f" [{i}/{len(statements)}] ERROR: {e}") + error_start = max(0, statement.find('VALUES') - 100) + error_end = min(len(statement), error_start + 500) + print(f" Statement context: ...{statement[error_start:error_end]}...") + raise + finally: + cursor.close() + finally: + raw_conn.close() + + return True + + +def verify_import(db_engine, tenant_id: str): + """ + Verify imported data. 
+ + Args: + db_engine: SQLAlchemy database engine + tenant_id: Tenant ID to verify + """ + from sqlalchemy import text + + with db_engine.connect() as conn: + # Count SPUs + result = conn.execute(text("SELECT COUNT(*) FROM shoplazza_product_spu WHERE tenant_id = :tenant_id"), {"tenant_id": tenant_id}) + spu_count = result.scalar() + + # Count SKUs + result = conn.execute(text("SELECT COUNT(*) FROM shoplazza_product_sku WHERE tenant_id = :tenant_id"), {"tenant_id": tenant_id}) + sku_count = result.scalar() + + print(f"\nVerification:") + print(f" SPUs: {spu_count}") + print(f" SKUs: {sku_count}") + + return spu_count, sku_count + + +def main(): + parser = argparse.ArgumentParser(description='Import test data into MySQL') + + # Database connection + parser.add_argument('--db-host', required=True, help='MySQL host') + parser.add_argument('--db-port', type=int, default=3306, help='MySQL port (default: 3306)') + parser.add_argument('--db-database', required=True, help='MySQL database name') + parser.add_argument('--db-username', required=True, help='MySQL username') + parser.add_argument('--db-password', required=True, help='MySQL password') + + # Import options + parser.add_argument('--sql-file', required=True, help='SQL file to import') + parser.add_argument('--tenant-id', help='Tenant ID to verify (optional)') + + args = parser.parse_args() + + print(f"Connecting to MySQL: {args.db_host}:{args.db_port}/{args.db_database}") + + # Connect to database + try: + db_engine = create_db_connection( + host=args.db_host, + port=args.db_port, + database=args.db_database, + username=args.db_username, + password=args.db_password + ) + except Exception as e: + print(f"ERROR: Failed to connect to MySQL: {e}") + return 1 + + # Test connection + if not test_connection(db_engine): + print("ERROR: Database connection test failed") + return 1 + + print("Database connection successful") + + # Clean existing data if tenant_id provided + if args.tenant_id: + print(f"\nCleaning existing data for tenant_id: {args.tenant_id}") + from sqlalchemy import text + try: + with db_engine.connect() as conn: + # Delete SKUs first (foreign key constraint) + conn.execute(text(f"DELETE FROM shoplazza_product_sku WHERE tenant_id = '{args.tenant_id}'")) + # Delete SPUs + conn.execute(text(f"DELETE FROM shoplazza_product_spu WHERE tenant_id = '{args.tenant_id}'")) + conn.commit() + print("✓ Existing data cleaned") + except Exception as e: + print(f"⚠ Warning: Failed to clean existing data: {e}") + # Continue anyway + + # Import SQL file + print(f"\nImporting SQL file: {args.sql_file}") + try: + import_sql_file(db_engine, args.sql_file) + print("Import completed successfully") + except Exception as e: + print(f"ERROR: Failed to import SQL file: {e}") + import traceback + traceback.print_exc() + return 1 + + # Verify import if tenant_id provided + if args.tenant_id: + verify_import(db_engine, args.tenant_id) + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) + diff --git a/scripts/indexer__old_2025_11/ingest.sh b/scripts/indexer__old_2025_11/ingest.sh new file mode 100755 index 0000000..a420fe3 --- /dev/null +++ b/scripts/indexer__old_2025_11/ingest.sh @@ -0,0 +1,87 @@ +#!/bin/bash + +# Unified data ingestion script for SearchEngine +# Ingests data from MySQL to Elasticsearch + +cd "$(dirname "$0")/.." 
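[Note on the tenant cleanup in import_test_data.py above: it splices tenant_id into the DELETE statements with f-strings, while verify_import in the same file already uses bound parameters. A sketch of the cleanup rewritten in the parameterized style — same behavior, no string splicing:]

from sqlalchemy import text

with db_engine.connect() as conn:
    # SKUs first, because of the foreign key to SPUs.
    conn.execute(
        text("DELETE FROM shoplazza_product_sku WHERE tenant_id = :tenant_id"),
        {"tenant_id": args.tenant_id},
    )
    conn.execute(
        text("DELETE FROM shoplazza_product_spu WHERE tenant_id = :tenant_id"),
        {"tenant_id": args.tenant_id},
    )
    conn.commit()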
+source /home/tw/miniconda3/etc/profile.d/conda.sh +conda activate searchengine + +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +RED='\033[0;31m' +NC='\033[0m' + +echo -e "${GREEN}========================================${NC}" +echo -e "${GREEN}数据灌入脚本${NC}" +echo -e "${GREEN}========================================${NC}" + +# Load config from .env file if it exists +if [ -f .env ]; then + set -a + source .env + set +a +fi + +# Parameters +TENANT_ID=${1:-""} +RECREATE_INDEX=${2:-"false"} + +DB_HOST=${DB_HOST:-"120.79.247.228"} +DB_PORT=${DB_PORT:-"3316"} +DB_DATABASE=${DB_DATABASE:-"saas"} +DB_USERNAME=${DB_USERNAME:-"saas"} +DB_PASSWORD=${DB_PASSWORD:-"P89cZHS5d7dFyc9R"} +ES_HOST=${ES_HOST:-"http://localhost:9200"} +BATCH_SIZE=${BATCH_SIZE:-500} + +echo -e "\n${YELLOW}Configuration:${NC}" +echo " Tenant ID: $TENANT_ID" +echo " Recreate Index: $RECREATE_INDEX" +echo " MySQL: $DB_HOST:$DB_PORT/$DB_DATABASE" +echo " Elasticsearch: $ES_HOST" +echo " Batch Size: $BATCH_SIZE" + +# Validate parameters +if [ -z "$TENANT_ID" ]; then + echo -e "${RED}ERROR: Tenant ID is required${NC}" + echo "Usage: $0 [recreate_index]" + echo " tenant_id: Required, tenant ID" + echo " recreate_index: Optional, recreate index if exists (true/false, default: false)" + exit 1 +fi + +if [ -z "$DB_PASSWORD" ]; then + echo -e "${RED}ERROR: DB_PASSWORD未设置,请检查.env文件或环境变量${NC}" + exit 1 +fi + +# Build command +CMD="python scripts/ingest_shoplazza.py \ + --db-host $DB_HOST \ + --db-port $DB_PORT \ + --db-database $DB_DATABASE \ + --db-username $DB_USERNAME \ + --db-password $DB_PASSWORD \ + --tenant-id $TENANT_ID \ + --es-host $ES_HOST \ + --batch-size $BATCH_SIZE" + +if [ "$RECREATE_INDEX" = "true" ] || [ "$RECREATE_INDEX" = "1" ]; then + CMD="$CMD --recreate" + echo -e "\n${YELLOW}Warning: Index will be deleted and recreated!${NC}" +fi + +echo -e "\n${YELLOW}Starting data ingestion...${NC}" +eval $CMD + +if [ $? -eq 0 ]; then + echo -e "\n${GREEN}========================================${NC}" + echo -e "${GREEN}数据灌入完成!${NC}" + echo -e "${GREEN}========================================${NC}" +else + echo -e "\n${RED}========================================${NC}" + echo -e "${RED}数据灌入失败!${NC}" + echo -e "${RED}========================================${NC}" + exit 1 +fi diff --git a/scripts/indexer__old_2025_11/ingest_shoplazza.py b/scripts/indexer__old_2025_11/ingest_shoplazza.py new file mode 100644 index 0000000..60699c0 --- /dev/null +++ b/scripts/indexer__old_2025_11/ingest_shoplazza.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +""" +Shoplazza data ingestion script. + +Loads SPU and SKU data from MySQL and indexes into Elasticsearch using SPU transformer. 
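[Note: ESClient from utils/es_client.py is shared by all of these ingestion scripts but is not part of this patch. A minimal sketch of the interface they rely on (ping / index_exists / create_index / delete_index), assuming it wraps the official elasticsearch-py client — the real implementation may differ:]

from elasticsearch import Elasticsearch

class ESClient:
    # Sketch (assumed interface): thin wrapper over the official client.
    def __init__(self, hosts, username=None, password=None):
        auth = (username, password) if username and password else None
        self.es = Elasticsearch(hosts=hosts, basic_auth=auth)

    def ping(self) -> bool:
        return self.es.ping()

    def index_exists(self, name: str) -> bool:
        return bool(self.es.indices.exists(index=name))

    def create_index(self, name: str, mapping: dict) -> bool:
        # Assumes `mapping` carries the full body ("settings" and "mappings").
        return bool(self.es.indices.create(index=name, **mapping))

    def delete_index(self, name: str) -> bool:
        return bool(self.es.indices.delete(index=name))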
+""" + +import sys +import os +import argparse +from pathlib import Path + +# Add parent directory to path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from utils.db_connector import create_db_connection +from utils.es_client import ESClient +from indexer.spu_transformer import SPUTransformer +from indexer.mapping_generator import load_mapping, DEFAULT_INDEX_NAME +from indexer.bulk_indexer import BulkIndexer + + +def main(): + parser = argparse.ArgumentParser(description='Ingest Shoplazza SPU/SKU data into Elasticsearch') + + # Database connection + parser.add_argument('--db-host', required=True, help='MySQL host') + parser.add_argument('--db-port', type=int, default=3306, help='MySQL port (default: 3306)') + parser.add_argument('--db-database', required=True, help='MySQL database name') + parser.add_argument('--db-username', required=True, help='MySQL username') + parser.add_argument('--db-password', required=True, help='MySQL password') + + # Tenant and index + parser.add_argument('--tenant-id', required=True, help='Tenant ID (required)') + parser.add_argument('--es-host', default='http://localhost:9200', help='Elasticsearch host') + + # Options + parser.add_argument('--recreate', action='store_true', help='Recreate index if exists') + parser.add_argument('--batch-size', type=int, default=500, help='Batch size for indexing (default: 500)') + + args = parser.parse_args() + + print(f"Starting Shoplazza data ingestion for tenant: {args.tenant_id}") + + # Load mapping from JSON file + try: + mapping = load_mapping() + print(f"Loaded mapping configuration") + except Exception as e: + print(f"ERROR: Failed to load mapping: {e}") + return 1 + + index_name = DEFAULT_INDEX_NAME + + # Connect to MySQL + print(f"Connecting to MySQL: {args.db_host}:{args.db_port}/{args.db_database}") + try: + db_engine = create_db_connection( + host=args.db_host, + port=args.db_port, + database=args.db_database, + username=args.db_username, + password=args.db_password + ) + except Exception as e: + print(f"ERROR: Failed to connect to MySQL: {e}") + return 1 + + # Connect to Elasticsearch + es_host = args.es_host + es_username = os.environ.get('ES_USERNAME') + es_password = os.environ.get('ES_PASSWORD') + + print(f"Connecting to Elasticsearch: {es_host}") + if es_username and es_password: + print(f"Using authentication: {es_username}") + es_client = ESClient(hosts=[es_host], username=es_username, password=es_password) + else: + es_client = ESClient(hosts=[es_host]) + + if not es_client.ping(): + print(f"ERROR: Cannot connect to Elasticsearch at {es_host}") + return 1 + + # Create index if needed + if args.recreate: + if es_client.index_exists(index_name): + print(f"Deleting existing index: {index_name}") + if not es_client.delete_index(index_name): + print(f"ERROR: Failed to delete index '{index_name}'") + return 1 + + if not es_client.index_exists(index_name): + print(f"Creating index: {index_name}") + if not es_client.create_index(index_name, mapping): + print(f"ERROR: Failed to create index '{index_name}'") + print("Please check the mapping configuration and try again.") + return 1 + else: + print(f"Using existing index: {index_name}") + + # Initialize SPU transformer + print(f"Initializing SPU transformer for tenant: {args.tenant_id}") + transformer = SPUTransformer(db_engine, args.tenant_id) + + # Transform data + print("Transforming SPU and SKU data...") + try: + documents = transformer.transform_batch() + print(f"Transformed {len(documents)} SPU documents") + except Exception as e: + 
print(f"ERROR: Failed to transform data: {e}") + import traceback + traceback.print_exc() + return 1 + + if not documents: + print("WARNING: No documents to index") + return 0 + + # Bulk index + print(f"Indexing {len(documents)} documents (batch size: {args.batch_size})...") + indexer = BulkIndexer(es_client, index_name, batch_size=args.batch_size) + + try: + results = indexer.index_documents(documents, id_field="spu_id", show_progress=True) + print(f"\nIngestion complete:") + print(f" Success: {results['success']}") + print(f" Failed: {results['failed']}") + print(f" Time: {results.get('elapsed_time', 0):.2f}s") + + if results['failed'] > 0: + print(f"\nWARNING: {results['failed']} documents failed to index") + return 1 + + return 0 + except Exception as e: + print(f"ERROR: Failed to index documents: {e}") + import traceback + traceback.print_exc() + return 1 + + +if __name__ == '__main__': + sys.exit(main()) + diff --git a/scripts/indexer__old_2025_11/recreate_and_import.py b/scripts/indexer__old_2025_11/recreate_and_import.py new file mode 100755 index 0000000..af0a448 --- /dev/null +++ b/scripts/indexer__old_2025_11/recreate_and_import.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python3 +""" +重建索引并导入数据的脚本。 + +清除旧索引,使用新的mapping重建索引,然后导入数据。 +""" + +import sys +import os +import argparse +from pathlib import Path + +# Add parent directory to path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from utils.db_connector import create_db_connection +from utils.es_client import ESClient +from indexer.mapping_generator import load_mapping, delete_index_if_exists, DEFAULT_INDEX_NAME +from indexer.spu_transformer import SPUTransformer +from indexer.bulk_indexer import BulkIndexer + + +def main(): + parser = argparse.ArgumentParser(description='重建ES索引并导入数据') + + # Database connection + parser.add_argument('--db-host', help='MySQL host (或使用环境变量 DB_HOST)') + parser.add_argument('--db-port', type=int, help='MySQL port (或使用环境变量 DB_PORT, 默认: 3306)') + parser.add_argument('--db-database', help='MySQL database (或使用环境变量 DB_DATABASE)') + parser.add_argument('--db-username', help='MySQL username (或使用环境变量 DB_USERNAME)') + parser.add_argument('--db-password', help='MySQL password (或使用环境变量 DB_PASSWORD)') + + # Tenant and ES + parser.add_argument('--tenant-id', required=True, help='Tenant ID (必需)') + parser.add_argument('--es-host', help='Elasticsearch host (或使用环境变量 ES_HOST, 默认: http://localhost:9200)') + + # Options + parser.add_argument('--batch-size', type=int, default=500, help='批量导入大小 (默认: 500)') + parser.add_argument('--skip-delete', action='store_true', help='跳过删除旧索引步骤') + + args = parser.parse_args() + + print("=" * 60) + print("重建ES索引并导入数据") + print("=" * 60) + + # 加载mapping + print("\n[1/4] 加载mapping配置...") + try: + mapping = load_mapping() + print(f"✓ 成功加载mapping配置") + except Exception as e: + print(f"✗ 加载mapping失败: {e}") + return 1 + + index_name = DEFAULT_INDEX_NAME + print(f"索引名称: {index_name}") + + # 连接Elasticsearch + print("\n[2/4] 连接Elasticsearch...") + es_host = args.es_host or os.environ.get('ES_HOST', 'http://localhost:9200') + es_username = os.environ.get('ES_USERNAME') + es_password = os.environ.get('ES_PASSWORD') + + print(f"ES地址: {es_host}") + if es_username: + print(f"ES用户名: {es_username}") + + try: + if es_username and es_password: + es_client = ESClient(hosts=[es_host], username=es_username, password=es_password) + else: + es_client = ESClient(hosts=[es_host]) + + if not es_client.ping(): + print(f"✗ 无法连接到Elasticsearch: {es_host}") + return 1 + print("✓ Elasticsearch连接成功") + except 
Exception as e: + print(f"✗ 连接Elasticsearch失败: {e}") + return 1 + + # 删除旧索引 + if not args.skip_delete: + print("\n[3/4] 删除旧索引...") + if es_client.index_exists(index_name): + print(f"发现已存在的索引: {index_name}") + if delete_index_if_exists(es_client, index_name): + print(f"✓ 成功删除索引: {index_name}") + else: + print(f"✗ 删除索引失败: {index_name}") + return 1 + else: + print(f"索引不存在,跳过删除: {index_name}") + else: + print("\n[3/4] 跳过删除旧索引步骤") + + # 创建新索引 + print("\n[4/4] 创建新索引...") + try: + if es_client.index_exists(index_name): + print(f"✓ 索引已存在: {index_name},跳过创建") + else: + print(f"创建索引: {index_name}") + if es_client.create_index(index_name, mapping): + print(f"✓ 成功创建索引: {index_name}") + else: + print(f"✗ 创建索引失败: {index_name}") + return 1 + except Exception as e: + print(f"✗ 创建索引失败: {e}") + import traceback + traceback.print_exc() + return 1 + + # 连接MySQL + print("\n[5/5] 连接MySQL...") + db_host = args.db_host or os.environ.get('DB_HOST') + db_port = args.db_port or int(os.environ.get('DB_PORT', 3306)) + db_database = args.db_database or os.environ.get('DB_DATABASE') + db_username = args.db_username or os.environ.get('DB_USERNAME') + db_password = args.db_password or os.environ.get('DB_PASSWORD') + + if not all([db_host, db_database, db_username, db_password]): + print("✗ MySQL连接参数不完整") + print("请提供 --db-host, --db-database, --db-username, --db-password") + print("或设置环境变量: DB_HOST, DB_DATABASE, DB_USERNAME, DB_PASSWORD") + return 1 + + print(f"MySQL: {db_host}:{db_port}/{db_database}") + try: + db_engine = create_db_connection( + host=db_host, + port=db_port, + database=db_database, + username=db_username, + password=db_password + ) + print("✓ MySQL连接成功") + except Exception as e: + print(f"✗ 连接MySQL失败: {e}") + return 1 + + # 导入数据 + print("\n[6/6] 导入数据...") + print(f"Tenant ID: {args.tenant_id}") + print(f"批量大小: {args.batch_size}") + + try: + transformer = SPUTransformer(db_engine, args.tenant_id) + print("正在转换数据...") + documents = transformer.transform_batch() + print(f"✓ 转换完成: {len(documents)} 个文档") + + if not documents: + print("⚠ 没有数据需要导入") + return 0 + + print(f"正在导入数据到ES (批量大小: {args.batch_size})...") + indexer = BulkIndexer(es_client, index_name, batch_size=args.batch_size) + results = indexer.index_documents(documents, id_field="spu_id", show_progress=True) + + print(f"\n{'='*60}") + print("导入完成!") + print(f"{'='*60}") + print(f"成功: {results['success']}") + print(f"失败: {results['failed']}") + print(f"耗时: {results.get('elapsed_time', 0):.2f}秒") + + if results['failed'] > 0: + print(f"\n⚠ 警告: {results['failed']} 个文档导入失败") + return 1 + + return 0 + except Exception as e: + print(f"✗ 导入数据失败: {e}") + import traceback + traceback.print_exc() + return 1 + + +if __name__ == '__main__': + sys.exit(main()) + diff --git a/scripts/ingest.sh b/scripts/ingest.sh deleted file mode 100755 index a420fe3..0000000 --- a/scripts/ingest.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/bin/bash - -# Unified data ingestion script for SearchEngine -# Ingests data from MySQL to Elasticsearch - -cd "$(dirname "$0")/.." 
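[Note: both ingestion scripts above funnel their documents into BulkIndexer with id_field="spu_id" and a configurable batch size. Its implementation is also outside this patch; a sketch of a chunked bulk indexer with the same call shape, built on elasticsearch.helpers.bulk — an assumption, the real indexer/bulk_indexer.py may track progress and errors differently:]

import time
from elasticsearch import helpers

class BulkIndexer:
    # Sketch (assumed): chunked bulk indexing matching the call shape above.
    def __init__(self, es_client, index_name, batch_size=500):
        self.es = es_client.es          # raw client from the wrapper sketched earlier
        self.index_name = index_name
        self.batch_size = batch_size

    def index_documents(self, documents, id_field="spu_id", show_progress=False):
        # show_progress is accepted for interface parity; progress output omitted here.
        start = time.time()
        actions = (
            {"_index": self.index_name, "_id": doc[id_field], "_source": doc}
            for doc in documents
        )
        success, errors = helpers.bulk(
            self.es, actions, chunk_size=self.batch_size, raise_on_error=False
        )
        return {
            "success": success,
            "failed": len(errors),
            "elapsed_time": time.time() - start,
        }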
-source /home/tw/miniconda3/etc/profile.d/conda.sh -conda activate searchengine - -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -RED='\033[0;31m' -NC='\033[0m' - -echo -e "${GREEN}========================================${NC}" -echo -e "${GREEN}数据灌入脚本${NC}" -echo -e "${GREEN}========================================${NC}" - -# Load config from .env file if it exists -if [ -f .env ]; then - set -a - source .env - set +a -fi - -# Parameters -TENANT_ID=${1:-""} -RECREATE_INDEX=${2:-"false"} - -DB_HOST=${DB_HOST:-"120.79.247.228"} -DB_PORT=${DB_PORT:-"3316"} -DB_DATABASE=${DB_DATABASE:-"saas"} -DB_USERNAME=${DB_USERNAME:-"saas"} -DB_PASSWORD=${DB_PASSWORD:-"P89cZHS5d7dFyc9R"} -ES_HOST=${ES_HOST:-"http://localhost:9200"} -BATCH_SIZE=${BATCH_SIZE:-500} - -echo -e "\n${YELLOW}Configuration:${NC}" -echo " Tenant ID: $TENANT_ID" -echo " Recreate Index: $RECREATE_INDEX" -echo " MySQL: $DB_HOST:$DB_PORT/$DB_DATABASE" -echo " Elasticsearch: $ES_HOST" -echo " Batch Size: $BATCH_SIZE" - -# Validate parameters -if [ -z "$TENANT_ID" ]; then - echo -e "${RED}ERROR: Tenant ID is required${NC}" - echo "Usage: $0 [recreate_index]" - echo " tenant_id: Required, tenant ID" - echo " recreate_index: Optional, recreate index if exists (true/false, default: false)" - exit 1 -fi - -if [ -z "$DB_PASSWORD" ]; then - echo -e "${RED}ERROR: DB_PASSWORD未设置,请检查.env文件或环境变量${NC}" - exit 1 -fi - -# Build command -CMD="python scripts/ingest_shoplazza.py \ - --db-host $DB_HOST \ - --db-port $DB_PORT \ - --db-database $DB_DATABASE \ - --db-username $DB_USERNAME \ - --db-password $DB_PASSWORD \ - --tenant-id $TENANT_ID \ - --es-host $ES_HOST \ - --batch-size $BATCH_SIZE" - -if [ "$RECREATE_INDEX" = "true" ] || [ "$RECREATE_INDEX" = "1" ]; then - CMD="$CMD --recreate" - echo -e "\n${YELLOW}Warning: Index will be deleted and recreated!${NC}" -fi - -echo -e "\n${YELLOW}Starting data ingestion...${NC}" -eval $CMD - -if [ $? -eq 0 ]; then - echo -e "\n${GREEN}========================================${NC}" - echo -e "${GREEN}数据灌入完成!${NC}" - echo -e "${GREEN}========================================${NC}" -else - echo -e "\n${RED}========================================${NC}" - echo -e "${RED}数据灌入失败!${NC}" - echo -e "${RED}========================================${NC}" - exit 1 -fi diff --git a/scripts/ingest_shoplazza.py b/scripts/ingest_shoplazza.py deleted file mode 100644 index 60699c0..0000000 --- a/scripts/ingest_shoplazza.py +++ /dev/null @@ -1,146 +0,0 @@ -#!/usr/bin/env python3 -""" -Shoplazza data ingestion script. - -Loads SPU and SKU data from MySQL and indexes into Elasticsearch using SPU transformer. 
-""" - -import sys -import os -import argparse -from pathlib import Path - -# Add parent directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from utils.db_connector import create_db_connection -from utils.es_client import ESClient -from indexer.spu_transformer import SPUTransformer -from indexer.mapping_generator import load_mapping, DEFAULT_INDEX_NAME -from indexer.bulk_indexer import BulkIndexer - - -def main(): - parser = argparse.ArgumentParser(description='Ingest Shoplazza SPU/SKU data into Elasticsearch') - - # Database connection - parser.add_argument('--db-host', required=True, help='MySQL host') - parser.add_argument('--db-port', type=int, default=3306, help='MySQL port (default: 3306)') - parser.add_argument('--db-database', required=True, help='MySQL database name') - parser.add_argument('--db-username', required=True, help='MySQL username') - parser.add_argument('--db-password', required=True, help='MySQL password') - - # Tenant and index - parser.add_argument('--tenant-id', required=True, help='Tenant ID (required)') - parser.add_argument('--es-host', default='http://localhost:9200', help='Elasticsearch host') - - # Options - parser.add_argument('--recreate', action='store_true', help='Recreate index if exists') - parser.add_argument('--batch-size', type=int, default=500, help='Batch size for indexing (default: 500)') - - args = parser.parse_args() - - print(f"Starting Shoplazza data ingestion for tenant: {args.tenant_id}") - - # Load mapping from JSON file - try: - mapping = load_mapping() - print(f"Loaded mapping configuration") - except Exception as e: - print(f"ERROR: Failed to load mapping: {e}") - return 1 - - index_name = DEFAULT_INDEX_NAME - - # Connect to MySQL - print(f"Connecting to MySQL: {args.db_host}:{args.db_port}/{args.db_database}") - try: - db_engine = create_db_connection( - host=args.db_host, - port=args.db_port, - database=args.db_database, - username=args.db_username, - password=args.db_password - ) - except Exception as e: - print(f"ERROR: Failed to connect to MySQL: {e}") - return 1 - - # Connect to Elasticsearch - es_host = args.es_host - es_username = os.environ.get('ES_USERNAME') - es_password = os.environ.get('ES_PASSWORD') - - print(f"Connecting to Elasticsearch: {es_host}") - if es_username and es_password: - print(f"Using authentication: {es_username}") - es_client = ESClient(hosts=[es_host], username=es_username, password=es_password) - else: - es_client = ESClient(hosts=[es_host]) - - if not es_client.ping(): - print(f"ERROR: Cannot connect to Elasticsearch at {es_host}") - return 1 - - # Create index if needed - if args.recreate: - if es_client.index_exists(index_name): - print(f"Deleting existing index: {index_name}") - if not es_client.delete_index(index_name): - print(f"ERROR: Failed to delete index '{index_name}'") - return 1 - - if not es_client.index_exists(index_name): - print(f"Creating index: {index_name}") - if not es_client.create_index(index_name, mapping): - print(f"ERROR: Failed to create index '{index_name}'") - print("Please check the mapping configuration and try again.") - return 1 - else: - print(f"Using existing index: {index_name}") - - # Initialize SPU transformer - print(f"Initializing SPU transformer for tenant: {args.tenant_id}") - transformer = SPUTransformer(db_engine, args.tenant_id) - - # Transform data - print("Transforming SPU and SKU data...") - try: - documents = transformer.transform_batch() - print(f"Transformed {len(documents)} SPU documents") - except Exception as e: - 
print(f"ERROR: Failed to transform data: {e}") - import traceback - traceback.print_exc() - return 1 - - if not documents: - print("WARNING: No documents to index") - return 0 - - # Bulk index - print(f"Indexing {len(documents)} documents (batch size: {args.batch_size})...") - indexer = BulkIndexer(es_client, index_name, batch_size=args.batch_size) - - try: - results = indexer.index_documents(documents, id_field="spu_id", show_progress=True) - print(f"\nIngestion complete:") - print(f" Success: {results['success']}") - print(f" Failed: {results['failed']}") - print(f" Time: {results.get('elapsed_time', 0):.2f}s") - - if results['failed'] > 0: - print(f"\nWARNING: {results['failed']} documents failed to index") - return 1 - - return 0 - except Exception as e: - print(f"ERROR: Failed to index documents: {e}") - import traceback - traceback.print_exc() - return 1 - - -if __name__ == '__main__': - sys.exit(main()) - diff --git a/scripts/recreate_and_import.py b/scripts/recreate_and_import.py deleted file mode 100755 index af0a448..0000000 --- a/scripts/recreate_and_import.py +++ /dev/null @@ -1,184 +0,0 @@ -#!/usr/bin/env python3 -""" -重建索引并导入数据的脚本。 - -清除旧索引,使用新的mapping重建索引,然后导入数据。 -""" - -import sys -import os -import argparse -from pathlib import Path - -# Add parent directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from utils.db_connector import create_db_connection -from utils.es_client import ESClient -from indexer.mapping_generator import load_mapping, delete_index_if_exists, DEFAULT_INDEX_NAME -from indexer.spu_transformer import SPUTransformer -from indexer.bulk_indexer import BulkIndexer - - -def main(): - parser = argparse.ArgumentParser(description='重建ES索引并导入数据') - - # Database connection - parser.add_argument('--db-host', help='MySQL host (或使用环境变量 DB_HOST)') - parser.add_argument('--db-port', type=int, help='MySQL port (或使用环境变量 DB_PORT, 默认: 3306)') - parser.add_argument('--db-database', help='MySQL database (或使用环境变量 DB_DATABASE)') - parser.add_argument('--db-username', help='MySQL username (或使用环境变量 DB_USERNAME)') - parser.add_argument('--db-password', help='MySQL password (或使用环境变量 DB_PASSWORD)') - - # Tenant and ES - parser.add_argument('--tenant-id', required=True, help='Tenant ID (必需)') - parser.add_argument('--es-host', help='Elasticsearch host (或使用环境变量 ES_HOST, 默认: http://localhost:9200)') - - # Options - parser.add_argument('--batch-size', type=int, default=500, help='批量导入大小 (默认: 500)') - parser.add_argument('--skip-delete', action='store_true', help='跳过删除旧索引步骤') - - args = parser.parse_args() - - print("=" * 60) - print("重建ES索引并导入数据") - print("=" * 60) - - # 加载mapping - print("\n[1/4] 加载mapping配置...") - try: - mapping = load_mapping() - print(f"✓ 成功加载mapping配置") - except Exception as e: - print(f"✗ 加载mapping失败: {e}") - return 1 - - index_name = DEFAULT_INDEX_NAME - print(f"索引名称: {index_name}") - - # 连接Elasticsearch - print("\n[2/4] 连接Elasticsearch...") - es_host = args.es_host or os.environ.get('ES_HOST', 'http://localhost:9200') - es_username = os.environ.get('ES_USERNAME') - es_password = os.environ.get('ES_PASSWORD') - - print(f"ES地址: {es_host}") - if es_username: - print(f"ES用户名: {es_username}") - - try: - if es_username and es_password: - es_client = ESClient(hosts=[es_host], username=es_username, password=es_password) - else: - es_client = ESClient(hosts=[es_host]) - - if not es_client.ping(): - print(f"✗ 无法连接到Elasticsearch: {es_host}") - return 1 - print("✓ Elasticsearch连接成功") - except Exception as e: - print(f"✗ 连接Elasticsearch失败: {e}") - 
return 1 - - # 删除旧索引 - if not args.skip_delete: - print("\n[3/4] 删除旧索引...") - if es_client.index_exists(index_name): - print(f"发现已存在的索引: {index_name}") - if delete_index_if_exists(es_client, index_name): - print(f"✓ 成功删除索引: {index_name}") - else: - print(f"✗ 删除索引失败: {index_name}") - return 1 - else: - print(f"索引不存在,跳过删除: {index_name}") - else: - print("\n[3/4] 跳过删除旧索引步骤") - - # 创建新索引 - print("\n[4/4] 创建新索引...") - try: - if es_client.index_exists(index_name): - print(f"✓ 索引已存在: {index_name},跳过创建") - else: - print(f"创建索引: {index_name}") - if es_client.create_index(index_name, mapping): - print(f"✓ 成功创建索引: {index_name}") - else: - print(f"✗ 创建索引失败: {index_name}") - return 1 - except Exception as e: - print(f"✗ 创建索引失败: {e}") - import traceback - traceback.print_exc() - return 1 - - # 连接MySQL - print("\n[5/5] 连接MySQL...") - db_host = args.db_host or os.environ.get('DB_HOST') - db_port = args.db_port or int(os.environ.get('DB_PORT', 3306)) - db_database = args.db_database or os.environ.get('DB_DATABASE') - db_username = args.db_username or os.environ.get('DB_USERNAME') - db_password = args.db_password or os.environ.get('DB_PASSWORD') - - if not all([db_host, db_database, db_username, db_password]): - print("✗ MySQL连接参数不完整") - print("请提供 --db-host, --db-database, --db-username, --db-password") - print("或设置环境变量: DB_HOST, DB_DATABASE, DB_USERNAME, DB_PASSWORD") - return 1 - - print(f"MySQL: {db_host}:{db_port}/{db_database}") - try: - db_engine = create_db_connection( - host=db_host, - port=db_port, - database=db_database, - username=db_username, - password=db_password - ) - print("✓ MySQL连接成功") - except Exception as e: - print(f"✗ 连接MySQL失败: {e}") - return 1 - - # 导入数据 - print("\n[6/6] 导入数据...") - print(f"Tenant ID: {args.tenant_id}") - print(f"批量大小: {args.batch_size}") - - try: - transformer = SPUTransformer(db_engine, args.tenant_id) - print("正在转换数据...") - documents = transformer.transform_batch() - print(f"✓ 转换完成: {len(documents)} 个文档") - - if not documents: - print("⚠ 没有数据需要导入") - return 0 - - print(f"正在导入数据到ES (批量大小: {args.batch_size})...") - indexer = BulkIndexer(es_client, index_name, batch_size=args.batch_size) - results = indexer.index_documents(documents, id_field="spu_id", show_progress=True) - - print(f"\n{'='*60}") - print("导入完成!") - print(f"{'='*60}") - print(f"成功: {results['success']}") - print(f"失败: {results['failed']}") - print(f"耗时: {results.get('elapsed_time', 0):.2f}秒") - - if results['failed'] > 0: - print(f"\n⚠ 警告: {results['failed']} 个文档导入失败") - return 1 - - return 0 - except Exception as e: - print(f"✗ 导入数据失败: {e}") - import traceback - traceback.print_exc() - return 1 - - -if __name__ == '__main__': - sys.exit(main()) - diff --git a/scripts/run_tests.py b/scripts/run_tests.py deleted file mode 100755 index 67d900f..0000000 --- a/scripts/run_tests.py +++ /dev/null @@ -1,705 +0,0 @@ -#!/usr/bin/env python3 -""" -测试执行脚本 - -运行完整的测试流水线,包括: -- 环境检查 -- 单元测试 -- 集成测试 -- 性能测试 -- 测试报告生成 -""" - -import os -import sys -import subprocess -import time -import json -import argparse -import logging -from pathlib import Path -from typing import Dict, List, Optional, Any -from dataclasses import dataclass, asdict -from datetime import datetime - - -# 添加项目根目录到Python路径 -project_root = Path(__file__).parent.parent -sys.path.insert(0, str(project_root)) - - -@dataclass -class TestResult: - """测试结果数据结构""" - name: str - status: str # "passed", "failed", "skipped", "error" - duration: float - details: Optional[Dict[str, Any]] = None - output: Optional[str] = None - error: Optional[str] = None - - 
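[Note: the report pipeline in the deleted run_tests.py serializes these dataclasses with dataclasses.asdict before writing the JSON report. A trimmed, self-contained illustration of that pattern, with a hypothetical test name and timing:]

import json
from dataclasses import dataclass, asdict
from typing import Any, Dict, Optional

@dataclass
class TestResult:
    name: str
    status: str        # "passed", "failed", "skipped", "error"
    duration: float
    details: Optional[Dict[str, Any]] = None
    error: Optional[str] = None

result = TestResult(name="tests/unit/test_parser.py::test_ok",  # hypothetical
                    status="passed", duration=0.12)
print(json.dumps(asdict(result), ensure_ascii=False))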
-@dataclass -class TestSuiteResult: - """测试套件结果""" - name: str - total_tests: int - passed: int - failed: int - skipped: int - errors: int - duration: float - results: List[TestResult] - - -class TestRunner: - """测试运行器""" - - def __init__(self, config: Dict[str, Any]): - self.config = config - self.logger = self._setup_logger() - self.results: List[TestSuiteResult] = [] - self.start_time = time.time() - - def _setup_logger(self) -> logging.Logger: - """设置日志记录器""" - log_level = getattr(logging, self.config.get('log_level', 'INFO').upper()) - logging.basicConfig( - level=log_level, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', - handlers=[ - logging.StreamHandler(), - logging.FileHandler( - project_root / 'test_logs' / f'test_run_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log' - ) - ] - ) - return logging.getLogger(__name__) - - def _run_command(self, cmd: List[str], cwd: Optional[Path] = None, env: Optional[Dict[str, str]] = None) -> subprocess.CompletedProcess: - """运行命令""" - try: - self.logger.info(f"执行命令: {' '.join(cmd)}") - - # 设置环境变量 - process_env = os.environ.copy() - if env: - process_env.update(env) - - result = subprocess.run( - cmd, - cwd=cwd or project_root, - env=process_env, - capture_output=True, - text=True, - timeout=self.config.get('test_timeout', 300) - ) - - self.logger.debug(f"命令返回码: {result.returncode}") - if result.stdout: - self.logger.debug(f"标准输出: {result.stdout[:500]}...") - if result.stderr: - self.logger.debug(f"标准错误: {result.stderr[:500]}...") - - return result - - except subprocess.TimeoutExpired: - self.logger.error(f"命令执行超时: {' '.join(cmd)}") - raise - except Exception as e: - self.logger.error(f"命令执行失败: {e}") - raise - - def check_environment(self) -> bool: - """检查测试环境""" - self.logger.info("检查测试环境...") - - checks = [] - - # 检查Python环境 - try: - python_version = sys.version - self.logger.info(f"Python版本: {python_version}") - checks.append(("Python", True, f"版本 {python_version}")) - except Exception as e: - checks.append(("Python", False, str(e))) - - # 检查conda环境 - try: - result = self._run_command(['conda', '--version']) - if result.returncode == 0: - conda_version = result.stdout.strip() - self.logger.info(f"Conda版本: {conda_version}") - checks.append(("Conda", True, conda_version)) - else: - checks.append(("Conda", False, "未找到conda")) - except Exception as e: - checks.append(("Conda", False, str(e))) - - # 检查依赖包 - required_packages = [ - 'pytest', 'fastapi', 'elasticsearch', 'numpy', - 'torch', 'transformers', 'pyyaml' - ] - - for package in required_packages: - try: - result = self._run_command(['python', '-c', f'import {package}']) - if result.returncode == 0: - checks.append((package, True, "已安装")) - else: - checks.append((package, False, "导入失败")) - except Exception as e: - checks.append((package, False, str(e))) - - # 检查Elasticsearch - try: - es_host = os.getenv('ES_HOST', 'http://localhost:9200') - result = self._run_command(['curl', '-s', f'{es_host}/_cluster/health']) - if result.returncode == 0: - health_data = json.loads(result.stdout) - status = health_data.get('status', 'unknown') - self.logger.info(f"Elasticsearch状态: {status}") - checks.append(("Elasticsearch", True, f"状态: {status}")) - else: - checks.append(("Elasticsearch", False, "连接失败")) - except Exception as e: - checks.append(("Elasticsearch", False, str(e))) - - # 检查API服务 - try: - api_host = os.getenv('API_HOST', '127.0.0.1') - api_port = os.getenv('API_PORT', '6003') - result = self._run_command(['curl', '-s', f'http://{api_host}:{api_port}/health']) - if 
result.returncode == 0: - health_data = json.loads(result.stdout) - status = health_data.get('status', 'unknown') - self.logger.info(f"API服务状态: {status}") - checks.append(("API服务", True, f"状态: {status}")) - else: - checks.append(("API服务", False, "连接失败")) - except Exception as e: - checks.append(("API服务", False, str(e))) - - # 输出检查结果 - self.logger.info("环境检查结果:") - all_passed = True - for name, passed, details in checks: - status = "✓" if passed else "✗" - self.logger.info(f" {status} {name}: {details}") - if not passed: - all_passed = False - - return all_passed - - def run_unit_tests(self) -> TestSuiteResult: - """运行单元测试""" - self.logger.info("运行单元测试...") - - start_time = time.time() - cmd = [ - 'python', '-m', 'pytest', - 'tests/unit/', - '-v', - '--tb=short', - '--json-report', - '--json-report-file=test_logs/unit_test_results.json' - ] - - try: - result = self._run_command(cmd) - duration = time.time() - start_time - - # 解析测试结果 - if result.returncode == 0: - status = "passed" - else: - status = "failed" - - # 尝试解析JSON报告 - test_results = [] - passed = failed = skipped = errors = 0 - - try: - with open(project_root / 'test_logs' / 'unit_test_results.json', 'r') as f: - report_data = json.load(f) - - summary = report_data.get('summary', {}) - total = summary.get('total', 0) - passed = summary.get('passed', 0) - failed = summary.get('failed', 0) - skipped = summary.get('skipped', 0) - errors = summary.get('error', 0) - - # 获取详细结果 - for test in report_data.get('tests', []): - test_results.append(TestResult( - name=test.get('nodeid', ''), - status=test.get('outcome', 'unknown'), - duration=test.get('duration', 0.0), - details=test - )) - - except Exception as e: - self.logger.warning(f"无法解析单元测试JSON报告: {e}") - - suite_result = TestSuiteResult( - name="单元测试", - total_tests=passed + failed + skipped + errors, - passed=passed, - failed=failed, - skipped=skipped, - errors=errors, - duration=duration, - results=test_results - ) - - self.results.append(suite_result) - self.logger.info(f"单元测试完成: {suite_result.total_tests}个测试, " - f"{suite_result.passed}通过, {suite_result.failed}失败, " - f"{suite_result.skipped}跳过, {suite_result.errors}错误") - - return suite_result - - except Exception as e: - self.logger.error(f"单元测试执行失败: {e}") - raise - - def run_integration_tests(self) -> TestSuiteResult: - """运行集成测试""" - self.logger.info("运行集成测试...") - - start_time = time.time() - cmd = [ - 'python', '-m', 'pytest', - 'tests/integration/', - '-v', - '--tb=short', - '-m', 'not slow', # 排除慢速测试 - '--json-report', - '--json-report-file=test_logs/integration_test_results.json' - ] - - try: - result = self._run_command(cmd) - duration = time.time() - start_time - - # 解析测试结果 - if result.returncode == 0: - status = "passed" - else: - status = "failed" - - # 尝试解析JSON报告 - test_results = [] - passed = failed = skipped = errors = 0 - - try: - with open(project_root / 'test_logs' / 'integration_test_results.json', 'r') as f: - report_data = json.load(f) - - summary = report_data.get('summary', {}) - total = summary.get('total', 0) - passed = summary.get('passed', 0) - failed = summary.get('failed', 0) - skipped = summary.get('skipped', 0) - errors = summary.get('error', 0) - - for test in report_data.get('tests', []): - test_results.append(TestResult( - name=test.get('nodeid', ''), - status=test.get('outcome', 'unknown'), - duration=test.get('duration', 0.0), - details=test - )) - - except Exception as e: - self.logger.warning(f"无法解析集成测试JSON报告: {e}") - - suite_result = TestSuiteResult( - name="集成测试", - total_tests=passed + 
failed + skipped + errors, - passed=passed, - failed=failed, - skipped=skipped, - errors=errors, - duration=duration, - results=test_results - ) - - self.results.append(suite_result) - self.logger.info(f"集成测试完成: {suite_result.total_tests}个测试, " - f"{suite_result.passed}通过, {suite_result.failed}失败, " - f"{suite_result.skipped}跳过, {suite_result.errors}错误") - - return suite_result - - except Exception as e: - self.logger.error(f"集成测试执行失败: {e}") - raise - - def run_api_tests(self) -> TestSuiteResult: - """运行API测试""" - self.logger.info("运行API测试...") - - start_time = time.time() - cmd = [ - 'python', '-m', 'pytest', - 'tests/integration/test_api_integration.py', - '-v', - '--tb=short', - '--json-report', - '--json-report-file=test_logs/api_test_results.json' - ] - - try: - result = self._run_command(cmd) - duration = time.time() - start_time - - # 解析测试结果 - if result.returncode == 0: - status = "passed" - else: - status = "failed" - - # 尝试解析JSON报告 - test_results = [] - passed = failed = skipped = errors = 0 - - try: - with open(project_root / 'test_logs' / 'api_test_results.json', 'r') as f: - report_data = json.load(f) - - summary = report_data.get('summary', {}) - total = summary.get('total', 0) - passed = summary.get('passed', 0) - failed = summary.get('failed', 0) - skipped = summary.get('skipped', 0) - errors = summary.get('error', 0) - - for test in report_data.get('tests', []): - test_results.append(TestResult( - name=test.get('nodeid', ''), - status=test.get('outcome', 'unknown'), - duration=test.get('duration', 0.0), - details=test - )) - - except Exception as e: - self.logger.warning(f"无法解析API测试JSON报告: {e}") - - suite_result = TestSuiteResult( - name="API测试", - total_tests=passed + failed + skipped + errors, - passed=passed, - failed=failed, - skipped=skipped, - errors=errors, - duration=duration, - results=test_results - ) - - self.results.append(suite_result) - self.logger.info(f"API测试完成: {suite_result.total_tests}个测试, " - f"{suite_result.passed}通过, {suite_result.failed}失败, " - f"{suite_result.skipped}跳过, {suite_result.errors}错误") - - return suite_result - - except Exception as e: - self.logger.error(f"API测试执行失败: {e}") - raise - - def run_performance_tests(self) -> TestSuiteResult: - """运行性能测试""" - self.logger.info("运行性能测试...") - - start_time = time.time() - - # 简单的性能测试 - 测试搜索响应时间 - test_queries = [ - "红色连衣裙", - "智能手机", - "笔记本电脑 AND (游戏 OR 办公)", - "无线蓝牙耳机" - ] - - test_results = [] - passed = failed = 0 - - for query in test_queries: - try: - query_start = time.time() - result = self._run_command([ - 'curl', '-s', - f'http://{os.getenv("API_HOST", "127.0.0.1")}:{os.getenv("API_PORT", "6003")}/search', - '-d', f'q={query}' - ]) - query_duration = time.time() - query_start - - if result.returncode == 0: - response_data = json.loads(result.stdout) - took_ms = response_data.get('took_ms', 0) - - # 性能阈值:响应时间不超过2秒 - if took_ms <= 2000: - test_results.append(TestResult( - name=f"搜索性能测试: {query}", - status="passed", - duration=query_duration, - details={"took_ms": took_ms, "response_size": len(result.stdout)} - )) - passed += 1 - else: - test_results.append(TestResult( - name=f"搜索性能测试: {query}", - status="failed", - duration=query_duration, - details={"took_ms": took_ms, "threshold": 2000} - )) - failed += 1 - else: - test_results.append(TestResult( - name=f"搜索性能测试: {query}", - status="failed", - duration=query_duration, - error=result.stderr - )) - failed += 1 - - except Exception as e: - test_results.append(TestResult( - name=f"搜索性能测试: {query}", - status="error", - duration=0.0, - 
error=str(e) - )) - failed += 1 - - duration = time.time() - start_time - - suite_result = TestSuiteResult( - name="性能测试", - total_tests=len(test_results), - passed=passed, - failed=failed, - skipped=0, - errors=0, - duration=duration, - results=test_results - ) - - self.results.append(suite_result) - self.logger.info(f"性能测试完成: {suite_result.total_tests}个测试, " - f"{suite_result.passed}通过, {suite_result.failed}失败") - - return suite_result - - def generate_report(self) -> str: - """生成测试报告""" - self.logger.info("生成测试报告...") - - # 计算总体统计 - total_tests = sum(suite.total_tests for suite in self.results) - total_passed = sum(suite.passed for suite in self.results) - total_failed = sum(suite.failed for suite in self.results) - total_skipped = sum(suite.skipped for suite in self.results) - total_errors = sum(suite.errors for suite in self.results) - total_duration = sum(suite.duration for suite in self.results) - - # 生成报告数据 - report_data = { - "timestamp": datetime.now().isoformat(), - "summary": { - "total_tests": total_tests, - "passed": total_passed, - "failed": total_failed, - "skipped": total_skipped, - "errors": total_errors, - "success_rate": (total_passed / total_tests * 100) if total_tests > 0 else 0, - "total_duration": total_duration - }, - "suites": [asdict(suite) for suite in self.results] - } - - # 保存JSON报告 - report_file = project_root / 'test_logs' / f'test_report_{datetime.now().strftime("%Y%m%d_%H%M%S")}.json' - with open(report_file, 'w', encoding='utf-8') as f: - json.dump(report_data, f, indent=2, ensure_ascii=False) - - # 生成文本报告 - text_report = self._generate_text_report(report_data) - - report_file_text = project_root / 'test_logs' / f'test_report_{datetime.now().strftime("%Y%m%d_%H%M%S")}.txt' - with open(report_file_text, 'w', encoding='utf-8') as f: - f.write(text_report) - - self.logger.info(f"测试报告已保存: {report_file}") - self.logger.info(f"文本报告已保存: {report_file_text}") - - return text_report - - def _generate_text_report(self, report_data: Dict[str, Any]) -> str: - """生成文本格式的测试报告""" - lines = [] - - # 标题 - lines.append("=" * 60) - lines.append("搜索引擎测试报告") - lines.append("=" * 60) - lines.append(f"时间: {report_data['timestamp']}") - lines.append("") - - # 摘要 - summary = report_data['summary'] - lines.append("测试摘要") - lines.append("-" * 30) - lines.append(f"总测试数: {summary['total_tests']}") - lines.append(f"通过: {summary['passed']}") - lines.append(f"失败: {summary['failed']}") - lines.append(f"跳过: {summary['skipped']}") - lines.append(f"错误: {summary['errors']}") - lines.append(f"成功率: {summary['success_rate']:.1f}%") - lines.append(f"总耗时: {summary['total_duration']:.2f}秒") - lines.append("") - - # 各测试套件详情 - lines.append("测试套件详情") - lines.append("-" * 30) - - for suite in report_data['suites']: - lines.append(f"\n{suite['name']}:") - lines.append(f" 总数: {suite['total_tests']}, 通过: {suite['passed']}, " - f"失败: {suite['failed']}, 跳过: {suite['skipped']}, 错误: {suite['errors']}") - lines.append(f" 耗时: {suite['duration']:.2f}秒") - - # 显示失败的测试 - failed_tests = [r for r in suite['results'] if r['status'] in ['failed', 'error']] - if failed_tests: - lines.append(" 失败的测试:") - for test in failed_tests[:5]: # 只显示前5个 - lines.append(f" - {test['name']}: {test['status']}") - if test.get('error'): - lines.append(f" 错误: {test['error'][:100]}...") - if len(failed_tests) > 5: - lines.append(f" ... 
还有 {len(failed_tests) - 5} 个失败的测试") - - return "\n".join(lines) - - def run_all_tests(self) -> bool: - """运行所有测试""" - try: - # 确保日志目录存在 - (project_root / 'test_logs').mkdir(exist_ok=True) - - # 加载环境变量 - env_file = project_root / 'test_env.sh' - if env_file.exists(): - self.logger.info("加载测试环境变量...") - result = self._run_command(['bash', str(env_file)]) - if result.returncode != 0: - self.logger.warning("环境变量加载失败,继续使用默认配置") - - # 检查环境 - if not self.check_environment(): - self.logger.error("环境检查失败,请先启动测试环境") - return False - - # 运行各类测试 - test_suites = [ - ("unit", self.run_unit_tests), - ("integration", self.run_integration_tests), - ("api", self.run_api_tests), - ("performance", self.run_performance_tests) - ] - - failed_suites = [] - - for suite_name, suite_func in test_suites: - if suite_name in self.config.get('skip_suites', []): - self.logger.info(f"跳过 {suite_name} 测试") - continue - - try: - suite_result = suite_func() - if suite_result.failed > 0 or suite_result.errors > 0: - failed_suites.append(suite_name) - except Exception as e: - self.logger.error(f"{suite_name} 测试执行失败: {e}") - failed_suites.append(suite_name) - - # 生成报告 - report = self.generate_report() - print(report) - - # 返回测试结果 - return len(failed_suites) == 0 - - except Exception as e: - self.logger.error(f"测试执行失败: {e}") - return False - - -def main(): - """主函数""" - parser = argparse.ArgumentParser(description="运行搜索引擎测试流水线") - parser.add_argument('--skip-suites', nargs='+', - choices=['unit', 'integration', 'api', 'performance'], - help='跳过指定的测试套件') - parser.add_argument('--log-level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], - default='INFO', help='日志级别') - parser.add_argument('--test-timeout', type=int, default=300, - help='单个测试超时时间(秒)') - parser.add_argument('--start-env', action='store_true', - help='启动测试环境后运行测试') - parser.add_argument('--stop-env', action='store_true', - help='测试完成后停止测试环境') - - args = parser.parse_args() - - # 配置 - config = { - 'skip_suites': args.skip_suites or [], - 'log_level': args.log_level, - 'test_timeout': args.test_timeout - } - - # 启动环境 - if args.start_env: - print("启动测试环境...") - result = subprocess.run([ - 'bash', str(project_root / 'scripts' / 'start_test_environment.sh') - ], capture_output=True, text=True) - - if result.returncode != 0: - print(f"测试环境启动失败: {result.stderr}") - return 1 - - print("测试环境启动成功") - time.sleep(5) # 等待服务完全启动 - - try: - # 运行测试 - runner = TestRunner(config) - success = runner.run_all_tests() - - if success: - print("\n🎉 所有测试通过!") - return_code = 0 - else: - print("\n❌ 部分测试失败,请查看日志") - return_code = 1 - - finally: - # 停止环境 - if args.stop_env: - print("\n停止测试环境...") - subprocess.run([ - 'bash', str(project_root / 'scripts' / 'stop_test_environment.sh') - ]) - - return return_code - - -if __name__ == "__main__": - sys.exit(main()) \ No newline at end of file diff --git a/scripts/start_test_environment.sh b/scripts/start_test_environment.sh deleted file mode 100755 index e9d3727..0000000 --- a/scripts/start_test_environment.sh +++ /dev/null @@ -1,275 +0,0 @@ -#!/bin/bash - -# 启动测试环境脚本 -# 用于在commit前自动化测试时启动必要的依赖服务 - -set -e - -# 颜色定义 -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# 配置 -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" -TEST_LOG_DIR="$PROJECT_ROOT/test_logs" -PID_FILE="$PROJECT_ROOT/test_environment.pid" - -# 日志文件 -LOG_FILE="$TEST_LOG_DIR/test_environment.log" -ES_LOG_FILE="$TEST_LOG_DIR/elasticsearch.log" 
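-# API_LOG_FILE captures stdout/stderr of the test API process started further
-# below; ES_LOG_FILE is declared alongside it, though a locally started
-# Elasticsearch daemon writes to its own log directory.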
-API_LOG_FILE="$TEST_LOG_DIR/api_test.log" - -echo -e "${GREEN}========================================${NC}" -echo -e "${GREEN}启动测试环境${NC}" -echo -e "${GREEN}========================================${NC}" - -# 创建日志目录 -mkdir -p "$TEST_LOG_DIR" - -# 检查是否已经运行 -if [ -f "$PID_FILE" ]; then - OLD_PID=$(cat "$PID_FILE") - if ps -p $OLD_PID > /dev/null 2>&1; then - echo -e "${YELLOW}测试环境已在运行 (PID: $OLD_PID)${NC}" - echo -e "${BLUE}如需重启,请先运行: ./scripts/stop_test_environment.sh${NC}" - exit 0 - else - rm -f "$PID_FILE" - fi -fi - -# 激活conda环境 -echo -e "${BLUE}激活conda环境...${NC}" -source /home/tw/miniconda3/etc/profile.d/conda.sh -conda activate searchengine - -# 设置环境变量 -echo -e "${BLUE}设置测试环境变量...${NC}" -export PYTHONPATH="$PROJECT_ROOT:$PYTHONPATH" -export TESTING_MODE=true -export LOG_LEVEL=DEBUG - -# Elasticsearch配置 -export ES_HOST="http://localhost:9200" -export ES_USERNAME="elastic" -export ES_PASSWORD="changeme" - -# API配置 -export API_HOST="127.0.0.1" -export API_PORT="6003" # 使用不同的端口避免冲突 -export TENANT_ID="test_tenant" - -# 测试配置 -export TEST_TIMEOUT=60 -export TEST_RETRY_COUNT=3 - -echo -e "${BLUE}环境配置:${NC}" -echo " ES_HOST: $ES_HOST" -echo " API_HOST: $API_HOST:$API_PORT" -echo " TENANT_ID: $TENANT_ID" -echo " LOG_LEVEL: $LOG_LEVEL" -echo " TESTING_MODE: $TESTING_MODE" - -# 检查Elasticsearch是否运行 -echo -e "${BLUE}检查Elasticsearch状态...${NC}" -if curl -s "$ES_HOST/_cluster/health" > /dev/null; then - echo -e "${GREEN}✓ Elasticsearch正在运行${NC}" -else - echo -e "${YELLOW}⚠ Elasticsearch未运行,尝试启动...${NC}" - - # 尝试启动Elasticsearch(如果安装了本地版本) - if command -v elasticsearch &> /dev/null; then - echo -e "${BLUE}启动本地Elasticsearch...${NC}" - elasticsearch -d -p "$TEST_LOG_DIR/es.pid" - sleep 10 - - # 再次检查 - if curl -s "$ES_HOST/_cluster/health" > /dev/null; then - echo -e "${GREEN}✓ Elasticsearch启动成功${NC}" - else - echo -e "${RED}✗ Elasticsearch启动失败${NC}" - echo -e "${YELLOW}请手动启动Elasticsearch或配置远程ES地址${NC}" - exit 1 - fi - else - echo -e "${RED}✗ 未找到本地Elasticsearch${NC}" - echo -e "${YELLOW}请启动Elasticsearch服务或修改ES_HOST配置${NC}" - exit 1 - fi -fi - -# 等待Elasticsearch就绪 -echo -e "${BLUE}等待Elasticsearch就绪...${NC}" -for i in {1..30}; do - if curl -s "$ES_HOST/_cluster/health?wait_for_status=yellow&timeout=1s" | grep -q '"status":"green\|yellow"'; then - echo -e "${GREEN}✓ Elasticsearch已就绪${NC}" - break - fi - if [ $i -eq 30 ]; then - echo -e "${RED}✗ Elasticsearch就绪超时${NC}" - exit 1 - fi - sleep 1 -done - -# 创建测试索引(如果需要) -echo -e "${BLUE}准备测试数据索引...${NC}" -curl -X PUT "$ES_HOST/test_products" -H 'Content-Type: application/json' -d' -{ - "settings": { - "number_of_shards": 1, - "number_of_replicas": 0, - "analysis": { - "analyzer": { - "ansj": { - "type": "custom", - "tokenizer": "keyword" - } - } - } - }, - "mappings": { - "properties": { - "name": { - "type": "text", - "analyzer": "ansj" - }, - "brand_name": { - "type": "text", - "analyzer": "ansj" - }, - "tags": { - "type": "text", - "analyzer": "ansj" - }, - "price": { - "type": "double" - }, - "category_id": { - "type": "integer" - }, - "spu_id": { - "type": "keyword" - }, - "text_embedding": { - "type": "dense_vector", - "dims": 1024 - } - } - } -}' > /dev/null 2>&1 || echo -e "${YELLOW}索引可能已存在${NC}" - -# 插入测试数据 -echo -e "${BLUE}插入测试数据...${NC}" -curl -X POST "$ES_HOST/test_products/_bulk" -H 'Content-Type: application/json' -d' -{"index": {"_id": "1"}} -{"name": "红色连衣裙", "brand_name": "测试品牌", "tags": ["红色", "连衣裙", "女装"], "price": 299.0, "category_id": 1, "spu_id": "dress_001"} -{"index": {"_id": "2"}} -{"name": "蓝色连衣裙", "brand_name": "测试品牌", "tags": ["蓝色", 
"连衣裙", "女装"], "price": 399.0, "category_id": 1, "spu_id": "dress_002"} -{"index": {"_id": "3"}} -{"name": "智能手机", "brand_name": "科技品牌", "tags": ["智能", "手机", "数码"], "price": 2999.0, "category_id": 2, "spu_id": "phone_001"} -{"index": {"_id": "4"}} -{"name": "笔记本电脑", "brand_name": "科技品牌", "tags": ["笔记本", "电脑", "办公"], "price": 5999.0, "category_id": 3, "spu_id": "laptop_001"} -' > /dev/null 2>&1 || echo -e "${YELLOW}测试数据可能已存在${NC}" - -# 启动测试API服务 -echo -e "${BLUE}启动测试API服务...${NC}" -cd "$PROJECT_ROOT" - -# 使用后台模式启动API -python -m api.app \ - --host $API_HOST \ - --port $API_PORT \ - --tenant $TENANT_ID \ - --es-host $ES_HOST \ - > "$API_LOG_FILE" 2>&1 & - -API_PID=$! -echo $API_PID > "$PID_FILE" - -# 等待API服务启动 -echo -e "${BLUE}等待API服务启动...${NC}" -for i in {1..30}; do - if curl -s "http://$API_HOST:$API_PORT/health" > /dev/null; then - echo -e "${GREEN}✓ API服务已就绪 (PID: $API_PID)${NC}" - break - fi - if [ $i -eq 30 ]; then - echo -e "${RED}✗ API服务启动超时${NC}" - kill $API_PID 2>/dev/null || true - rm -f "$PID_FILE" - exit 1 - fi - sleep 1 -done - -# 验证测试环境 -echo -e "${BLUE}验证测试环境...${NC}" - -# 测试Elasticsearch连接 -if curl -s "$ES_HOST/_cluster/health" | grep -q '"status":"green\|yellow"'; then - echo -e "${GREEN}✓ Elasticsearch连接正常${NC}" -else - echo -e "${RED}✗ Elasticsearch连接失败${NC}" - exit 1 -fi - -# 测试API健康检查 -if curl -s "http://$API_HOST:$API_PORT/health" | grep -q '"status"'; then - echo -e "${GREEN}✓ API服务健康检查通过${NC}" -else - echo -e "${RED}✗ API服务健康检查失败${NC}" - exit 1 -fi - -# 测试基本搜索功能 -if curl -s "http://$API_HOST:$API_PORT/search?q=红色连衣裙" | grep -q '"hits"'; then - echo -e "${GREEN}✓ 基本搜索功能正常${NC}" -else - echo -e "${YELLOW}⚠ 基本搜索功能可能有问题,但继续进行${NC}" -fi - -# 输出环境信息 -echo -e "${GREEN}========================================${NC}" -echo -e "${GREEN}测试环境启动完成!${NC}" -echo -e "${GREEN}========================================${NC}" -echo -e "${BLUE}服务信息:${NC}" -echo " Elasticsearch: $ES_HOST" -echo " API服务: http://$API_HOST:$API_PORT" -echo " 测试客户: $TENANT_ID" -echo -e "${BLUE}进程信息:${NC}" -echo " API PID: $API_PID" -echo " PID文件: $PID_FILE" -echo -e "${BLUE}日志文件:${NC}" -echo " 环境日志: $LOG_FILE" -echo " API日志: $API_LOG_FILE" -echo " ES日志: $ES_LOG_FILE" -echo -e "${BLUE}测试命令:${NC}" -echo " 运行所有测试: python scripts/run_tests.py" -echo " 单元测试: pytest tests/unit/ -v" -echo " 集成测试: pytest tests/integration/ -v" -echo " API测试: pytest tests/integration/test_api_integration.py -v" -echo "e${NC}" -echo -e "${BLUE}停止环境: ./scripts/stop_test_environment.sh${NC}" - -# 保存环境变量到文件供测试脚本使用 -cat > "$PROJECT_ROOT/test_env.sh" << EOF -#!/bin/bash -export ES_HOST="$ES_HOST" -export ES_USERNAME="$ES_USERNAME" -export ES_PASSWORD="$ES_PASSWORD" -export API_HOST="$API_HOST" -export API_PORT="$API_PORT" -export TENANT_ID="$TENANT_ID" -export TESTING_MODE="$TESTING_MODE" -export LOG_LEVEL="$LOG_LEVEL" -export PYTHONPATH="$PROJECT_ROOT:\$PYTHONPATH" -EOF - -chmod +x "$PROJECT_ROOT/test_env.sh" - -echo -e "${GREEN}测试环境已准备就绪!${NC}" \ No newline at end of file diff --git a/scripts/stop_test_environment.sh b/scripts/stop_test_environment.sh deleted file mode 100755 index c17e744..0000000 --- a/scripts/stop_test_environment.sh +++ /dev/null @@ -1,82 +0,0 @@ -#!/bin/bash - -# 停止测试环境脚本 - -set -e - -# 颜色定义 -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# 配置 -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" -PID_FILE="$PROJECT_ROOT/test_environment.pid" -ES_PID_FILE="$PROJECT_ROOT/test_logs/es.pid" - -echo -e 
"${BLUE}========================================${NC}" -echo -e "${BLUE}停止测试环境${NC}" -echo -e "${BLUE}========================================${NC}" - -# 停止API服务 -if [ -f "$PID_FILE" ]; then - API_PID=$(cat "$PID_FILE") - if ps -p $API_PID > /dev/null 2>&1; then - echo -e "${BLUE}停止API服务 (PID: $API_PID)...${NC}" - kill $API_PID - - # 等待进程结束 - for i in {1..10}; do - if ! ps -p $API_PID > /dev/null 2>&1; then - echo -e "${GREEN}✓ API服务已停止${NC}" - break - fi - if [ $i -eq 10 ]; then - echo -e "${YELLOW}强制停止API服务...${NC}" - kill -9 $API_PID 2>/dev/null || true - fi - sleep 1 - done - else - echo -e "${YELLOW}API服务进程不存在${NC}" - fi - rm -f "$PID_FILE" -else - echo -e "${YELLOW}未找到API服务PID文件${NC}" -fi - -# 停止Elasticsearch(如果是本地启动的) -if [ -f "$ES_PID_FILE" ]; then - ES_PID=$(cat "$ES_PID_FILE") - if ps -p $ES_PID > /dev/null 2>&1; then - echo -e "${BLUE}停止本地Elasticsearch (PID: $ES_PID)...${NC}" - kill $ES_PID - rm -f "$ES_PID_FILE" - echo -e "${GREEN}✓ Elasticsearch已停止${NC}" - else - echo -e "${YELLOW}Elasticsearch进程不存在${NC}" - rm -f "$ES_PID_FILE" - fi -else - echo -e "${BLUE}跳过本地Elasticsearch停止(未找到PID文件)${NC}" -fi - -# 清理测试环境文件 -echo -e "${BLUE}清理测试环境文件...${NC}" -rm -f "$PROJECT_ROOT/test_env.sh" - -# 清理测试索引(可选) -read -p "是否删除测试索引? (y/N): " -n 1 -r -echo -if [[ $REPLY =~ ^[Yy]$ ]]; then - echo -e "${BLUE}删除测试索引...${NC}" - curl -X DELETE "http://localhost:9200/test_products" 2>/dev/null || true - echo -e "${GREEN}✓ 测试索引已删除${NC}" -fi - -echo -e "${GREEN}========================================${NC}" -echo -e "${GREEN}测试环境已停止!${NC}" -echo -e "${GREEN}========================================${NC}" \ No newline at end of file diff --git a/scripts/test_base.py b/scripts/test_base.py deleted file mode 100644 index 3e80dcd..0000000 --- a/scripts/test_base.py +++ /dev/null @@ -1,242 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script for base configuration. - -Tests data ingestion, search API, response format, and tenant isolation. -""" - -import sys -import os -import argparse -import requests -import json -from pathlib import Path - -# Add parent directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - - -def test_search_api(base_url: str, tenant_id: str, query: str = "耳机"): - """ - Test search API. - - Args: - base_url: API base URL - tenant_id: Tenant ID - query: Search query - - Returns: - Response JSON or None if failed - """ - url = f"{base_url}/search/" - headers = { - "X-Tenant-ID": tenant_id, - "Content-Type": "application/json" - } - payload = { - "query": query, - "size": 10, - "from": 0 - } - - print(f"\nTesting search API:") - print(f" URL: {url}") - print(f" Query: {query}") - print(f" Tenant ID: {tenant_id}") - - try: - response = requests.post(url, json=payload, headers=headers, timeout=30) - response.raise_for_status() - data = response.json() - - print(f" Status: {response.status_code}") - print(f" Total: {data.get('total', 0)}") - print(f" Results: {len(data.get('results', []))}") - - return data - except Exception as e: - print(f" ERROR: {e}") - return None - - -def validate_response_format(data: dict): - """ - Validate response format. 
- - Args: - data: Response data - - Returns: - List of validation errors (empty if valid) - """ - errors = [] - - # Check for results field (not hits) - if 'hits' in data: - errors.append("Response contains 'hits' field (should be 'results')") - - if 'results' not in data: - errors.append("Response missing 'results' field") - else: - results = data['results'] - if not isinstance(results, list): - errors.append("'results' should be a list") - else: - # Validate first result structure - if results: - result = results[0] - required_fields = ['spu_id', 'title', 'skus', 'relevance_score'] - for field in required_fields: - if field not in result: - errors.append(f"Result missing required field: {field}") - - # Check for ES internal fields - es_internal_fields = ['_id', '_score', '_source'] - for field in es_internal_fields: - if field in result: - errors.append(f"Result contains ES internal field: {field}") - - # Validate skus - if 'skus' in result: - skus = result['skus'] - if not isinstance(skus, list): - errors.append("'skus' should be a list") - elif skus: - sku = skus[0] - sku_required = ['sku_id', 'price', 'sku', 'stock'] - for field in sku_required: - if field not in sku: - errors.append(f"SKU missing required field: {field}") - - # Check for suggestions and related_searches - if 'suggestions' not in data: - errors.append("Response missing 'suggestions' field") - if 'related_searches' not in data: - errors.append("Response missing 'related_searches' field") - - return errors - - -def test_facets(base_url: str, tenant_id: str): - """ - Test facets aggregation. - - Args: - base_url: API base URL - tenant_id: Tenant ID - - Returns: - Response JSON or None if failed - """ - url = f"{base_url}/search/" - headers = { - "X-Tenant-ID": tenant_id, - "Content-Type": "application/json" - } - payload = { - "query": "商品", - "size": 10, - "facets": ["category.keyword", "vendor.keyword"] - } - - print(f"\nTesting facets:") - print(f" Facets: {payload['facets']}") - - try: - response = requests.post(url, json=payload, headers=headers, timeout=30) - response.raise_for_status() - data = response.json() - - if 'facets' in data and data['facets']: - print(f" Facets returned: {len(data['facets'])}") - for facet in data['facets']: - print(f" - {facet.get('field')}: {len(facet.get('values', []))} values") - else: - print(" WARNING: No facets returned") - - return data - except Exception as e: - print(f" ERROR: {e}") - return None - - -def test_tenant_isolation(base_url: str, tenant_id_1: str, tenant_id_2: str): - """ - Test tenant isolation. 
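-
-    Example (hypothetical base URL and tenant IDs):
-        >>> test_tenant_isolation("http://localhost:8000", "1", "2")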
- - Args: - base_url: API base URL - tenant_id_1: First tenant ID - tenant_id_2: Second tenant ID - """ - print(f"\nTesting tenant isolation:") - print(f" Tenant 1: {tenant_id_1}") - print(f" Tenant 2: {tenant_id_2}") - - # Search for tenant 1 - data1 = test_search_api(base_url, tenant_id_1, "商品") - # Search for tenant 2 - data2 = test_search_api(base_url, tenant_id_2, "商品") - - if data1 and data2: - results1 = set(r.get('spu_id') for r in data1.get('results', [])) - results2 = set(r.get('spu_id') for r in data2.get('results', [])) - - overlap = results1 & results2 - if overlap: - print(f" WARNING: Found {len(overlap)} overlapping results between tenants") - else: - print(f" OK: No overlapping results (tenant isolation working)") - - -def main(): - parser = argparse.ArgumentParser(description='Test base configuration') - parser.add_argument('--api-url', default='http://localhost:8000', help='API base URL') - parser.add_argument('--tenant-id', default='1', help='Tenant ID for testing') - parser.add_argument('--test-tenant-2', help='Second tenant ID for isolation test') - - args = parser.parse_args() - - print("=" * 60) - print("Base Configuration Test Suite") - print("=" * 60) - - # Test 1: Basic search - print("\n[Test 1] Basic Search") - data = test_search_api(args.api_url, args.tenant_id) - if not data: - print("FAILED: Basic search test") - return 1 - - # Test 2: Response format validation - print("\n[Test 2] Response Format Validation") - errors = validate_response_format(data) - if errors: - print("FAILED: Response format validation") - for error in errors: - print(f" - {error}") - return 1 - else: - print("PASSED: Response format is correct") - - # Test 3: Facets - print("\n[Test 3] Facets Aggregation") - facet_data = test_facets(args.api_url, args.tenant_id) - if not facet_data: - print("WARNING: Facets test failed (may be expected if no data)") - - # Test 4: Tenant isolation (if second tenant provided) - if args.test_tenant_2: - print("\n[Test 4] Tenant Isolation") - test_tenant_isolation(args.api_url, args.tenant_id, args.test_tenant_2) - - print("\n" + "=" * 60) - print("All tests completed") - print("=" * 60) - - return 0 - - -if __name__ == '__main__': - sys.exit(main()) - diff --git a/scripts/test_cloud_embedding.py b/scripts/test_cloud_embedding.py deleted file mode 100644 index 67b358c..0000000 --- a/scripts/test_cloud_embedding.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -Test script for cloud text embedding using Aliyun DashScope API. - -Reads queries from queries.txt and tests embedding generation, -logging send time, receive time, and duration for each request. -""" - -import os -import sys -import time -from datetime import datetime -from pathlib import Path - -# Add parent directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from embeddings.cloud_text_encoder import CloudTextEncoder - - -def format_timestamp(ts: float) -> str: - """Format timestamp to readable string.""" - return datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S.%f')[:-3] - - -def read_queries(file_path: str, limit: int = 100) -> list: - """ - Read queries from text file. 
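-
-    Example (assuming a UTF-8 file with one query per line):
-        >>> read_queries("data_crawling/queries.txt", limit=2)  # doctest: +SKIP
-        ['红色连衣裙', '智能手机']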
- - Args: - file_path: Path to queries file - limit: Maximum number of queries to read - - Returns: - List of query strings - """ - queries = [] - with open(file_path, 'r', encoding='utf-8') as f: - for i, line in enumerate(f): - if i >= limit: - break - query = line.strip() - if query: # Skip empty lines - queries.append(query) - return queries - - -def test_cloud_embedding(queries_file: str, num_queries: int = 100): - """ - Test cloud embedding with queries from file. - - Args: - queries_file: Path to queries file - num_queries: Number of queries to test - """ - print("=" * 80) - print("Cloud Text Embedding Test - Aliyun DashScope API") - print("=" * 80) - print() - - # Check if API key is set - api_key = os.getenv("DASHSCOPE_API_KEY") - if not api_key: - print("ERROR: DASHSCOPE_API_KEY environment variable is not set!") - print("Please set it using: export DASHSCOPE_API_KEY='your-api-key'") - return - - print(f"API Key: {api_key[:10]}...{api_key[-4:]}") - print() - - # Read queries - print(f"Reading queries from: {queries_file}") - try: - queries = read_queries(queries_file, limit=num_queries) - print(f"Successfully read {len(queries)} queries") - print() - except Exception as e: - print(f"ERROR: Failed to read queries file: {e}") - return - - # Initialize encoder - print("Initializing CloudTextEncoder...") - try: - encoder = CloudTextEncoder() - print("CloudTextEncoder initialized successfully") - print() - except Exception as e: - print(f"ERROR: Failed to initialize encoder: {e}") - return - - # Test embeddings - print("=" * 80) - print(f"Testing {len(queries)} queries (one by one)") - print("=" * 80) - print() - - total_start = time.time() - success_count = 0 - failure_count = 0 - total_duration = 0.0 - - for i, query in enumerate(queries, 1): - try: - # Record send time - send_time = time.time() - send_time_str = format_timestamp(send_time) - - # Generate embedding - embedding = encoder.encode(query) - - # Record receive time - receive_time = time.time() - receive_time_str = format_timestamp(receive_time) - - # Calculate duration - duration = receive_time - send_time - total_duration += duration - - # Verify embedding - if embedding.shape[0] > 0: - success_count += 1 - status = "✓ SUCCESS" - else: - failure_count += 1 - status = "✗ FAILED" - - # Print result - query_display = query[:50] + "..." 
if len(query) > 50 else query - print(f"[{i:3d}/{len(queries)}] {status}") - print(f" Query: {query_display}") - print(f" Send Time: {send_time_str}") - print(f" Receive Time: {receive_time_str}") - print(f" Duration: {duration:.3f}s") - print(f" Embedding Shape: {embedding.shape}") - print() - - except Exception as e: - failure_count += 1 - receive_time = time.time() - duration = receive_time - send_time - - print(f"[{i:3d}/{len(queries)}] ✗ ERROR") - print(f" Query: {query[:50]}...") - print(f" Send Time: {send_time_str}") - print(f" Receive Time: {format_timestamp(receive_time)}") - print(f" Duration: {duration:.3f}s") - print(f" Error: {str(e)}") - print() - - # Print summary - total_elapsed = time.time() - total_start - avg_duration = total_duration / len(queries) if queries else 0 - - print("=" * 80) - print("Test Summary") - print("=" * 80) - print(f"Total Queries: {len(queries)}") - print(f"Successful: {success_count}") - print(f"Failed: {failure_count}") - print(f"Success Rate: {success_count / len(queries) * 100:.1f}%") - print(f"Total Time: {total_elapsed:.3f}s") - print(f"Total API Time: {total_duration:.3f}s") - print(f"Average Duration: {avg_duration:.3f}s per query") - print(f"Throughput: {len(queries) / total_elapsed:.2f} queries/second") - print("=" * 80) - - -def main(): - """Main entry point.""" - # Default queries file path - queries_file = Path(__file__).parent.parent / "data_crawling" / "queries.txt" - - # Check if file exists - if not queries_file.exists(): - print(f"ERROR: Queries file not found: {queries_file}") - return - - # Run test with 100 queries - test_cloud_embedding(str(queries_file), num_queries=100) - - -if __name__ == "__main__": - main() - diff --git a/scripts/test_cnclip_service.py b/scripts/test_cnclip_service.py deleted file mode 100755 index 2fcfc7b..0000000 --- a/scripts/test_cnclip_service.py +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env python3 -""" -CN-CLIP 服务测试脚本 - -用途: - 测试 CN-CLIP 服务的文本和图像编码功能(使用 gRPC 协议) - -使用方法: - python scripts/test_cnclip_service.py [PORT] - -参数: - PORT: 服务端口(默认:51000) -""" - -import sys -import numpy as np -from clip_client import Client - - -def test_encoding(client, test_name, inputs): - """测试编码功能""" - print(f"\n{test_name}...") - try: - result = client.encode(inputs) - if isinstance(result, np.ndarray): - print(f"✓ 成功! 
形状: {result.shape}") - print(f" 输入数量: {len(inputs)}") - print(f" 输出维度: {result.shape[1]}") - - # 显示每个 embedding 的维度和前20个数字 - for i in range(min(len(inputs), result.shape[0])): - emb = result[i] - first_20 = emb[:20].tolist() - - # 计算 L2 归一化 - norm = np.linalg.norm(emb) - normalized_emb = emb / norm if norm > 0 else emb - normalized_first_20 = normalized_emb[:20].tolist() - - print(f" input: {inputs[i]}") - print(f" Embedding[{i}] 维度: {len(emb)}") - print(f" 前20个数字: {first_20}") - print(f" normalize后的前20个数字: {normalized_first_20}") - return True - else: - print(f"✗ 失败: 返回类型错误: {type(result)}") - return False - except Exception as e: - print(f"✗ 失败: {e}") - import traceback - traceback.print_exc() - return False - - -def main(): - # 获取端口参数 - port = sys.argv[1] if len(sys.argv) > 1 else "51000" - grpc_url = f"grpc://localhost:{port}" - - print("=" * 50) - print("CN-CLIP 服务测试") - print("=" * 50) - print(f"服务地址: {grpc_url} (gRPC 协议)") - print() - - # 创建客户端 - try: - client = Client(grpc_url) - except Exception as e: - print(f"✗ 客户端创建失败: {e}") - sys.exit(1) - - # 运行测试 - results = [] - - # 测试1: 文本编码 - results.append(test_encoding( - client, - "测试1: 编码文本", - ['这是一个测试文本', '另一个测试文本'] - )) - - # 测试2: 图像编码 - results.append(test_encoding( - client, - "测试2: 编码图像(远程 URL)", - ['https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg'] - )) - - # 测试3: 混合编码 - results.append(test_encoding( - client, - "测试3: 混合编码(文本和图像)", - ['这是一段文本', 'https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg'] - )) - - # 汇总 - print("\n" + "=" * 50) - print("测试结果汇总") - print("=" * 50) - print(f"总测试数: {len(results)}") - print(f"通过: {sum(results)}") - print(f"失败: {len(results) - sum(results)}") - - if all(results): - print("\n✓ 所有测试通过!") - sys.exit(0) - else: - print("\n✗ 部分测试失败") - sys.exit(1) - - -if __name__ == '__main__': - main() - diff --git a/scripts/test_facet_api.py b/scripts/test_facet_api.py deleted file mode 100755 index e6c7f55..0000000 --- a/scripts/test_facet_api.py +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env python3 -""" -测试脚本:模拟前端请求,检查后端返回的分面结果 -""" - -import sys -import json -import requests -import argparse -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).parent.parent)) - - -def main(): - parser = argparse.ArgumentParser(description='测试分面API') - parser.add_argument('--api-url', type=str, default='http://localhost:6002/search/', help='API URL') - parser.add_argument('--tenant-id', type=str, required=True, help='Tenant ID') - args = parser.parse_args() - - # 模拟前端的分面请求(与frontend/static/js/app.js一致) - request_data = { - "query": "", # 空查询,获取所有数据 - "size": 10, - "from": 0, - "facets": [ - "category1_name", - "specifications.color", - "specifications.size", - "specifications.material" - ] - } - - headers = { - "Content-Type": "application/json", - "X-Tenant-ID": args.tenant_id - } - - try: - print(f"发送请求到: {args.api_url}") - print(f"Tenant ID: {args.tenant_id}") - print(f"请求数据:") - print(json.dumps(request_data, indent=2, ensure_ascii=False)) - print("\n" + "="*60) - - response = requests.post(args.api_url, json=request_data, headers=headers, timeout=30) - - if response.status_code != 200: - print(f"API错误: {response.status_code}") - print(response.text) - return 1 - - data = response.json() - - print("API响应:") - print(f" 总结果数: {data.get('total', 0)}") - print(f" 返回结果数: {len(data.get('results', []))}") - - facets = data.get('facets', []) - print(f"\n分面数量: {len(facets)}") - - if not facets: - print("\n⚠ 分面列表为空!") - return 1 - - print("\n" + "="*60) - print("分面详情:") - print("="*60) - - 
for i, facet in enumerate(facets, 1): - print(f"\n{i}. {facet.get('field')}") - print(f" 标签: {facet.get('label')}") - print(f" 类型: {facet.get('type')}") - print(f" 值数量: {len(facet.get('values', []))}") - print(f" 总计数: {facet.get('total_count', 0)}") - - values = facet.get('values', []) - if values: - print(f" 前5个值:") - for v in values[:5]: - print(f" - {v.get('value')}: {v.get('count')}") - else: - print(f" ⚠ 值列表为空!") - - # 检查specifications.color分面 - print("\n" + "="*60) - print("检查specifications.color分面:") - print("="*60) - - color_facet = None - for facet in facets: - if facet.get('field') == 'specifications.color': - color_facet = facet - break - - if color_facet: - print("✓ 找到specifications.color分面") - print(f" 值数量: {len(color_facet.get('values', []))}") - if color_facet.get('values'): - print(" 前10个值:") - for v in color_facet.get('values', [])[:10]: - print(f" {v.get('value')}: {v.get('count')}") - else: - print(" ⚠ 值列表为空!") - else: - print("✗ 未找到specifications.color分面") - print(f" 可用分面字段: {[f.get('field') for f in facets]}") - - # 输出完整JSON(便于调试) - print("\n" + "="*60) - print("完整分面JSON(前500字符):") - print("="*60) - facets_json = json.dumps(facets, indent=2, ensure_ascii=False) - print(facets_json[:500]) - - except requests.exceptions.ConnectionError as e: - print(f"\n连接错误: 无法连接到API服务器 {args.api_url}") - print("请确保后端服务正在运行") - return 1 - except Exception as e: - print(f"\n错误: {e}") - import traceback - traceback.print_exc() - return 1 - - return 0 - - -if __name__ == '__main__': - sys.exit(main()) - diff --git a/scripts/test_frontend.sh b/scripts/test_frontend.sh deleted file mode 100755 index f5024f3..0000000 --- a/scripts/test_frontend.sh +++ /dev/null @@ -1,94 +0,0 @@ -#!/bin/bash - -# Test Frontend - Quick verification script - -set -e - -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -RED='\033[0;31m' -NC='\033[0m' - -API_URL="http://120.76.41.98:6002" - -echo -e "${GREEN}========================================${NC}" -echo -e "${GREEN}Frontend Test Script${NC}" -echo -e "${GREEN}========================================${NC}" - -echo -e "\n${YELLOW}Testing API endpoints...${NC}" - -# Test 1: Health check -echo -e "\n1. Testing health endpoint..." -if curl -s "${API_URL}/health" > /dev/null; then - echo -e "${GREEN}✓ Health check passed${NC}" -else - echo -e "${RED}✗ Health check failed${NC}" - exit 1 -fi - -# Test 2: Frontend HTML -echo -e "\n2. Testing frontend HTML..." -if curl -s "${API_URL}/" | grep -q "Product Search"; then - echo -e "${GREEN}✓ Frontend HTML accessible${NC}" -else - echo -e "${RED}✗ Frontend HTML not found${NC}" - exit 1 -fi - -# Test 3: Static CSS -echo -e "\n3. Testing static CSS..." -if curl -s "${API_URL}/static/css/style.css" | grep -q "page-container"; then - echo -e "${GREEN}✓ CSS file accessible${NC}" -else - echo -e "${RED}✗ CSS file not found${NC}" - exit 1 -fi - -# Test 4: Static JS -echo -e "\n4. Testing static JavaScript..." -if curl -s "${API_URL}/static/js/app.js" | grep -q "performSearch"; then - echo -e "${GREEN}✓ JavaScript file accessible${NC}" -else - echo -e "${RED}✗ JavaScript file not found${NC}" - exit 1 -fi - -# Test 5: Search API -echo -e "\n5. Testing search API..." 
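-# jq would make this assertion more robust than grep; a sketch, assuming jq is installed:
-#   curl -s -X POST "${API_URL}/search/" -H "Content-Type: application/json" \
-#     -d '{"query":"玩具","size":5}' | jq '.total'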
-SEARCH_RESULT=$(curl -s -X POST "${API_URL}/search/" \ - -H "Content-Type: application/json" \ - -d '{"query":"玩具","size":5}') - -if echo "$SEARCH_RESULT" | grep -q "hits"; then - echo -e "${GREEN}✓ Search API working${NC}" - TOTAL=$(echo "$SEARCH_RESULT" | grep -o '"total":[0-9]*' | cut -d: -f2) - echo -e " Found ${YELLOW}${TOTAL}${NC} results" -else - echo -e "${RED}✗ Search API failed${NC}" - exit 1 -fi - -echo -e "\n${GREEN}========================================${NC}" -echo -e "${GREEN}All tests passed! ✓${NC}" -echo -e "${GREEN}========================================${NC}" - -echo -e "\n${YELLOW}Frontend is ready!${NC}" -echo -e "Open in browser: ${GREEN}${API_URL}/${NC}" - -echo -e "\n${YELLOW}Quick Start Guide:${NC}" -echo "1. Open browser and go to: ${API_URL}/" -echo "2. Enter a search query (e.g., '玩具')" -echo "3. Click on filter tags to refine results" -echo "4. Use sort buttons with arrows to sort" -echo "5. Use pagination at the bottom to browse" - -echo -e "\n${YELLOW}Key Features:${NC}" -echo "- Clean white background design" -echo "- Horizontal filter tags (categories, brands, suppliers)" -echo "- Sort buttons with up/down arrows for ascending/descending" -echo "- Product grid with images, prices, MOQ info" -echo "- Full pagination support" -echo "- Responsive design for mobile and desktop" - -echo -e "\n${GREEN}Enjoy your new frontend! 🎉${NC}" - diff --git a/tests/test_cloud_embedding.py b/tests/test_cloud_embedding.py new file mode 100644 index 0000000..67b358c --- /dev/null +++ b/tests/test_cloud_embedding.py @@ -0,0 +1,183 @@ +""" +Test script for cloud text embedding using Aliyun DashScope API. + +Reads queries from queries.txt and tests embedding generation, +logging send time, receive time, and duration for each request. +""" + +import os +import sys +import time +from datetime import datetime +from pathlib import Path + +# Add parent directory to path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from embeddings.cloud_text_encoder import CloudTextEncoder + + +def format_timestamp(ts: float) -> str: + """Format timestamp to readable string.""" + return datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S.%f')[:-3] + + +def read_queries(file_path: str, limit: int = 100) -> list: + """ + Read queries from text file. + + Args: + file_path: Path to queries file + limit: Maximum number of queries to read + + Returns: + List of query strings + """ + queries = [] + with open(file_path, 'r', encoding='utf-8') as f: + for i, line in enumerate(f): + if i >= limit: + break + query = line.strip() + if query: # Skip empty lines + queries.append(query) + return queries + + +def test_cloud_embedding(queries_file: str, num_queries: int = 100): + """ + Test cloud embedding with queries from file. 
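+
+    Example (hypothetical invocation; requires DASHSCOPE_API_KEY to be set):
+        >>> test_cloud_embedding("data_crawling/queries.txt", num_queries=5)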
+ + Args: + queries_file: Path to queries file + num_queries: Number of queries to test + """ + print("=" * 80) + print("Cloud Text Embedding Test - Aliyun DashScope API") + print("=" * 80) + print() + + # Check if API key is set + api_key = os.getenv("DASHSCOPE_API_KEY") + if not api_key: + print("ERROR: DASHSCOPE_API_KEY environment variable is not set!") + print("Please set it using: export DASHSCOPE_API_KEY='your-api-key'") + return + + print(f"API Key: {api_key[:10]}...{api_key[-4:]}") + print() + + # Read queries + print(f"Reading queries from: {queries_file}") + try: + queries = read_queries(queries_file, limit=num_queries) + print(f"Successfully read {len(queries)} queries") + print() + except Exception as e: + print(f"ERROR: Failed to read queries file: {e}") + return + + # Initialize encoder + print("Initializing CloudTextEncoder...") + try: + encoder = CloudTextEncoder() + print("CloudTextEncoder initialized successfully") + print() + except Exception as e: + print(f"ERROR: Failed to initialize encoder: {e}") + return + + # Test embeddings + print("=" * 80) + print(f"Testing {len(queries)} queries (one by one)") + print("=" * 80) + print() + + total_start = time.time() + success_count = 0 + failure_count = 0 + total_duration = 0.0 + + for i, query in enumerate(queries, 1): + try: + # Record send time + send_time = time.time() + send_time_str = format_timestamp(send_time) + + # Generate embedding + embedding = encoder.encode(query) + + # Record receive time + receive_time = time.time() + receive_time_str = format_timestamp(receive_time) + + # Calculate duration + duration = receive_time - send_time + total_duration += duration + + # Verify embedding + if embedding.shape[0] > 0: + success_count += 1 + status = "✓ SUCCESS" + else: + failure_count += 1 + status = "✗ FAILED" + + # Print result + query_display = query[:50] + "..." 
if len(query) > 50 else query
+            print(f"[{i:3d}/{len(queries)}] {status}")
+            print(f" Query: {query_display}")
+            print(f" Send Time: {send_time_str}")
+            print(f" Receive Time: {receive_time_str}")
+            print(f" Duration: {duration:.3f}s")
+            print(f" Embedding Shape: {embedding.shape}")
+            print()
+
+        except Exception as e:
+            failure_count += 1
+            receive_time = time.time()
+            duration = receive_time - send_time
+
+            print(f"[{i:3d}/{len(queries)}] ✗ ERROR")
+            print(f" Query: {query[:50]}...")
+            print(f" Send Time: {send_time_str}")
+            print(f" Receive Time: {format_timestamp(receive_time)}")
+            print(f" Duration: {duration:.3f}s")
+            print(f" Error: {str(e)}")
+            print()
+
+    # Print summary (guard divisions so an empty query file cannot crash the report)
+    total_elapsed = time.time() - total_start
+    avg_duration = total_duration / len(queries) if queries else 0
+
+    print("=" * 80)
+    print("Test Summary")
+    print("=" * 80)
+    print(f"Total Queries: {len(queries)}")
+    print(f"Successful: {success_count}")
+    print(f"Failed: {failure_count}")
+    print(f"Success Rate: {(success_count / len(queries) * 100) if queries else 0:.1f}%")
+    print(f"Total Time: {total_elapsed:.3f}s")
+    print(f"Total API Time: {total_duration:.3f}s")
+    print(f"Average Duration: {avg_duration:.3f}s per query")
+    print(f"Throughput: {(len(queries) / total_elapsed) if total_elapsed > 0 else 0:.2f} queries/second")
+    print("=" * 80)
+
+
+def main():
+    """Main entry point."""
+    # Default queries file path
+    queries_file = Path(__file__).parent.parent / "data_crawling" / "queries.txt"
+
+    # Check if file exists
+    if not queries_file.exists():
+        print(f"ERROR: Queries file not found: {queries_file}")
+        return
+
+    # Run test with 100 queries
+    test_cloud_embedding(str(queries_file), num_queries=100)
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/tests/test_cnclip_service.py b/tests/test_cnclip_service.py
new file mode 100755
index 0000000..2fcfc7b
--- /dev/null
+++ b/tests/test_cnclip_service.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+"""
+CN-CLIP 服务测试脚本
+
+用途:
+    测试 CN-CLIP 服务的文本和图像编码功能(使用 gRPC 协议)
+
+使用方法:
+    python tests/test_cnclip_service.py [PORT]
+
+参数:
+    PORT: 服务端口(默认:51000)
+"""
+
+import sys
+import numpy as np
+from clip_client import Client
+
+
+def test_encoding(client, test_name, inputs):
+    """测试编码功能"""
+    print(f"\n{test_name}...")
+    try:
+        result = client.encode(inputs)
+        if isinstance(result, np.ndarray):
+            print(f"✓ 成功! 
形状: {result.shape}") + print(f" 输入数量: {len(inputs)}") + print(f" 输出维度: {result.shape[1]}") + + # 显示每个 embedding 的维度和前20个数字 + for i in range(min(len(inputs), result.shape[0])): + emb = result[i] + first_20 = emb[:20].tolist() + + # 计算 L2 归一化 + norm = np.linalg.norm(emb) + normalized_emb = emb / norm if norm > 0 else emb + normalized_first_20 = normalized_emb[:20].tolist() + + print(f" input: {inputs[i]}") + print(f" Embedding[{i}] 维度: {len(emb)}") + print(f" 前20个数字: {first_20}") + print(f" normalize后的前20个数字: {normalized_first_20}") + return True + else: + print(f"✗ 失败: 返回类型错误: {type(result)}") + return False + except Exception as e: + print(f"✗ 失败: {e}") + import traceback + traceback.print_exc() + return False + + +def main(): + # 获取端口参数 + port = sys.argv[1] if len(sys.argv) > 1 else "51000" + grpc_url = f"grpc://localhost:{port}" + + print("=" * 50) + print("CN-CLIP 服务测试") + print("=" * 50) + print(f"服务地址: {grpc_url} (gRPC 协议)") + print() + + # 创建客户端 + try: + client = Client(grpc_url) + except Exception as e: + print(f"✗ 客户端创建失败: {e}") + sys.exit(1) + + # 运行测试 + results = [] + + # 测试1: 文本编码 + results.append(test_encoding( + client, + "测试1: 编码文本", + ['这是一个测试文本', '另一个测试文本'] + )) + + # 测试2: 图像编码 + results.append(test_encoding( + client, + "测试2: 编码图像(远程 URL)", + ['https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg'] + )) + + # 测试3: 混合编码 + results.append(test_encoding( + client, + "测试3: 混合编码(文本和图像)", + ['这是一段文本', 'https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg'] + )) + + # 汇总 + print("\n" + "=" * 50) + print("测试结果汇总") + print("=" * 50) + print(f"总测试数: {len(results)}") + print(f"通过: {sum(results)}") + print(f"失败: {len(results) - sum(results)}") + + if all(results): + print("\n✓ 所有测试通过!") + sys.exit(0) + else: + print("\n✗ 部分测试失败") + sys.exit(1) + + +if __name__ == '__main__': + main() + diff --git a/tests/test_facet_api.py b/tests/test_facet_api.py new file mode 100755 index 0000000..e6c7f55 --- /dev/null +++ b/tests/test_facet_api.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +""" +测试脚本:模拟前端请求,检查后端返回的分面结果 +""" + +import sys +import json +import requests +import argparse +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent)) + + +def main(): + parser = argparse.ArgumentParser(description='测试分面API') + parser.add_argument('--api-url', type=str, default='http://localhost:6002/search/', help='API URL') + parser.add_argument('--tenant-id', type=str, required=True, help='Tenant ID') + args = parser.parse_args() + + # 模拟前端的分面请求(与frontend/static/js/app.js一致) + request_data = { + "query": "", # 空查询,获取所有数据 + "size": 10, + "from": 0, + "facets": [ + "category1_name", + "specifications.color", + "specifications.size", + "specifications.material" + ] + } + + headers = { + "Content-Type": "application/json", + "X-Tenant-ID": args.tenant_id + } + + try: + print(f"发送请求到: {args.api_url}") + print(f"Tenant ID: {args.tenant_id}") + print(f"请求数据:") + print(json.dumps(request_data, indent=2, ensure_ascii=False)) + print("\n" + "="*60) + + response = requests.post(args.api_url, json=request_data, headers=headers, timeout=30) + + if response.status_code != 200: + print(f"API错误: {response.status_code}") + print(response.text) + return 1 + + data = response.json() + + print("API响应:") + print(f" 总结果数: {data.get('total', 0)}") + print(f" 返回结果数: {len(data.get('results', []))}") + + facets = data.get('facets', []) + print(f"\n分面数量: {len(facets)}") + + if not facets: + print("\n⚠ 分面列表为空!") + return 1 + + print("\n" + "="*60) + print("分面详情:") + print("="*60) + + for i, facet 
in enumerate(facets, 1): + print(f"\n{i}. {facet.get('field')}") + print(f" 标签: {facet.get('label')}") + print(f" 类型: {facet.get('type')}") + print(f" 值数量: {len(facet.get('values', []))}") + print(f" 总计数: {facet.get('total_count', 0)}") + + values = facet.get('values', []) + if values: + print(f" 前5个值:") + for v in values[:5]: + print(f" - {v.get('value')}: {v.get('count')}") + else: + print(f" ⚠ 值列表为空!") + + # 检查specifications.color分面 + print("\n" + "="*60) + print("检查specifications.color分面:") + print("="*60) + + color_facet = None + for facet in facets: + if facet.get('field') == 'specifications.color': + color_facet = facet + break + + if color_facet: + print("✓ 找到specifications.color分面") + print(f" 值数量: {len(color_facet.get('values', []))}") + if color_facet.get('values'): + print(" 前10个值:") + for v in color_facet.get('values', [])[:10]: + print(f" {v.get('value')}: {v.get('count')}") + else: + print(" ⚠ 值列表为空!") + else: + print("✗ 未找到specifications.color分面") + print(f" 可用分面字段: {[f.get('field') for f in facets]}") + + # 输出完整JSON(便于调试) + print("\n" + "="*60) + print("完整分面JSON(前500字符):") + print("="*60) + facets_json = json.dumps(facets, indent=2, ensure_ascii=False) + print(facets_json[:500]) + + except requests.exceptions.ConnectionError as e: + print(f"\n连接错误: 无法连接到API服务器 {args.api_url}") + print("请确保后端服务正在运行") + return 1 + except Exception as e: + print(f"\n错误: {e}") + import traceback + traceback.print_exc() + return 1 + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) + -- libgit2 0.21.2
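The relocated tests remain standalone scripts. A usage sketch (the port and the
default --api-url come from the scripts themselves; the tenant ID is an
illustrative assumption):

    python tests/test_cnclip_service.py 51000
    python tests/test_facet_api.py --tenant-id 1
    DASHSCOPE_API_KEY='your-api-key' python tests/test_cloud_embedding.py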