Commit 7746376c302e7ef1da9aba268a67baaa1d2e5175
1 parent
506c39b7
日志统一用英文
Showing
6 changed files
with
112 additions
and
106 deletions
Show diff stats
data/wanbang/amazon_crawler.py
| ... | ... | @@ -85,7 +85,7 @@ class AmazonCrawler: |
| 85 | 85 | params[param] = kwargs[param] |
| 86 | 86 | |
| 87 | 87 | try: |
| 88 | - logger.info(f"正在请求: {query}") | |
| 88 | + logger.info(f"Making request: {query}") | |
| 89 | 89 | self.total_requests += 1 |
| 90 | 90 | |
| 91 | 91 | response = requests.get( |
| ... | ... | @@ -98,20 +98,20 @@ class AmazonCrawler: |
| 98 | 98 | data = response.json() |
| 99 | 99 | |
| 100 | 100 | if data.get('error_code') == '0000': |
| 101 | - logger.info(f"✓ 成功: {query} - 获得 {data.get('items', {}).get('real_total_results', 0)} 个结果") | |
| 101 | + logger.info(f"✓ Success: {query} - Got {data.get('items', {}).get('real_total_results', 0)} results") | |
| 102 | 102 | self.successful_requests += 1 |
| 103 | 103 | return data |
| 104 | 104 | else: |
| 105 | - logger.error(f"✗ API错误: {query} - {data.get('reason', 'Unknown error')}") | |
| 105 | + logger.error(f"✗ API error: {query} - {data.get('reason', 'Unknown error')}") | |
| 106 | 106 | self.failed_requests += 1 |
| 107 | 107 | return data |
| 108 | 108 | |
| 109 | 109 | except requests.exceptions.RequestException as e: |
| 110 | - logger.error(f"✗ 请求失败: {query} - {str(e)}") | |
| 110 | + logger.error(f"✗ Request failed: {query} - {str(e)}") | |
| 111 | 111 | self.failed_requests += 1 |
| 112 | 112 | return None |
| 113 | 113 | except json.JSONDecodeError as e: |
| 114 | - logger.error(f"✗ JSON解析失败: {query} - {str(e)}") | |
| 114 | + logger.error(f"✗ JSON parse failed: {query} - {str(e)}") | |
| 115 | 115 | self.failed_requests += 1 |
| 116 | 116 | return None |
| 117 | 117 | |
| ... | ... | @@ -127,36 +127,36 @@ class AmazonCrawler: |
| 127 | 127 | try: |
| 128 | 128 | with open(filepath, 'w', encoding='utf-8') as f: |
| 129 | 129 | json.dump(data, f, ensure_ascii=False, indent=2) |
| 130 | - logger.debug(f"已保存: {filepath}") | |
| 130 | + logger.debug(f"Saved: {filepath}") | |
| 131 | 131 | except Exception as e: |
| 132 | - logger.error(f"保存失败: {filepath} - {str(e)}") | |
| 132 | + logger.error(f"Save failed: {filepath} - {str(e)}") | |
| 133 | 133 | |
| 134 | 134 | def crawl_from_file(self, queries_file: str, delay: float = 1.0, |
| 135 | 135 | start_index: int = 0, max_queries: Optional[int] = None): |
| 136 | 136 | """从文件读取查询列表并批量爬取""" |
| 137 | 137 | self.start_time = datetime.now() |
| 138 | 138 | logger.info("=" * 70) |
| 139 | - logger.info(f"Amazon爬虫启动 - {self.start_time.strftime('%Y-%m-%d %H:%M:%S')}") | |
| 139 | + logger.info(f"Amazon crawler started - {self.start_time.strftime('%Y-%m-%d %H:%M:%S')}") | |
| 140 | 140 | logger.info("=" * 70) |
| 141 | - logger.info(f"查询文件: {queries_file}") | |
| 142 | - logger.info(f"结果目录: {self.results_dir}") | |
| 141 | + logger.info(f"Queries file: {queries_file}") | |
| 142 | + logger.info(f"Results directory: {self.results_dir}") | |
| 143 | 143 | |
| 144 | 144 | try: |
| 145 | 145 | with open(queries_file, 'r', encoding='utf-8') as f: |
| 146 | 146 | queries = [line.strip() for line in f if line.strip()] |
| 147 | 147 | |
| 148 | 148 | total_queries = len(queries) |
| 149 | - logger.info(f"共读取 {total_queries} 个查询") | |
| 149 | + logger.info(f"Total queries read: {total_queries}") | |
| 150 | 150 | |
| 151 | 151 | if start_index > 0: |
| 152 | 152 | queries = queries[start_index:] |
| 153 | - logger.info(f"从索引 {start_index} 开始") | |
| 153 | + logger.info(f"Starting from index {start_index}") | |
| 154 | 154 | |
| 155 | 155 | if max_queries: |
| 156 | 156 | queries = queries[:max_queries] |
| 157 | - logger.info(f"限制爬取数量: {max_queries}") | |
| 157 | + logger.info(f"Limit crawl count to: {max_queries}") | |
| 158 | 158 | |
| 159 | - logger.info(f"请求间隔: {delay} 秒") | |
| 159 | + logger.info(f"Request interval: {delay} seconds") | |
| 160 | 160 | logger.info("=" * 70) |
| 161 | 161 | |
| 162 | 162 | # 逐个爬取 |
| ... | ... | @@ -189,24 +189,24 @@ class AmazonCrawler: |
| 189 | 189 | duration = end_time - self.start_time |
| 190 | 190 | |
| 191 | 191 | logger.info("=" * 70) |
| 192 | - logger.info("爬取完成!") | |
| 192 | + logger.info("Crawling completed!") | |
| 193 | 193 | logger.info("=" * 70) |
| 194 | - logger.info(f"开始时间: {self.start_time.strftime('%Y-%m-%d %H:%M:%S')}") | |
| 195 | - logger.info(f"结束时间: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") | |
| 196 | - logger.info(f"总耗时: {duration}") | |
| 197 | - logger.info(f"总请求数: {self.total_requests}") | |
| 198 | - logger.info(f"成功: {self.successful_requests} ({self.successful_requests/self.total_requests*100:.1f}%)") | |
| 199 | - logger.info(f"失败: {self.failed_requests} ({self.failed_requests/self.total_requests*100:.1f}%)") | |
| 200 | - logger.info(f"结果保存在: {self.results_dir.absolute()}") | |
| 194 | + logger.info(f"Start time: {self.start_time.strftime('%Y-%m-%d %H:%M:%S')}") | |
| 195 | + logger.info(f"End time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") | |
| 196 | + logger.info(f"Total duration: {duration}") | |
| 197 | + logger.info(f"Total requests: {self.total_requests}") | |
| 198 | + logger.info(f"Successful: {self.successful_requests} ({self.successful_requests/self.total_requests*100:.1f}%)") | |
| 199 | + logger.info(f"Failed: {self.failed_requests} ({self.failed_requests/self.total_requests*100:.1f}%)") | |
| 200 | + logger.info(f"Results saved to: {self.results_dir.absolute()}") | |
| 201 | 201 | logger.info("=" * 70) |
| 202 | 202 | |
| 203 | 203 | except FileNotFoundError: |
| 204 | - logger.error(f"文件不存在: {queries_file}") | |
| 204 | + logger.error(f"File not found: {queries_file}") | |
| 205 | 205 | except KeyboardInterrupt: |
| 206 | - logger.warning("\n用户中断爬取") | |
| 207 | - logger.info(f"已完成: {self.successful_requests}/{self.total_requests}") | |
| 206 | + logger.warning("\nUser interrupted crawling") | |
| 207 | + logger.info(f"Completed: {self.successful_requests}/{self.total_requests}") | |
| 208 | 208 | except Exception as e: |
| 209 | - logger.error(f"爬取过程出错: {str(e)}", exc_info=True) | |
| 209 | + logger.error(f"Error during crawling: {str(e)}", exc_info=True) | |
| 210 | 210 | |
| 211 | 211 | |
| 212 | 212 | def load_config(): |
| ... | ... | @@ -216,7 +216,7 @@ def load_config(): |
| 216 | 216 | import config |
| 217 | 217 | return config |
| 218 | 218 | except ImportError: |
| 219 | - logger.warning("未找到配置文件 config.py,使用默认配置") | |
| 219 | + logger.warning("Config file config.py not found, using default configuration") | |
| 220 | 220 | return None |
| 221 | 221 | |
| 222 | 222 | |
| ... | ... | @@ -258,12 +258,12 @@ def main(): |
| 258 | 258 | if not api_key or not api_secret or \ |
| 259 | 259 | api_key == "your_api_key_here" or api_secret == "your_api_secret_here": |
| 260 | 260 | logger.error("=" * 70) |
| 261 | - logger.error("错误: 未配置API密钥!") | |
| 261 | + logger.error("Error: API key not configured!") | |
| 262 | 262 | logger.error("") |
| 263 | - logger.error("请使用以下任一方式配置API密钥:") | |
| 264 | - logger.error("1. 命令行参数: --key YOUR_KEY --secret YOUR_SECRET") | |
| 265 | - logger.error("2. 配置文件: 复制 config.example.py 为 config.py 并填入密钥") | |
| 266 | - logger.error("3. 环境变量: ONEBOUND_API_KEY 和 ONEBOUND_API_SECRET") | |
| 263 | + logger.error("Please configure API key using one of the following methods:") | |
| 264 | + logger.error("1. Command line arguments: --key YOUR_KEY --secret YOUR_SECRET") | |
| 265 | + logger.error("2. Config file: Copy config.example.py to config.py and fill in the keys") | |
| 266 | + logger.error("3. Environment variables: ONEBOUND_API_KEY and ONEBOUND_API_SECRET") | |
| 267 | 267 | logger.error("=" * 70) |
| 268 | 268 | return |
| 269 | 269 | ... | ... |
data/wanbang/analyze_results.py
| ... | ... | @@ -30,22 +30,22 @@ class ResultAnalyzer: |
| 30 | 30 | """ |
| 31 | 31 | self.results_dir = Path(results_dir) |
| 32 | 32 | if not self.results_dir.exists(): |
| 33 | - logger.error(f"结果目录不存在: {self.results_dir}") | |
| 33 | + logger.error(f"Results directory does not exist: {self.results_dir}") | |
| 34 | 34 | raise FileNotFoundError(f"Directory not found: {self.results_dir}") |
| 35 | 35 | |
| 36 | 36 | def analyze(self): |
| 37 | 37 | """执行完整分析""" |
| 38 | 38 | logger.info("=" * 70) |
| 39 | - logger.info("Amazon爬取结果分析") | |
| 39 | + logger.info("Amazon crawling result analysis") | |
| 40 | 40 | logger.info("=" * 70) |
| 41 | - logger.info(f"结果目录: {self.results_dir.absolute()}") | |
| 41 | + logger.info(f"Results directory: {self.results_dir.absolute()}") | |
| 42 | 42 | |
| 43 | 43 | # 获取所有JSON文件 |
| 44 | 44 | json_files = list(self.results_dir.glob("*.json")) |
| 45 | - logger.info(f"JSON文件数量: {len(json_files)}") | |
| 45 | + logger.info(f"Number of JSON files: {len(json_files)}") | |
| 46 | 46 | |
| 47 | 47 | if not json_files: |
| 48 | - logger.warning("未找到任何JSON文件") | |
| 48 | + logger.warning("No JSON files found") | |
| 49 | 49 | return |
| 50 | 50 | |
| 51 | 51 | # 统计数据 |
| ... | ... | @@ -62,7 +62,7 @@ class ResultAnalyzer: |
| 62 | 62 | } |
| 63 | 63 | |
| 64 | 64 | # 分析每个文件 |
| 65 | - logger.info("\n正在分析文件...") | |
| 65 | + logger.info("\nAnalyzing files...") | |
| 66 | 66 | for json_file in json_files: |
| 67 | 67 | try: |
| 68 | 68 | with open(json_file, 'r', encoding='utf-8') as f: |
| ... | ... | @@ -118,7 +118,7 @@ class ResultAnalyzer: |
| 118 | 118 | stats['failed'] += 1 |
| 119 | 119 | |
| 120 | 120 | except Exception as e: |
| 121 | - logger.error(f"分析文件失败 {json_file.name}: {str(e)}") | |
| 121 | + logger.error(f"Failed to analyze file {json_file.name}: {str(e)}") | |
| 122 | 122 | stats['failed'] += 1 |
| 123 | 123 | |
| 124 | 124 | # 输出统计结果 |
| ... | ... | @@ -130,29 +130,29 @@ class ResultAnalyzer: |
| 130 | 130 | def print_stats(self, stats: Dict): |
| 131 | 131 | """打印统计信息""" |
| 132 | 132 | logger.info("\n" + "=" * 70) |
| 133 | - logger.info("统计结果") | |
| 133 | + logger.info("Statistics results") | |
| 134 | 134 | logger.info("=" * 70) |
| 135 | 135 | |
| 136 | 136 | # 基本统计 |
| 137 | - logger.info(f"\n【文件统计】") | |
| 138 | - logger.info(f"总文件数: {stats['total_files']}") | |
| 139 | - logger.info(f"成功: {stats['successful']} ({stats['successful']/stats['total_files']*100:.1f}%)") | |
| 140 | - logger.info(f"失败: {stats['failed']} ({stats['failed']/stats['total_files']*100:.1f}%)") | |
| 141 | - | |
| 137 | + logger.info(f"\n[File Statistics]") | |
| 138 | + logger.info(f"Total files: {stats['total_files']}") | |
| 139 | + logger.info(f"Successful: {stats['successful']} ({stats['successful']/stats['total_files']*100:.1f}%)") | |
| 140 | + logger.info(f"Failed: {stats['failed']} ({stats['failed']/stats['total_files']*100:.1f}%)") | |
| 141 | + | |
| 142 | 142 | # 商品统计 |
| 143 | - logger.info(f"\n【商品统计】") | |
| 144 | - logger.info(f"总商品数: {stats['total_items']}") | |
| 143 | + logger.info(f"\n[Product Statistics]") | |
| 144 | + logger.info(f"Total products: {stats['total_items']}") | |
| 145 | 145 | if stats['items_per_query']: |
| 146 | 146 | avg_items = sum(stats['items_per_query']) / len(stats['items_per_query']) |
| 147 | 147 | max_items = max(stats['items_per_query']) |
| 148 | 148 | min_items = min(stats['items_per_query']) |
| 149 | - logger.info(f"平均每个查询: {avg_items:.1f} 个商品") | |
| 150 | - logger.info(f"最多: {max_items} 个") | |
| 151 | - logger.info(f"最少: {min_items} 个") | |
| 149 | + logger.info(f"Average per query: {avg_items:.1f} products") | |
| 150 | + logger.info(f"Maximum: {max_items} products") | |
| 151 | + logger.info(f"Minimum: {min_items} products") | |
| 152 | 152 | |
| 153 | 153 | # 价格分布 |
| 154 | 154 | if stats['price_ranges']: |
| 155 | - logger.info(f"\n【价格分布】") | |
| 155 | + logger.info(f"\n[Price Distribution]") | |
| 156 | 156 | total_priced = sum(stats['price_ranges'].values()) |
| 157 | 157 | for price_range, count in sorted(stats['price_ranges'].items()): |
| 158 | 158 | percentage = count / total_priced * 100 |
| ... | ... | @@ -162,15 +162,15 @@ class ResultAnalyzer: |
| 162 | 162 | if stats['avg_reviews']: |
| 163 | 163 | avg_reviews = sum(stats['avg_reviews']) / len(stats['avg_reviews']) |
| 164 | 164 | max_reviews = max(stats['avg_reviews']) |
| 165 | - logger.info(f"\n【评论统计】") | |
| 166 | - logger.info(f"平均评论数: {avg_reviews:.0f}") | |
| 167 | - logger.info(f"最高评论数: {max_reviews}") | |
| 165 | + logger.info(f"\n[Review Statistics]") | |
| 166 | + logger.info(f"Average reviews: {avg_reviews:.0f}") | |
| 167 | + logger.info(f"Highest reviews: {max_reviews}") | |
| 168 | 168 | |
| 169 | 169 | # 评分统计 |
| 170 | 170 | if stats['avg_stars']: |
| 171 | 171 | avg_stars = sum(stats['avg_stars']) / len(stats['avg_stars']) |
| 172 | - logger.info(f"\n【评分统计】") | |
| 173 | - logger.info(f"平均评分: {avg_stars:.2f}") | |
| 172 | + logger.info(f"\n[Rating Statistics]") | |
| 173 | + logger.info(f"Average rating: {avg_stars:.2f}") | |
| 174 | 174 | |
| 175 | 175 | logger.info("\n" + "=" * 70) |
| 176 | 176 | |
| ... | ... | @@ -204,9 +204,9 @@ class ResultAnalyzer: |
| 204 | 204 | try: |
| 205 | 205 | with open(report_file, 'w', encoding='utf-8') as f: |
| 206 | 206 | json.dump(report, f, ensure_ascii=False, indent=2) |
| 207 | - logger.info(f"分析报告已保存: {report_file}") | |
| 207 | + logger.info(f"Analysis report saved: {report_file}") | |
| 208 | 208 | except Exception as e: |
| 209 | - logger.error(f"保存报告失败: {str(e)}") | |
| 209 | + logger.error(f"Failed to save report: {str(e)}") | |
| 210 | 210 | |
| 211 | 211 | def export_csv(self, output_file: str = None): |
| 212 | 212 | """导出为CSV格式""" |
| ... | ... | @@ -215,7 +215,7 @@ class ResultAnalyzer: |
| 215 | 215 | if output_file is None: |
| 216 | 216 | output_file = self.results_dir / "items_export.csv" |
| 217 | 217 | |
| 218 | - logger.info(f"\n导出CSV: {output_file}") | |
| 218 | + logger.info(f"\nExporting to CSV: {output_file}") | |
| 219 | 219 | |
| 220 | 220 | json_files = list(self.results_dir.glob("*.json")) |
| 221 | 221 | |
| ... | ... | @@ -243,9 +243,9 @@ class ResultAnalyzer: |
| 243 | 243 | item.get('detail_url', '') |
| 244 | 244 | ]) |
| 245 | 245 | except Exception as e: |
| 246 | - logger.error(f"导出失败 {json_file.name}: {str(e)}") | |
| 246 | + logger.error(f"Export failed for {json_file.name}: {str(e)}") | |
| 247 | 247 | |
| 248 | - logger.info(f"CSV导出完成: {output_file}") | |
| 248 | + logger.info(f"CSV export completed: {output_file}") | |
| 249 | 249 | |
| 250 | 250 | |
| 251 | 251 | def main(): |
| ... | ... | @@ -270,7 +270,7 @@ def main(): |
| 270 | 270 | analyzer.export_csv(args.output) |
| 271 | 271 | |
| 272 | 272 | except Exception as e: |
| 273 | - logger.error(f"分析失败: {str(e)}") | |
| 273 | + logger.error(f"Analysis failed: {str(e)}") | |
| 274 | 274 | |
| 275 | 275 | |
| 276 | 276 | if __name__ == "__main__": | ... | ... |
docs/常用查询 - sql.sql
scripts/check_es_data.py
| 1 | 1 | #!/usr/bin/env python3 |
| 2 | 2 | """ |
| 3 | -检查ES索引中的实际数据,看分面字段是否有值 | |
| 3 | +Check actual data in ES index to see if facet fields have values | |
| 4 | 4 | """ |
| 5 | 5 | |
| 6 | 6 | import sys |
| ... | ... | @@ -14,9 +14,9 @@ from utils.es_client import ESClient |
| 14 | 14 | |
| 15 | 15 | |
| 16 | 16 | def check_es_facet_fields(es_client, tenant_id: str, size: int = 5): |
| 17 | - """检查ES中的分面相关字段""" | |
| 17 | + """Check facet-related fields in ES""" | |
| 18 | 18 | print("\n" + "="*60) |
| 19 | - print("检查ES索引中的分面字段数据") | |
| 19 | + print("Checking facet field data in ES index") | |
| 20 | 20 | print("="*60) |
| 21 | 21 | |
| 22 | 22 | query = { |
| ... | ... | @@ -46,14 +46,14 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5): |
| 46 | 46 | hits = response.get('hits', {}).get('hits', []) |
| 47 | 47 | total = response.get('hits', {}).get('total', {}).get('value', 0) |
| 48 | 48 | |
| 49 | - print(f"\n总文档数: {total}") | |
| 50 | - print(f"检查前 {len(hits)} 个文档:\n") | |
| 49 | + print(f"\nTotal documents: {total}") | |
| 50 | + print(f"Checking first {len(hits)} documents:\n") | |
| 51 | 51 | |
| 52 | 52 | for i, hit in enumerate(hits, 1): |
| 53 | 53 | source = hit.get('_source', {}) |
| 54 | 54 | title_obj = source.get("title") or {} |
| 55 | 55 | category_path_obj = source.get("category_path") or {} |
| 56 | - print(f"文档 {i}:") | |
| 56 | + print(f"Document {i}:") | |
| 57 | 57 | print(f" spu_id: {source.get('spu_id')}") |
| 58 | 58 | print(f" title.zh: {str(title_obj.get('zh', ''))[:50] if isinstance(title_obj, dict) else ''}") |
| 59 | 59 | print(f" category1_name: {source.get('category1_name')}") |
| ... | ... | @@ -67,24 +67,24 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5): |
| 67 | 67 | |
| 68 | 68 | specs = source.get('specifications', []) |
| 69 | 69 | if specs: |
| 70 | - print(f" specifications 数量: {len(specs)}") | |
| 70 | + print(f" specifications count: {len(specs)}") | |
| 71 | 71 | # 显示前3个specifications |
| 72 | 72 | for spec in specs[:3]: |
| 73 | 73 | print(f" - name: {spec.get('name')}, value: {spec.get('value')}") |
| 74 | 74 | else: |
| 75 | - print(f" specifications: 空") | |
| 75 | + print(f" specifications: empty") | |
| 76 | 76 | print() |
| 77 | 77 | |
| 78 | 78 | except Exception as e: |
| 79 | - print(f"错误: {e}") | |
| 79 | + print(f"Error: {e}") | |
| 80 | 80 | import traceback |
| 81 | 81 | traceback.print_exc() |
| 82 | 82 | |
| 83 | 83 | |
| 84 | 84 | def check_facet_aggregations(es_client, tenant_id: str): |
| 85 | - """检查分面聚合查询""" | |
| 85 | + """Check facet aggregation queries""" | |
| 86 | 86 | print("\n" + "="*60) |
| 87 | - print("检查分面聚合查询结果") | |
| 87 | + print("Checking facet aggregation query results") | |
| 88 | 88 | print("="*60) |
| 89 | 89 | |
| 90 | 90 | query = { |
| ... | ... | @@ -174,16 +174,16 @@ def check_facet_aggregations(es_client, tenant_id: str): |
| 174 | 174 | response = es_client.client.search(index="search_products", body=query) |
| 175 | 175 | aggs = response.get('aggregations', {}) |
| 176 | 176 | |
| 177 | - print("\n1. category1_name 分面:") | |
| 177 | + print("\n1. category1_name facet:") | |
| 178 | 178 | category1 = aggs.get('category1_facet', {}) |
| 179 | 179 | buckets = category1.get('buckets', []) |
| 180 | 180 | if buckets: |
| 181 | 181 | for bucket in buckets: |
| 182 | 182 | print(f" {bucket['key']}: {bucket['doc_count']}") |
| 183 | 183 | else: |
| 184 | - print(" 空(没有数据)") | |
| 184 | + print(" empty (no data)") | |
| 185 | 185 | |
| 186 | - print("\n2. specifications.color 分面:") | |
| 186 | + print("\n2. specifications.color facet:") | |
| 187 | 187 | color_agg = aggs.get('color_facet', {}) |
| 188 | 188 | color_filter = color_agg.get('filter_by_name', {}) |
| 189 | 189 | color_values = color_filter.get('value_counts', {}) |
| ... | ... | @@ -192,9 +192,9 @@ def check_facet_aggregations(es_client, tenant_id: str): |
| 192 | 192 | for bucket in color_buckets: |
| 193 | 193 | print(f" {bucket['key']}: {bucket['doc_count']}") |
| 194 | 194 | else: |
| 195 | - print(" 空(没有数据)") | |
| 195 | + print(" empty (no data)") | |
| 196 | 196 | |
| 197 | - print("\n3. specifications.size 分面:") | |
| 197 | + print("\n3. specifications.size facet:") | |
| 198 | 198 | size_agg = aggs.get('size_facet', {}) |
| 199 | 199 | size_filter = size_agg.get('filter_by_name', {}) |
| 200 | 200 | size_values = size_filter.get('value_counts', {}) |
| ... | ... | @@ -203,9 +203,9 @@ def check_facet_aggregations(es_client, tenant_id: str): |
| 203 | 203 | for bucket in size_buckets: |
| 204 | 204 | print(f" {bucket['key']}: {bucket['doc_count']}") |
| 205 | 205 | else: |
| 206 | - print(" 空(没有数据)") | |
| 206 | + print(" empty (no data)") | |
| 207 | 207 | |
| 208 | - print("\n4. specifications.material 分面:") | |
| 208 | + print("\n4. specifications.material facet:") | |
| 209 | 209 | material_agg = aggs.get('material_facet', {}) |
| 210 | 210 | material_filter = material_agg.get('filter_by_name', {}) |
| 211 | 211 | material_values = material_filter.get('value_counts', {}) |
| ... | ... | @@ -214,19 +214,19 @@ def check_facet_aggregations(es_client, tenant_id: str): |
| 214 | 214 | for bucket in material_buckets: |
| 215 | 215 | print(f" {bucket['key']}: {bucket['doc_count']}") |
| 216 | 216 | else: |
| 217 | - print(" 空(没有数据)") | |
| 217 | + print(" empty (no data)") | |
| 218 | 218 | |
| 219 | 219 | except Exception as e: |
| 220 | - print(f"错误: {e}") | |
| 220 | + print(f"Error: {e}") | |
| 221 | 221 | import traceback |
| 222 | 222 | traceback.print_exc() |
| 223 | 223 | |
| 224 | 224 | |
| 225 | 225 | def main(): |
| 226 | - parser = argparse.ArgumentParser(description='检查ES索引中的分面字段数据') | |
| 226 | + parser = argparse.ArgumentParser(description='Check facet field data in ES index') | |
| 227 | 227 | parser.add_argument('--tenant-id', required=True, help='Tenant ID') |
| 228 | - parser.add_argument('--es-host', help='Elasticsearch host (或使用环境变量 ES_HOST, 默认: http://localhost:9200)') | |
| 229 | - parser.add_argument('--size', type=int, default=5, help='检查的文档数量 (默认: 5)') | |
| 228 | + parser.add_argument('--es-host', help='Elasticsearch host (or use env var ES_HOST, default: http://localhost:9200)') | |
| 229 | + parser.add_argument('--size', type=int, default=5, help='Number of documents to check (default: 5)') | |
| 230 | 230 | |
| 231 | 231 | args = parser.parse_args() |
| 232 | 232 | |
| ... | ... | @@ -235,7 +235,7 @@ def main(): |
| 235 | 235 | es_username = os.environ.get('ES_USERNAME') |
| 236 | 236 | es_password = os.environ.get('ES_PASSWORD') |
| 237 | 237 | |
| 238 | - print(f"连接Elasticsearch: {es_host}") | |
| 238 | + print(f"Connecting to Elasticsearch: {es_host}") | |
| 239 | 239 | print(f"Tenant ID: {args.tenant_id}\n") |
| 240 | 240 | |
| 241 | 241 | try: |
| ... | ... | @@ -245,11 +245,11 @@ def main(): |
| 245 | 245 | es_client = ESClient(hosts=[es_host]) |
| 246 | 246 | |
| 247 | 247 | if not es_client.ping(): |
| 248 | - print(f"✗ 无法连接到Elasticsearch: {es_host}") | |
| 248 | + print(f"✗ Cannot connect to Elasticsearch: {es_host}") | |
| 249 | 249 | return 1 |
| 250 | - print("✓ Elasticsearch连接成功\n") | |
| 250 | + print("✓ Elasticsearch connected successfully\n") | |
| 251 | 251 | except Exception as e: |
| 252 | - print(f"✗ 连接Elasticsearch失败: {e}") | |
| 252 | + print(f"✗ Failed to connect to Elasticsearch: {e}") | |
| 253 | 253 | return 1 |
| 254 | 254 | |
| 255 | 255 | # 检查ES数据 |
| ... | ... | @@ -257,7 +257,7 @@ def main(): |
| 257 | 257 | check_facet_aggregations(es_client, args.tenant_id) |
| 258 | 258 | |
| 259 | 259 | print("\n" + "="*60) |
| 260 | - print("检查完成") | |
| 260 | + print("Check completed") | |
| 261 | 261 | print("="*60) |
| 262 | 262 | |
| 263 | 263 | return 0 | ... | ... |
scripts/monitor_eviction.py
| 1 | 1 | #!/usr/bin/env python3 |
| 2 | 2 | """ |
| 3 | -实时监控 Redis 缓存淘汰事件 | |
| 3 | +Real-time monitoring of Redis cache eviction events | |
| 4 | 4 | |
| 5 | -持续监控 evicted_keys 统计,当有新的淘汰发生时发出警告 | |
| 5 | +Continuously monitor evicted_keys statistics and warn when new evictions occur | |
| 6 | 6 | """ |
| 7 | 7 | |
| 8 | 8 | import redis |
| ... | ... | @@ -18,7 +18,7 @@ sys.path.insert(0, str(project_root)) |
| 18 | 18 | from config.env_config import REDIS_CONFIG |
| 19 | 19 | |
| 20 | 20 | def get_redis_client(): |
| 21 | - """获取 Redis 客户端""" | |
| 21 | + """Get Redis client""" | |
| 22 | 22 | return redis.Redis( |
| 23 | 23 | host=REDIS_CONFIG.get('host', 'localhost'), |
| 24 | 24 | port=REDIS_CONFIG.get('port', 6479), |
| ... | ... | @@ -29,12 +29,12 @@ def get_redis_client(): |
| 29 | 29 | ) |
| 30 | 30 | |
| 31 | 31 | def monitor_eviction(interval=5): |
| 32 | - """持续监控淘汰事件""" | |
| 32 | + """Continuously monitor eviction events""" | |
| 33 | 33 | print("=" * 60) |
| 34 | - print("Redis 缓存淘汰实时监控") | |
| 34 | + print("Redis Cache Eviction Real-time Monitoring") | |
| 35 | 35 | print("=" * 60) |
| 36 | - print(f"监控间隔: {interval} 秒") | |
| 37 | - print("按 Ctrl+C 停止监控") | |
| 36 | + print(f"Monitoring interval: {interval} seconds") | |
| 37 | + print("Press Ctrl+C to stop monitoring") | |
| 38 | 38 | print("=" * 60) |
| 39 | 39 | print() |
| 40 | 40 | |
| ... | ... | @@ -42,7 +42,7 @@ def monitor_eviction(interval=5): |
| 42 | 42 | client = get_redis_client() |
| 43 | 43 | client.ping() |
| 44 | 44 | except Exception as e: |
| 45 | - print(f"❌ Redis 连接失败: {e}") | |
| 45 | + print(f"❌ Redis connection failed: {e}") | |
| 46 | 46 | return |
| 47 | 47 | |
| 48 | 48 | last_evicted = 0 |
| ... | ... | @@ -55,8 +55,8 @@ def monitor_eviction(interval=5): |
| 55 | 55 | if current_evicted > last_evicted: |
| 56 | 56 | new_evictions = current_evicted - last_evicted |
| 57 | 57 | timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') |
| 58 | - print(f"[{timestamp}] ⚠️ 检测到 {new_evictions} 个新的淘汰事件!") | |
| 59 | - print(f" 累计淘汰总数: {current_evicted:,}") | |
| 58 | + print(f"[{timestamp}] ⚠️ Detected {new_evictions} new eviction events!") | |
| 59 | + print(f" Total evictions: {current_evicted:,}") | |
| 60 | 60 | |
| 61 | 61 | # 检查内存使用情况 |
| 62 | 62 | mem_info = client.info('memory') |
| ... | ... | @@ -64,26 +64,26 @@ def monitor_eviction(interval=5): |
| 64 | 64 | used_memory = mem_info.get('used_memory', 0) |
| 65 | 65 | if maxmemory > 0: |
| 66 | 66 | usage_percent = (used_memory / maxmemory) * 100 |
| 67 | - print(f" 当前内存使用率: {usage_percent:.2f}%") | |
| 67 | + print(f" Current memory usage: {usage_percent:.2f}%") | |
| 68 | 68 | |
| 69 | 69 | last_evicted = current_evicted |
| 70 | 70 | else: |
| 71 | 71 | timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') |
| 72 | - print(f"[{timestamp}] ✅ 无新淘汰事件 (累计: {current_evicted:,})") | |
| 72 | + print(f"[{timestamp}] ✅ No new eviction events (Total: {current_evicted:,})") | |
| 73 | 73 | |
| 74 | 74 | time.sleep(interval) |
| 75 | 75 | |
| 76 | 76 | except KeyboardInterrupt: |
| 77 | - print("\n\n监控已停止") | |
| 77 | + print("\n\nMonitoring stopped") | |
| 78 | 78 | except Exception as e: |
| 79 | - print(f"\n❌ 监控出错: {e}") | |
| 79 | + print(f"\n❌ Monitoring error: {e}") | |
| 80 | 80 | import traceback |
| 81 | 81 | traceback.print_exc() |
| 82 | 82 | |
| 83 | 83 | if __name__ == "__main__": |
| 84 | 84 | import argparse |
| 85 | - parser = argparse.ArgumentParser(description='实时监控 Redis 缓存淘汰事件') | |
| 86 | - parser.add_argument('--interval', type=int, default=5, help='监控间隔(秒),默认 5 秒') | |
| 85 | + parser = argparse.ArgumentParser(description='Real-time monitoring of Redis cache eviction events') | |
| 86 | + parser.add_argument('--interval', type=int, default=5, help='Monitoring interval in seconds (default: 5)') | |
| 87 | 87 | args = parser.parse_args() |
| 88 | 88 | |
| 89 | 89 | monitor_eviction(interval=args.interval) | ... | ... |