Commit 7746376c302e7ef1da9aba268a67baaa1d2e5175

Authored by tangwang
1 parent 506c39b7

日志统一用英文

data/wanbang/amazon_crawler.py
@@ -85,7 +85,7 @@ class AmazonCrawler: @@ -85,7 +85,7 @@ class AmazonCrawler:
85 params[param] = kwargs[param] 85 params[param] = kwargs[param]
86 86
87 try: 87 try:
88 - logger.info(f"正在请求: {query}") 88 + logger.info(f"Making request: {query}")
89 self.total_requests += 1 89 self.total_requests += 1
90 90
91 response = requests.get( 91 response = requests.get(
@@ -98,20 +98,20 @@ class AmazonCrawler: @@ -98,20 +98,20 @@ class AmazonCrawler:
98 data = response.json() 98 data = response.json()
99 99
100 if data.get('error_code') == '0000': 100 if data.get('error_code') == '0000':
101 - logger.info(f"✓ 成功: {query} - 获得 {data.get('items', {}).get('real_total_results', 0)} 个结果") 101 + logger.info(f"✓ Success: {query} - Got {data.get('items', {}).get('real_total_results', 0)} results")
102 self.successful_requests += 1 102 self.successful_requests += 1
103 return data 103 return data
104 else: 104 else:
105 - logger.error(f"✗ API错误: {query} - {data.get('reason', 'Unknown error')}") 105 + logger.error(f"✗ API error: {query} - {data.get('reason', 'Unknown error')}")
106 self.failed_requests += 1 106 self.failed_requests += 1
107 return data 107 return data
108 108
109 except requests.exceptions.RequestException as e: 109 except requests.exceptions.RequestException as e:
110 - logger.error(f"✗ 请求失败: {query} - {str(e)}") 110 + logger.error(f"✗ Request failed: {query} - {str(e)}")
111 self.failed_requests += 1 111 self.failed_requests += 1
112 return None 112 return None
113 except json.JSONDecodeError as e: 113 except json.JSONDecodeError as e:
114 - logger.error(f"✗ JSON解析失败: {query} - {str(e)}") 114 + logger.error(f"✗ JSON parse failed: {query} - {str(e)}")
115 self.failed_requests += 1 115 self.failed_requests += 1
116 return None 116 return None
117 117
@@ -127,36 +127,36 @@ class AmazonCrawler: @@ -127,36 +127,36 @@ class AmazonCrawler:
127 try: 127 try:
128 with open(filepath, 'w', encoding='utf-8') as f: 128 with open(filepath, 'w', encoding='utf-8') as f:
129 json.dump(data, f, ensure_ascii=False, indent=2) 129 json.dump(data, f, ensure_ascii=False, indent=2)
130 - logger.debug(f"已保存: {filename}") 130 + logger.debug(f"Saved: {filename}")
131 except Exception as e: 131 except Exception as e:
132 - logger.error(f"保存失败: {filename} - {str(e)}") 132 + logger.error(f"Save failed: {filename} - {str(e)}")
133 133
134 def crawl_from_file(self, queries_file: str, delay: float = 1.0, 134 def crawl_from_file(self, queries_file: str, delay: float = 1.0,
135 start_index: int = 0, max_queries: Optional[int] = None): 135 start_index: int = 0, max_queries: Optional[int] = None):
136 """从文件读取查询列表并批量爬取""" 136 """从文件读取查询列表并批量爬取"""
137 self.start_time = datetime.now() 137 self.start_time = datetime.now()
138 logger.info("=" * 70) 138 logger.info("=" * 70)
139 - logger.info(f"Amazon爬虫启动 - {self.start_time.strftime('%Y-%m-%d %H:%M:%S')}") 139 + logger.info(f"Amazon crawler started - {self.start_time.strftime('%Y-%m-%d %H:%M:%S')}")
140 logger.info("=" * 70) 140 logger.info("=" * 70)
141 - logger.info(f"查询文件: {queries_file}")  
142 - logger.info(f"结果目录: {self.results_dir}") 141 + logger.info(f"Queries file: {queries_file}")
  142 + logger.info(f"Results directory: {self.results_dir}")
143 143
144 try: 144 try:
145 with open(queries_file, 'r', encoding='utf-8') as f: 145 with open(queries_file, 'r', encoding='utf-8') as f:
146 queries = [line.strip() for line in f if line.strip()] 146 queries = [line.strip() for line in f if line.strip()]
147 147
148 total_queries = len(queries) 148 total_queries = len(queries)
149 - logger.info(f"共读取 {total_queries} 个查询") 149 + logger.info(f"Total queries read: {total_queries}")
150 150
151 if start_index > 0: 151 if start_index > 0:
152 queries = queries[start_index:] 152 queries = queries[start_index:]
153 - logger.info(f"从索引 {start_index} 开始") 153 + logger.info(f"Starting from index {start_index}")
154 154
155 if max_queries: 155 if max_queries:
156 queries = queries[:max_queries] 156 queries = queries[:max_queries]
157 - logger.info(f"限制爬取数量: {max_queries}") 157 + logger.info(f"Limit crawl count to: {max_queries}")
158 158
159 - logger.info(f"请求间隔: {delay} 秒") 159 + logger.info(f"Request interval: {delay} seconds")
160 logger.info("=" * 70) 160 logger.info("=" * 70)
161 161
162 # 逐个爬取 162 # 逐个爬取
@@ -189,24 +189,24 @@ class AmazonCrawler: @@ -189,24 +189,24 @@ class AmazonCrawler:
189 duration = end_time - self.start_time 189 duration = end_time - self.start_time
190 190
191 logger.info("=" * 70) 191 logger.info("=" * 70)
192 - logger.info("爬取完成!") 192 + logger.info("Crawling completed!")
193 logger.info("=" * 70) 193 logger.info("=" * 70)
194 - logger.info(f"开始时间: {self.start_time.strftime('%Y-%m-%d %H:%M:%S')}")  
195 - logger.info(f"结束时间: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")  
196 - logger.info(f"总耗时: {duration}")  
197 - logger.info(f"总请求数: {self.total_requests}")  
198 - logger.info(f"成功: {self.successful_requests} ({self.successful_requests/self.total_requests*100:.1f}%)")  
199 - logger.info(f"失败: {self.failed_requests} ({self.failed_requests/self.total_requests*100:.1f}%)")  
200 - logger.info(f"结果保存在: {self.results_dir.absolute()}") 194 + logger.info(f"Start time: {self.start_time.strftime('%Y-%m-%d %H:%M:%S')}")
  195 + logger.info(f"End time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
  196 + logger.info(f"Total duration: {duration}")
  197 + logger.info(f"Total requests: {self.total_requests}")
  198 + logger.info(f"Successful: {self.successful_requests} ({self.successful_requests/self.total_requests*100:.1f}%)")
  199 + logger.info(f"Failed: {self.failed_requests} ({self.failed_requests/self.total_requests*100:.1f}%)")
  200 + logger.info(f"Results saved to: {self.results_dir.absolute()}")
201 logger.info("=" * 70) 201 logger.info("=" * 70)
202 202
203 except FileNotFoundError: 203 except FileNotFoundError:
204 - logger.error(f"文件不存在: {queries_file}") 204 + logger.error(f"File not found: {queries_file}")
205 except KeyboardInterrupt: 205 except KeyboardInterrupt:
206 - logger.warning("\n用户中断爬取")  
207 - logger.info(f"已完成: {self.successful_requests}/{self.total_requests}") 206 + logger.warning("\nUser interrupted crawling")
  207 + logger.info(f"Completed: {self.successful_requests}/{self.total_requests}")
208 except Exception as e: 208 except Exception as e:
209 - logger.error(f"爬取过程出错: {str(e)}", exc_info=True) 209 + logger.error(f"Error during crawling: {str(e)}", exc_info=True)
210 210
211 211
212 def load_config(): 212 def load_config():
@@ -216,7 +216,7 @@ def load_config(): @@ -216,7 +216,7 @@ def load_config():
216 import config 216 import config
217 return config 217 return config
218 except ImportError: 218 except ImportError:
219 - logger.warning("未找到配置文件 config.py,使用默认配置") 219 + logger.warning("Config file config.py not found, using default configuration")
220 return None 220 return None
221 221
222 222
@@ -258,12 +258,12 @@ def main(): @@ -258,12 +258,12 @@ def main():
258 if not api_key or not api_secret or \ 258 if not api_key or not api_secret or \
259 api_key == "your_api_key_here" or api_secret == "your_api_secret_here": 259 api_key == "your_api_key_here" or api_secret == "your_api_secret_here":
260 logger.error("=" * 70) 260 logger.error("=" * 70)
261 - logger.error("错误: 未配置API密钥!") 261 + logger.error("Error: API key not configured!")
262 logger.error("") 262 logger.error("")
263 - logger.error("请使用以下任一方式配置API密钥:")  
264 - logger.error("1. 命令行参数: --key YOUR_KEY --secret YOUR_SECRET")  
265 - logger.error("2. 配置文件: 复制 config.example.py 为 config.py 并填入密钥")  
266 - logger.error("3. 环境变量: ONEBOUND_API_KEY 和 ONEBOUND_API_SECRET") 263 + logger.error("Please configure API key using one of the following methods:")
  264 + logger.error("1. Command line arguments: --key YOUR_KEY --secret YOUR_SECRET")
  265 + logger.error("2. Config file: Copy config.example.py to config.py and fill in the keys")
  266 + logger.error("3. Environment variables: ONEBOUND_API_KEY and ONEBOUND_API_SECRET")
267 logger.error("=" * 70) 267 logger.error("=" * 70)
268 return 268 return
269 269
data/wanbang/analyze_results.py
@@ -30,22 +30,22 @@ class ResultAnalyzer: @@ -30,22 +30,22 @@ class ResultAnalyzer:
30 """ 30 """
31 self.results_dir = Path(results_dir) 31 self.results_dir = Path(results_dir)
32 if not self.results_dir.exists(): 32 if not self.results_dir.exists():
33 - logger.error(f"结果目录不存在: {self.results_dir}") 33 + logger.error(f"Results directory does not exist: {self.results_dir}")
34 raise FileNotFoundError(f"Directory not found: {self.results_dir}") 34 raise FileNotFoundError(f"Directory not found: {self.results_dir}")
35 35
36 def analyze(self): 36 def analyze(self):
37 """执行完整分析""" 37 """执行完整分析"""
38 logger.info("=" * 70) 38 logger.info("=" * 70)
39 - logger.info("Amazon爬取结果分析") 39 + logger.info("Amazon crawling result analysis")
40 logger.info("=" * 70) 40 logger.info("=" * 70)
41 - logger.info(f"结果目录: {self.results_dir.absolute()}") 41 + logger.info(f"Results directory: {self.results_dir.absolute()}")
42 42
43 # 获取所有JSON文件 43 # 获取所有JSON文件
44 json_files = list(self.results_dir.glob("*.json")) 44 json_files = list(self.results_dir.glob("*.json"))
45 - logger.info(f"JSON文件数量: {len(json_files)}") 45 + logger.info(f"Number of JSON files: {len(json_files)}")
46 46
47 if not json_files: 47 if not json_files:
48 - logger.warning("未找到任何JSON文件") 48 + logger.warning("No JSON files found")
49 return 49 return
50 50
51 # 统计数据 51 # 统计数据
@@ -62,7 +62,7 @@ class ResultAnalyzer: @@ -62,7 +62,7 @@ class ResultAnalyzer:
62 } 62 }
63 63
64 # 分析每个文件 64 # 分析每个文件
65 - logger.info("\n正在分析文件...") 65 + logger.info("\nAnalyzing files...")
66 for json_file in json_files: 66 for json_file in json_files:
67 try: 67 try:
68 with open(json_file, 'r', encoding='utf-8') as f: 68 with open(json_file, 'r', encoding='utf-8') as f:
@@ -118,7 +118,7 @@ class ResultAnalyzer: @@ -118,7 +118,7 @@ class ResultAnalyzer:
118 stats['failed'] += 1 118 stats['failed'] += 1
119 119
120 except Exception as e: 120 except Exception as e:
121 - logger.error(f"分析文件失败 {json_file.name}: {str(e)}") 121 + logger.error(f"Failed to analyze file {json_file.name}: {str(e)}")
122 stats['failed'] += 1 122 stats['failed'] += 1
123 123
124 # 输出统计结果 124 # 输出统计结果
@@ -130,29 +130,29 @@ class ResultAnalyzer: @@ -130,29 +130,29 @@ class ResultAnalyzer:
130 def print_stats(self, stats: Dict): 130 def print_stats(self, stats: Dict):
131 """打印统计信息""" 131 """打印统计信息"""
132 logger.info("\n" + "=" * 70) 132 logger.info("\n" + "=" * 70)
133 - logger.info("统计结果") 133 + logger.info("Statistics results")
134 logger.info("=" * 70) 134 logger.info("=" * 70)
135 135
136 # 基本统计 136 # 基本统计
137 - logger.info(f"\n【文件统计】")  
138 - logger.info(f"总文件数: {stats['total_files']}")  
139 - logger.info(f"成功: {stats['successful']} ({stats['successful']/stats['total_files']*100:.1f}%)")  
140 - logger.info(f"失败: {stats['failed']} ({stats['failed']/stats['total_files']*100:.1f}%)")  
141 - 137 + logger.info(f"\n[File Statistics]")
  138 + logger.info(f"Total files: {stats['total_files']}")
  139 + logger.info(f"Successful: {stats['successful']} ({stats['successful']/stats['total_files']*100:.1f}%)")
  140 + logger.info(f"Failed: {stats['failed']} ({stats['failed']/stats['total_files']*100:.1f}%)")
  141 +
142 # 商品统计 142 # 商品统计
143 - logger.info(f"\n【商品统计】")  
144 - logger.info(f"总商品数: {stats['total_items']}") 143 + logger.info(f"\n[Product Statistics]")
  144 + logger.info(f"Total products: {stats['total_items']}")
145 if stats['items_per_query']: 145 if stats['items_per_query']:
146 avg_items = sum(stats['items_per_query']) / len(stats['items_per_query']) 146 avg_items = sum(stats['items_per_query']) / len(stats['items_per_query'])
147 max_items = max(stats['items_per_query']) 147 max_items = max(stats['items_per_query'])
148 min_items = min(stats['items_per_query']) 148 min_items = min(stats['items_per_query'])
149 - logger.info(f"平均每个查询: {avg_items:.1f} 个商品")  
150 - logger.info(f"最多: {max_items} 个")  
151 - logger.info(f"最少: {min_items} 个") 149 + logger.info(f"Average per query: {avg_items:.1f} products")
  150 + logger.info(f"Maximum: {max_items} products")
  151 + logger.info(f"Minimum: {min_items} products")
152 152
153 # 价格分布 153 # 价格分布
154 if stats['price_ranges']: 154 if stats['price_ranges']:
155 - logger.info(f"\n【价格分布】") 155 + logger.info(f"\n[Price Distribution]")
156 total_priced = sum(stats['price_ranges'].values()) 156 total_priced = sum(stats['price_ranges'].values())
157 for price_range, count in sorted(stats['price_ranges'].items()): 157 for price_range, count in sorted(stats['price_ranges'].items()):
158 percentage = count / total_priced * 100 158 percentage = count / total_priced * 100
@@ -162,15 +162,15 @@ class ResultAnalyzer: @@ -162,15 +162,15 @@ class ResultAnalyzer:
162 if stats['avg_reviews']: 162 if stats['avg_reviews']:
163 avg_reviews = sum(stats['avg_reviews']) / len(stats['avg_reviews']) 163 avg_reviews = sum(stats['avg_reviews']) / len(stats['avg_reviews'])
164 max_reviews = max(stats['avg_reviews']) 164 max_reviews = max(stats['avg_reviews'])
165 - logger.info(f"\n【评论统计】")  
166 - logger.info(f"平均评论数: {avg_reviews:.0f}")  
167 - logger.info(f"最高评论数: {max_reviews}") 165 + logger.info(f"\n[Review Statistics]")
  166 + logger.info(f"Average reviews: {avg_reviews:.0f}")
  167 + logger.info(f"Highest reviews: {max_reviews}")
168 168
169 # 评分统计 169 # 评分统计
170 if stats['avg_stars']: 170 if stats['avg_stars']:
171 avg_stars = sum(stats['avg_stars']) / len(stats['avg_stars']) 171 avg_stars = sum(stats['avg_stars']) / len(stats['avg_stars'])
172 - logger.info(f"\n【评分统计】")  
173 - logger.info(f"平均评分: {avg_stars:.2f}") 172 + logger.info(f"\n[Rating Statistics]")
  173 + logger.info(f"Average rating: {avg_stars:.2f}")
174 174
175 logger.info("\n" + "=" * 70) 175 logger.info("\n" + "=" * 70)
176 176
@@ -204,9 +204,9 @@ class ResultAnalyzer: @@ -204,9 +204,9 @@ class ResultAnalyzer:
204 try: 204 try:
205 with open(report_file, 'w', encoding='utf-8') as f: 205 with open(report_file, 'w', encoding='utf-8') as f:
206 json.dump(report, f, ensure_ascii=False, indent=2) 206 json.dump(report, f, ensure_ascii=False, indent=2)
207 - logger.info(f"分析报告已保存: {report_file}") 207 + logger.info(f"Analysis report saved: {report_file}")
208 except Exception as e: 208 except Exception as e:
209 - logger.error(f"保存报告失败: {str(e)}") 209 + logger.error(f"Failed to save report: {str(e)}")
210 210
211 def export_csv(self, output_file: str = None): 211 def export_csv(self, output_file: str = None):
212 """导出为CSV格式""" 212 """导出为CSV格式"""
@@ -215,7 +215,7 @@ class ResultAnalyzer: @@ -215,7 +215,7 @@ class ResultAnalyzer:
215 if output_file is None: 215 if output_file is None:
216 output_file = self.results_dir / "items_export.csv" 216 output_file = self.results_dir / "items_export.csv"
217 217
218 - logger.info(f"\n导出CSV: {output_file}") 218 + logger.info(f"\nExporting to CSV: {output_file}")
219 219
220 json_files = list(self.results_dir.glob("*.json")) 220 json_files = list(self.results_dir.glob("*.json"))
221 221
@@ -243,9 +243,9 @@ class ResultAnalyzer: @@ -243,9 +243,9 @@ class ResultAnalyzer:
243 item.get('detail_url', '') 243 item.get('detail_url', '')
244 ]) 244 ])
245 except Exception as e: 245 except Exception as e:
246 - logger.error(f"导出失败 {json_file.name}: {str(e)}") 246 + logger.error(f"Export failed for {json_file.name}: {str(e)}")
247 247
248 - logger.info(f"CSV导出完成: {output_file}") 248 + logger.info(f"CSV export completed: {output_file}")
249 249
250 250
251 def main(): 251 def main():
@@ -270,7 +270,7 @@ def main(): @@ -270,7 +270,7 @@ def main():
270 analyzer.export_csv(args.output) 270 analyzer.export_csv(args.output)
271 271
272 except Exception as e: 272 except Exception as e:
273 - logger.error(f"分析失败: {str(e)}") 273 + logger.error(f"Analysis failed: {str(e)}")
274 274
275 275
276 if __name__ == "__main__": 276 if __name__ == "__main__":
docs/常用查询 - sql.sql
1 -- 查询今天入库的SPU和SKU商品数据 1 -- 查询今天入库的SPU和SKU商品数据
2 -- 用于查询当天新增的商品信息 2 -- 用于查询当天新增的商品信息
3 3
  4 +
  5 +select id, title from shoplazza_product_spu where tenant_id = 170 and deleted = 0 ;
  6 +
4 -- ====================================== 7 -- ======================================
5 -- 1. 查询今天入库的SPU商品 8 -- 1. 查询今天入库的SPU商品
6 -- ====================================== 9 -- ======================================
scripts/check_es_data.py
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 """ 2 """
3 -检查ES索引中的实际数据,看分面字段是否有值 3 +Check actual data in ES index to see if facet fields have values
4 """ 4 """
5 5
6 import sys 6 import sys
@@ -14,9 +14,9 @@ from utils.es_client import ESClient @@ -14,9 +14,9 @@ from utils.es_client import ESClient
14 14
15 15
16 def check_es_facet_fields(es_client, tenant_id: str, size: int = 5): 16 def check_es_facet_fields(es_client, tenant_id: str, size: int = 5):
17 - """检查ES中的分面相关字段""" 17 + """Check facet-related fields in ES"""
18 print("\n" + "="*60) 18 print("\n" + "="*60)
19 - print("检查ES索引中的分面字段数据") 19 + print("Checking facet field data in ES index")
20 print("="*60) 20 print("="*60)
21 21
22 query = { 22 query = {
@@ -46,14 +46,14 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5): @@ -46,14 +46,14 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5):
46 hits = response.get('hits', {}).get('hits', []) 46 hits = response.get('hits', {}).get('hits', [])
47 total = response.get('hits', {}).get('total', {}).get('value', 0) 47 total = response.get('hits', {}).get('total', {}).get('value', 0)
48 48
49 - print(f"\n总文档数: {total}")  
50 - print(f"检查前 {len(hits)} 个文档:\n") 49 + print(f"\nTotal documents: {total}")
  50 + print(f"Checking first {len(hits)} documents:\n")
51 51
52 for i, hit in enumerate(hits, 1): 52 for i, hit in enumerate(hits, 1):
53 source = hit.get('_source', {}) 53 source = hit.get('_source', {})
54 title_obj = source.get("title") or {} 54 title_obj = source.get("title") or {}
55 category_path_obj = source.get("category_path") or {} 55 category_path_obj = source.get("category_path") or {}
56 - print(f"文档 {i}:") 56 + print(f"Document {i}:")
57 print(f" spu_id: {source.get('spu_id')}") 57 print(f" spu_id: {source.get('spu_id')}")
58 print(f" title.zh: {str(title_obj.get('zh', ''))[:50] if isinstance(title_obj, dict) else ''}") 58 print(f" title.zh: {str(title_obj.get('zh', ''))[:50] if isinstance(title_obj, dict) else ''}")
59 print(f" category1_name: {source.get('category1_name')}") 59 print(f" category1_name: {source.get('category1_name')}")
@@ -67,24 +67,24 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5): @@ -67,24 +67,24 @@ def check_es_facet_fields(es_client, tenant_id: str, size: int = 5):
67 67
68 specs = source.get('specifications', []) 68 specs = source.get('specifications', [])
69 if specs: 69 if specs:
70 - print(f" specifications 数量: {len(specs)}") 70 + print(f" specifications count: {len(specs)}")
71 # 显示前3个specifications 71 # 显示前3个specifications
72 for spec in specs[:3]: 72 for spec in specs[:3]:
73 print(f" - name: {spec.get('name')}, value: {spec.get('value')}") 73 print(f" - name: {spec.get('name')}, value: {spec.get('value')}")
74 else: 74 else:
75 - print(f" specifications: ") 75 + print(f" specifications: empty")
76 print() 76 print()
77 77
78 except Exception as e: 78 except Exception as e:
79 - print(f"错误: {e}") 79 + print(f"Error: {e}")
80 import traceback 80 import traceback
81 traceback.print_exc() 81 traceback.print_exc()
82 82
83 83
84 def check_facet_aggregations(es_client, tenant_id: str): 84 def check_facet_aggregations(es_client, tenant_id: str):
85 - """检查分面聚合查询""" 85 + """Check facet aggregation queries"""
86 print("\n" + "="*60) 86 print("\n" + "="*60)
87 - print("检查分面聚合查询结果") 87 + print("Checking facet aggregation query results")
88 print("="*60) 88 print("="*60)
89 89
90 query = { 90 query = {
@@ -174,16 +174,16 @@ def check_facet_aggregations(es_client, tenant_id: str): @@ -174,16 +174,16 @@ def check_facet_aggregations(es_client, tenant_id: str):
174 response = es_client.client.search(index="search_products", body=query) 174 response = es_client.client.search(index="search_products", body=query)
175 aggs = response.get('aggregations', {}) 175 aggs = response.get('aggregations', {})
176 176
177 - print("\n1. category1_name 分面:") 177 + print("\n1. category1_name facet:")
178 category1 = aggs.get('category1_facet', {}) 178 category1 = aggs.get('category1_facet', {})
179 buckets = category1.get('buckets', []) 179 buckets = category1.get('buckets', [])
180 if buckets: 180 if buckets:
181 for bucket in buckets: 181 for bucket in buckets:
182 print(f" {bucket['key']}: {bucket['doc_count']}") 182 print(f" {bucket['key']}: {bucket['doc_count']}")
183 else: 183 else:
184 - print(" 空(没有数据)") 184 + print(" empty (no data)")
185 185
186 - print("\n2. specifications.color 分面:") 186 + print("\n2. specifications.color facet:")
187 color_agg = aggs.get('color_facet', {}) 187 color_agg = aggs.get('color_facet', {})
188 color_filter = color_agg.get('filter_by_name', {}) 188 color_filter = color_agg.get('filter_by_name', {})
189 color_values = color_filter.get('value_counts', {}) 189 color_values = color_filter.get('value_counts', {})
@@ -192,9 +192,9 @@ def check_facet_aggregations(es_client, tenant_id: str): @@ -192,9 +192,9 @@ def check_facet_aggregations(es_client, tenant_id: str):
192 for bucket in color_buckets: 192 for bucket in color_buckets:
193 print(f" {bucket['key']}: {bucket['doc_count']}") 193 print(f" {bucket['key']}: {bucket['doc_count']}")
194 else: 194 else:
195 - print(" 空(没有数据)") 195 + print(" empty (no data)")
196 196
197 - print("\n3. specifications.size 分面:") 197 + print("\n3. specifications.size facet:")
198 size_agg = aggs.get('size_facet', {}) 198 size_agg = aggs.get('size_facet', {})
199 size_filter = size_agg.get('filter_by_name', {}) 199 size_filter = size_agg.get('filter_by_name', {})
200 size_values = size_filter.get('value_counts', {}) 200 size_values = size_filter.get('value_counts', {})
@@ -203,9 +203,9 @@ def check_facet_aggregations(es_client, tenant_id: str): @@ -203,9 +203,9 @@ def check_facet_aggregations(es_client, tenant_id: str):
203 for bucket in size_buckets: 203 for bucket in size_buckets:
204 print(f" {bucket['key']}: {bucket['doc_count']}") 204 print(f" {bucket['key']}: {bucket['doc_count']}")
205 else: 205 else:
206 - print(" 空(没有数据)") 206 + print(" empty (no data)")
207 207
208 - print("\n4. specifications.material 分面:") 208 + print("\n4. specifications.material facet:")
209 material_agg = aggs.get('material_facet', {}) 209 material_agg = aggs.get('material_facet', {})
210 material_filter = material_agg.get('filter_by_name', {}) 210 material_filter = material_agg.get('filter_by_name', {})
211 material_values = material_filter.get('value_counts', {}) 211 material_values = material_filter.get('value_counts', {})
@@ -214,19 +214,19 @@ def check_facet_aggregations(es_client, tenant_id: str): @@ -214,19 +214,19 @@ def check_facet_aggregations(es_client, tenant_id: str):
214 for bucket in material_buckets: 214 for bucket in material_buckets:
215 print(f" {bucket['key']}: {bucket['doc_count']}") 215 print(f" {bucket['key']}: {bucket['doc_count']}")
216 else: 216 else:
217 - print(" 空(没有数据)") 217 + print(" empty (no data)")
218 218
219 except Exception as e: 219 except Exception as e:
220 - print(f"错误: {e}") 220 + print(f"Error: {e}")
221 import traceback 221 import traceback
222 traceback.print_exc() 222 traceback.print_exc()
223 223
224 224
225 def main(): 225 def main():
226 - parser = argparse.ArgumentParser(description='检查ES索引中的分面字段数据') 226 + parser = argparse.ArgumentParser(description='Check facet field data in ES index')
227 parser.add_argument('--tenant-id', required=True, help='Tenant ID') 227 parser.add_argument('--tenant-id', required=True, help='Tenant ID')
228 - parser.add_argument('--es-host', help='Elasticsearch host (或使用环境变量 ES_HOST, 默认: http://localhost:9200)')  
229 - parser.add_argument('--size', type=int, default=5, help='检查的文档数量 (默认: 5)') 228 + parser.add_argument('--es-host', help='Elasticsearch host (or use env var ES_HOST, default: http://localhost:9200)')
  229 + parser.add_argument('--size', type=int, default=5, help='Number of documents to check (default: 5)')
230 230
231 args = parser.parse_args() 231 args = parser.parse_args()
232 232
@@ -235,7 +235,7 @@ def main(): @@ -235,7 +235,7 @@ def main():
235 es_username = os.environ.get('ES_USERNAME') 235 es_username = os.environ.get('ES_USERNAME')
236 es_password = os.environ.get('ES_PASSWORD') 236 es_password = os.environ.get('ES_PASSWORD')
237 237
238 - print(f"连接Elasticsearch: {es_host}") 238 + print(f"Connecting to Elasticsearch: {es_host}")
239 print(f"Tenant ID: {args.tenant_id}\n") 239 print(f"Tenant ID: {args.tenant_id}\n")
240 240
241 try: 241 try:
@@ -245,11 +245,11 @@ def main(): @@ -245,11 +245,11 @@ def main():
245 es_client = ESClient(hosts=[es_host]) 245 es_client = ESClient(hosts=[es_host])
246 246
247 if not es_client.ping(): 247 if not es_client.ping():
248 - print(f"✗ 无法连接到Elasticsearch: {es_host}") 248 + print(f"✗ Cannot connect to Elasticsearch: {es_host}")
249 return 1 249 return 1
250 - print("✓ Elasticsearch连接成功\n") 250 + print("✓ Elasticsearch connected successfully\n")
251 except Exception as e: 251 except Exception as e:
252 - print(f"✗ 连接Elasticsearch失败: {e}") 252 + print(f"✗ Failed to connect to Elasticsearch: {e}")
253 return 1 253 return 1
254 254
255 # 检查ES数据 255 # 检查ES数据
@@ -257,7 +257,7 @@ def main(): @@ -257,7 +257,7 @@ def main():
257 check_facet_aggregations(es_client, args.tenant_id) 257 check_facet_aggregations(es_client, args.tenant_id)
258 258
259 print("\n" + "="*60) 259 print("\n" + "="*60)
260 - print("检查完成") 260 + print("Check completed")
261 print("="*60) 261 print("="*60)
262 262
263 return 0 263 return 0
scripts/monitor_eviction.py
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 """ 2 """
3 -实时监控 Redis 缓存淘汰事件 3 +Real-time monitoring of Redis cache eviction events
4 4
5 -持续监控 evicted_keys 统计,当有新的淘汰发生时发出警告 5 +Continuously monitor evicted_keys statistics and warn when new evictions occur
6 """ 6 """
7 7
8 import redis 8 import redis
@@ -18,7 +18,7 @@ sys.path.insert(0, str(project_root)) @@ -18,7 +18,7 @@ sys.path.insert(0, str(project_root))
18 from config.env_config import REDIS_CONFIG 18 from config.env_config import REDIS_CONFIG
19 19
20 def get_redis_client(): 20 def get_redis_client():
21 - """获取 Redis 客户端""" 21 + """Get Redis client"""
22 return redis.Redis( 22 return redis.Redis(
23 host=REDIS_CONFIG.get('host', 'localhost'), 23 host=REDIS_CONFIG.get('host', 'localhost'),
24 port=REDIS_CONFIG.get('port', 6479), 24 port=REDIS_CONFIG.get('port', 6479),
@@ -29,12 +29,12 @@ def get_redis_client(): @@ -29,12 +29,12 @@ def get_redis_client():
29 ) 29 )
30 30
31 def monitor_eviction(interval=5): 31 def monitor_eviction(interval=5):
32 - """持续监控淘汰事件""" 32 + """Continuously monitor eviction events"""
33 print("=" * 60) 33 print("=" * 60)
34 - print("Redis 缓存淘汰实时监控") 34 + print("Redis Cache Eviction Real-time Monitoring")
35 print("=" * 60) 35 print("=" * 60)
36 - print(f"监控间隔: {interval} 秒")  
37 - print("按 Ctrl+C 停止监控") 36 + print(f"Monitoring interval: {interval} seconds")
  37 + print("Press Ctrl+C to stop monitoring")
38 print("=" * 60) 38 print("=" * 60)
39 print() 39 print()
40 40
@@ -42,7 +42,7 @@ def monitor_eviction(interval=5): @@ -42,7 +42,7 @@ def monitor_eviction(interval=5):
42 client = get_redis_client() 42 client = get_redis_client()
43 client.ping() 43 client.ping()
44 except Exception as e: 44 except Exception as e:
45 - print(f"❌ Redis 连接失败: {e}") 45 + print(f"❌ Redis connection failed: {e}")
46 return 46 return
47 47
48 last_evicted = 0 48 last_evicted = 0
@@ -55,8 +55,8 @@ def monitor_eviction(interval=5): @@ -55,8 +55,8 @@ def monitor_eviction(interval=5):
55 if current_evicted > last_evicted: 55 if current_evicted > last_evicted:
56 new_evictions = current_evicted - last_evicted 56 new_evictions = current_evicted - last_evicted
57 timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') 57 timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
58 - print(f"[{timestamp}] ⚠️ 检测到 {new_evictions} 个新的淘汰事件!")  
59 - print(f" 累计淘汰总数: {current_evicted:,}") 58 + print(f"[{timestamp}] ⚠️ Detected {new_evictions} new eviction events!")
  59 + print(f" Total evictions: {current_evicted:,}")
60 60
61 # 检查内存使用情况 61 # 检查内存使用情况
62 mem_info = client.info('memory') 62 mem_info = client.info('memory')
@@ -64,26 +64,26 @@ def monitor_eviction(interval=5): @@ -64,26 +64,26 @@ def monitor_eviction(interval=5):
64 used_memory = mem_info.get('used_memory', 0) 64 used_memory = mem_info.get('used_memory', 0)
65 if maxmemory > 0: 65 if maxmemory > 0:
66 usage_percent = (used_memory / maxmemory) * 100 66 usage_percent = (used_memory / maxmemory) * 100
67 - print(f" 当前内存使用率: {usage_percent:.2f}%") 67 + print(f" Current memory usage: {usage_percent:.2f}%")
68 68
69 last_evicted = current_evicted 69 last_evicted = current_evicted
70 else: 70 else:
71 timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') 71 timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
72 - print(f"[{timestamp}] ✅ 无新淘汰事件 (累计: {current_evicted:,})") 72 + print(f"[{timestamp}] ✅ No new eviction events (Total: {current_evicted:,})")
73 73
74 time.sleep(interval) 74 time.sleep(interval)
75 75
76 except KeyboardInterrupt: 76 except KeyboardInterrupt:
77 - print("\n\n监控已停止") 77 + print("\n\nMonitoring stopped")
78 except Exception as e: 78 except Exception as e:
79 - print(f"\n❌ 监控出错: {e}") 79 + print(f"\n❌ Monitoring error: {e}")
80 import traceback 80 import traceback
81 traceback.print_exc() 81 traceback.print_exc()
82 82
83 if __name__ == "__main__": 83 if __name__ == "__main__":
84 import argparse 84 import argparse
85 - parser = argparse.ArgumentParser(description='实时监控 Redis 缓存淘汰事件')  
86 - parser.add_argument('--interval', type=int, default=5, help='监控间隔(秒),默认 5 秒') 85 + parser = argparse.ArgumentParser(description='Real-time monitoring of Redis cache eviction events')
  86 + parser.add_argument('--interval', type=int, default=5, help='Monitoring interval in seconds (default: 5)')
87 args = parser.parse_args() 87 args = parser.parse_args()
88 88
89 monitor_eviction(interval=args.interval) 89 monitor_eviction(interval=args.interval)
start_reranker.sh 0 → 100644
@@ -0,0 +1,3 @@ @@ -0,0 +1,3 @@
  1 +
  2 +uvicorn reranker.server:app --host 0.0.0.0 --port 6007
  3 +