Commit 6c5ee5bc0b3c2254784cac7fc541e9472116022e
1 parent: 153a592e
add scripts: scripts/redis/
Showing 2 changed files with 486 additions and 57 deletions
Show diff stats
scripts/check_cache_stats.py renamed to scripts/redis/check_cache_stats.py
| ... | ... | @@ -6,11 +6,20 @@ |
| 6 | 6 | |
| 7 | 7 | 使用方法: |
| 8 | 8 | |
| 9 | -直接使用: | |
| 10 | -python scripts/check_cache_stats.py | |
| 9 | +直接使用(默认数据库 0): | |
| 10 | +python scripts/redis/check_cache_stats.py | |
| 11 | 11 | |
| 12 | -或者 只统计以下三种前缀: | |
| 13 | -python scripts/check_cache_stats.py --prefix trans embedding product | |
| 12 | +统计所有数据库: | |
| 13 | +python scripts/redis/check_cache_stats.py --all-db | |
| 14 | + | |
| 15 | +统计指定数据库: | |
| 16 | +python scripts/redis/check_cache_stats.py --db 1 | |
| 17 | + | |
| 18 | +只统计以下三种前缀: | |
| 19 | +python scripts/redis/check_cache_stats.py --prefix trans embedding product | |
| 20 | + | |
| 21 | +统计所有数据库的指定前缀: | |
| 22 | +python scripts/redis/check_cache_stats.py --all-db --prefix trans embedding | |
| 14 | 23 | |
| 15 | 24 | |
| 16 | 25 | |
| ... | ... | @@ -36,18 +45,19 @@ from collections import defaultdict |
| 36 | 45 | from pathlib import Path |
| 37 | 46 | from datetime import datetime |
| 38 | 47 | |
| 39 | -# 添加项目路径 | |
| 40 | -project_root = Path(__file__).parent.parent | |
| 48 | +# 添加项目路径(文件在 scripts/redis/ 目录下,需要向上三级到项目根目录) | |
| 49 | +project_root = Path(__file__).parent.parent.parent | |
| 41 | 50 | sys.path.insert(0, str(project_root)) |
| 42 | 51 | |
| 43 | 52 | from config.env_config import REDIS_CONFIG |
| 44 | 53 | |
| 45 | -def get_redis_client(): | |
| 54 | +def get_redis_client(db=0): | |
| 46 | 55 | """获取 Redis 客户端""" |
| 47 | 56 | return redis.Redis( |
| 48 | 57 | host=REDIS_CONFIG.get('host', 'localhost'), |
| 49 | 58 | port=REDIS_CONFIG.get('port', 6479), |
| 50 | 59 | password=REDIS_CONFIG.get('password'), |
| 60 | + db=db, | |
| 51 | 61 | decode_responses=True, |
| 52 | 62 | socket_timeout=10, |
| 53 | 63 | socket_connect_timeout=10, |
| ... | ... | @@ -67,36 +77,51 @@ def format_bytes(bytes_size): |
| 67 | 77 | bytes_size /= 1024.0 |
| 68 | 78 | return f"{bytes_size:.2f} PB" |
| 69 | 79 | |
| 70 | -def get_key_memory_usage(client, key): | |
| 71 | - """获取单个 key 的内存占用量(字节)""" | |
| 80 | +def get_key_memory_usage(client, key, use_real_memory=True): | |
| 81 | + """ | |
| 82 | + 获取单个 key 的内存占用量(字节) | |
| 83 | + | |
| 84 | + Args: | |
| 85 | + client: Redis 客户端 | |
| 86 | + key: key 名称 | |
| 87 | + use_real_memory: 是否使用真实的 MEMORY USAGE 命令(True=真实,False=估算) | |
| 88 | + | |
| 89 | + Returns: | |
| 90 | + 内存占用量(字节) | |
| 91 | + """ | |
| 72 | 92 | try: |
| 73 | - # 使用 MEMORY USAGE 命令(Redis 4.0+) | |
| 74 | - try: | |
| 75 | - memory = client.execute_command('MEMORY', 'USAGE', key) | |
| 76 | - return memory if memory else 0 | |
| 77 | - except: | |
| 78 | - # 如果 MEMORY USAGE 不可用,使用估算方法 | |
| 79 | - # 获取 key 和 value 的大小 | |
| 80 | - key_size = len(key.encode('utf-8')) | |
| 81 | - | |
| 82 | - # 获取 value | |
| 83 | - value = client.get(key) | |
| 84 | - if value: | |
| 85 | - value_size = len(value.encode('utf-8')) | |
| 86 | - else: | |
| 87 | - # 尝试获取其他类型 | |
| 88 | - ttl = client.ttl(key) | |
| 89 | - if ttl == -2: # key 不存在 | |
| 90 | - return 0 | |
| 91 | - # 估算:key + 基础开销 | |
| 92 | - value_size = 0 | |
| 93 | - | |
| 94 | - # Redis 内存开销估算(粗略) | |
| 95 | - # key 对象开销: ~48 bytes | |
| 96 | - # value 对象开销: ~24 bytes | |
| 97 | - # 其他开销: ~100 bytes | |
| 98 | - overhead = 48 + 24 + 100 | |
| 99 | - return key_size + value_size + overhead | |
| 93 | + if use_real_memory: | |
| 94 | + # 使用 MEMORY USAGE 命令(Redis 4.0+)- 这是真实的内存占用 | |
| 95 | + try: | |
| 96 | + memory = client.execute_command('MEMORY', 'USAGE', key) | |
| 97 | + return memory if memory else 0 | |
| 98 | + except: | |
| 99 | + # 如果 MEMORY USAGE 不可用,降级到估算方法 | |
| 100 | + pass | |
| 101 | + | |
| 102 | + # 估算方法(不够准确,但速度快) | |
| 103 | + # 获取 key 和 value 的大小 | |
| 104 | + key_size = len(key.encode('utf-8')) | |
| 105 | + | |
| 106 | + # 获取 value | |
| 107 | + value = client.get(key) | |
| 108 | + if value: | |
| 109 | + value_size = len(value.encode('utf-8')) | |
| 110 | + else: | |
| 111 | + # 尝试获取其他类型 | |
| 112 | + ttl = client.ttl(key) | |
| 113 | + if ttl == -2: # key 不存在 | |
| 114 | + return 0 | |
| 115 | + # 估算:key + 基础开销 | |
| 116 | + value_size = 0 | |
| 117 | + | |
| 118 | + # Redis 内存开销估算(粗略) | |
| 119 | + # key 对象开销: ~48 bytes | |
| 120 | + # value 对象开销: ~24 bytes | |
| 121 | + # 其他开销: ~100 bytes | |
| 122 | + # 注意:这个估算不准确,特别是对于复杂数据结构(hash、set、zset等) | |
| 123 | + overhead = 48 + 24 + 100 | |
| 124 | + return key_size + value_size + overhead | |
| 100 | 125 | except Exception as e: |
| 101 | 126 | return 0 |
| 102 | 127 | |
| ... | ... | @@ -111,8 +136,19 @@ def scan_all_keys(client, pattern="*"): |
| 111 | 136 | break |
| 112 | 137 | return keys |
| 113 | 138 | |
| 114 | -def analyze_cache_by_prefix(client): | |
| 139 | +def analyze_cache_by_prefix(client, args=None, db_num=0): | |
| 115 | 140 | """按前缀分析缓存""" |
| 141 | + if args is None: | |
| 142 | + class Args: | |
| 143 | + real = False | |
| 144 | + sample_size = 100 | |
| 145 | + args = Args() | |
| 146 | + | |
| 147 | + # 显示当前数据库 | |
| 148 | + if db_num > 0: | |
| 149 | + print(f"\n{'='*60}") | |
| 150 | + print(f"数据库 {db_num}") | |
| 151 | + print(f"{'='*60}\n") | |
| 116 | 152 | print("=" * 60) |
| 117 | 153 | print("扫描 Redis 中的所有 key...") |
| 118 | 154 | print("=" * 60) |
| ... | ... | @@ -154,39 +190,67 @@ def analyze_cache_by_prefix(client): |
| 154 | 190 | print("=" * 60) |
| 155 | 191 | print("计算内存占用量...") |
| 156 | 192 | print("=" * 60) |
| 193 | + print("注意:") | |
| 194 | + print(" - 如果 key 数量 > 100,会采样前 100 个进行估算") | |
| 195 | + print(" - 优先使用 Redis MEMORY USAGE 命令(真实值)") | |
| 196 | + print(" - 如果 MEMORY USAGE 不可用,会使用估算方法(不准确)") | |
| 197 | + print(" - 估算方法只计算 key+value 大小,不包括 Redis 内部数据结构开销") | |
| 198 | + print() | |
| 199 | + | |
| 200 | + # 测试是否支持 MEMORY USAGE | |
| 201 | + test_key = all_keys[0] if all_keys else None | |
| 202 | + supports_memory_usage = False | |
| 203 | + if test_key: | |
| 204 | + try: | |
| 205 | + client.execute_command('MEMORY', 'USAGE', test_key) | |
| 206 | + supports_memory_usage = True | |
| 207 | + print("✅ Redis 支持 MEMORY USAGE 命令,将使用真实内存值") | |
| 208 | + except: | |
| 209 | + print("⚠️ Redis 不支持 MEMORY USAGE 命令,将使用估算方法(可能不准确)") | |
| 210 | + print() | |
| 157 | 211 | |
| 158 | 212 | prefix_memory = {} |
| 159 | 213 | for prefix, stats in prefix_stats.items(): |
| 160 | 214 | print(f" 计算 {prefix}:* 的内存...") |
| 161 | 215 | total_memory = 0 |
| 162 | - sample_count = min(100, stats['count']) # 采样前 100 个 | |
| 216 | + # 如果指定了 --real,且数量不太大,计算全部 | |
| 217 | + if args.real and stats['count'] <= 10000: | |
| 218 | + sample_count = stats['count'] | |
| 219 | + else: | |
| 220 | + sample_count = min(args.sample_size, stats['count']) # 采样 | |
| 163 | 221 | |
| 164 | 222 | # 如果数量较少,全部计算;否则采样计算 |
| 165 | 223 | if stats['count'] <= 100: |
| 166 | 224 | keys_to_check = stats['keys'] |
| 225 | + is_sampled = False | |
| 167 | 226 | else: |
| 168 | 227 | # 采样计算 |
| 169 | 228 | import random |
| 170 | 229 | keys_to_check = random.sample(stats['keys'], sample_count) |
| 230 | + is_sampled = True | |
| 171 | 231 | |
| 172 | 232 | for key in keys_to_check: |
| 173 | - memory = get_key_memory_usage(client, key) | |
| 233 | + memory = get_key_memory_usage(client, key, use_real_memory=supports_memory_usage) | |
| 174 | 234 | total_memory += memory |
| 175 | 235 | |
| 176 | 236 | # 如果是采样,估算总内存 |
| 177 | - if stats['count'] > sample_count: | |
| 237 | + if is_sampled: | |
| 178 | 238 | avg_memory = total_memory / sample_count |
| 179 | 239 | estimated_total = avg_memory * stats['count'] |
| 180 | 240 | prefix_memory[prefix] = { |
| 181 | 241 | 'memory': estimated_total, |
| 182 | 242 | 'is_estimated': True, |
| 183 | - 'sample_count': sample_count | |
| 243 | + 'is_sampled': True, | |
| 244 | + 'sample_count': sample_count, | |
| 245 | + 'uses_real_memory': supports_memory_usage | |
| 184 | 246 | } |
| 185 | 247 | else: |
| 186 | 248 | prefix_memory[prefix] = { |
| 187 | 249 | 'memory': total_memory, |
| 188 | 250 | 'is_estimated': False, |
| 189 | - 'sample_count': stats['count'] | |
| 251 | + 'is_sampled': False, | |
| 252 | + 'sample_count': stats['count'], | |
| 253 | + 'uses_real_memory': supports_memory_usage | |
| 190 | 254 | } |
| 191 | 255 | |
| 192 | 256 | # 显示统计结果 |
| ... | ... | @@ -203,15 +267,30 @@ def analyze_cache_by_prefix(client): |
| 203 | 267 | |
| 204 | 268 | total_memory_all = sum(pm['memory'] for pm in prefix_memory.values()) |
| 205 | 269 | |
| 206 | - print(f"{'前缀':<20} {'条目数':>12} {'内存占用量':>20} {'占比':>10} {'说明'}") | |
| 207 | - print("-" * 80) | |
| 270 | + print(f"{'前缀':<20} {'条目数':>12} {'内存占用量和计算方式':>50} {'占比':>10} {'说明'}") | |
| 271 | + print("-" * 120) | |
| 208 | 272 | |
| 209 | 273 | for prefix, stats in sorted_prefixes: |
| 210 | 274 | memory_info = prefix_memory[prefix] |
| 211 | 275 | memory = memory_info['memory'] |
| 212 | - memory_str = format_bytes(memory) | |
| 213 | - if memory_info['is_estimated']: | |
| 214 | - memory_str += f" (估算, 采样 {memory_info['sample_count']})" | |
| 276 | + | |
| 277 | + # 计算平均每条 key 的大小 | |
| 278 | + avg_memory_per_key = memory / stats['count'] if stats['count'] > 0 else 0 | |
| 279 | + avg_memory_str = format_bytes(avg_memory_per_key) | |
| 280 | + | |
| 281 | + # 标注内存计算方式和结果 | |
| 282 | + if memory_info['is_sampled']: | |
| 283 | + if memory_info['uses_real_memory']: | |
| 284 | + calc_method = f"采样估算(采样{memory_info['sample_count']}个, 使用真实MEMORY USAGE)" | |
| 285 | + else: | |
| 286 | + calc_method = f"采样估算(采样{memory_info['sample_count']}个, 估算方法)" | |
| 287 | + else: | |
| 288 | + if memory_info['uses_real_memory']: | |
| 289 | + calc_method = "真实值(全部计算, 使用MEMORY USAGE)" | |
| 290 | + else: | |
| 291 | + calc_method = "估算值(全部计算, 估算方法)" | |
| 292 | + | |
| 293 | + memory_str = f"{format_bytes(memory)} | 每条: {avg_memory_str} | {calc_method}" | |
| 215 | 294 | |
| 216 | 295 | percentage = (memory / total_memory_all * 100) if total_memory_all > 0 else 0 |
| 217 | 296 | |
| ... | ... | @@ -228,10 +307,14 @@ def analyze_cache_by_prefix(client): |
| 228 | 307 | else: |
| 229 | 308 | description = "其他" |
| 230 | 309 | |
| 231 | - print(f"{prefix:<20} {stats['count']:>12,} {memory_str:>30} {percentage:>9.1f}% {description}") | |
| 310 | + # 格式化输出,内存信息可能很长,需要适当处理 | |
| 311 | + memory_display = memory_str[:70] + "..." if len(memory_str) > 70 else memory_str | |
| 312 | + print(f"{prefix:<20} {stats['count']:>12,} {memory_display:<70} {percentage:>9.1f}% {description}") | |
| 232 | 313 | |
| 233 | - print("-" * 80) | |
| 234 | - print(f"{'总计':<20} {total_keys:>12,} {format_bytes(total_memory_all):>30} {'100.0':>9}%") | |
| 314 | + print("-" * 120) | |
| 315 | + avg_total = total_memory_all / total_keys if total_keys > 0 else 0 | |
| 316 | + total_display = f"{format_bytes(total_memory_all)} | 每条: {format_bytes(avg_total)}" | |
| 317 | + print(f"{'总计':<20} {total_keys:>12,} {total_display:<70} {'100.0':>9}%") | |
| 235 | 318 | |
| 236 | 319 | # 显示详细信息 |
| 237 | 320 | print("\n" + "=" * 60) |
| ... | ... | @@ -239,8 +322,25 @@ def analyze_cache_by_prefix(client): |
| 239 | 322 | print("=" * 60) |
| 240 | 323 | |
| 241 | 324 | for prefix, stats in sorted_prefixes[:10]: # 只显示前 10 个 |
| 325 | + mem_info = prefix_memory[prefix] | |
| 326 | + avg_per_key = mem_info['memory'] / stats['count'] if stats['count'] > 0 else 0 | |
| 327 | + | |
| 242 | 328 | print(f"\n{prefix}:* ({stats['count']:,} 个 key)") |
| 243 | - print(f" 内存: {format_bytes(prefix_memory[prefix]['memory'])}") | |
| 329 | + print(f" 总内存: {format_bytes(mem_info['memory'])}") | |
| 330 | + print(f" 每条 key 平均: {format_bytes(avg_per_key)}") | |
| 331 | + | |
| 332 | + # 显示计算方式 | |
| 333 | + if mem_info['is_sampled']: | |
| 334 | + if mem_info['uses_real_memory']: | |
| 335 | + print(f" 计算方式: 采样估算(采样 {mem_info['sample_count']} 个,使用真实 MEMORY USAGE)") | |
| 336 | + else: | |
| 337 | + print(f" 计算方式: 采样估算(采样 {mem_info['sample_count']} 个,使用估算方法)") | |
| 338 | + else: | |
| 339 | + if mem_info['uses_real_memory']: | |
| 340 | + print(f" 计算方式: 真实值(全部计算,使用 MEMORY USAGE)") | |
| 341 | + else: | |
| 342 | + print(f" 计算方式: 估算值(全部计算,使用估算方法)") | |
| 343 | + | |
| 244 | 344 | print(f" 示例 key:") |
| 245 | 345 | for sample_key in stats['sample_keys'][:3]: |
| 246 | 346 | ttl = client.ttl(sample_key) |
| ... | ... | @@ -281,10 +381,13 @@ def analyze_cache_by_prefix(client): |
| 281 | 381 | import traceback |
| 282 | 382 | traceback.print_exc() |
| 283 | 383 | |
| 284 | -def analyze_specific_prefixes(client, prefixes): | |
| 384 | +def analyze_specific_prefixes(client, prefixes, db_num=0): | |
| 285 | 385 | """分析指定的前缀""" |
| 286 | 386 | print("=" * 60) |
| 287 | - print(f"分析指定前缀: {', '.join(prefixes)}") | |
| 387 | + if db_num > 0: | |
| 388 | + print(f"数据库 {db_num} - 分析指定前缀: {', '.join(prefixes)}") | |
| 389 | + else: | |
| 390 | + print(f"分析指定前缀: {', '.join(prefixes)}") | |
| 288 | 391 | print("=" * 60) |
| 289 | 392 | |
| 290 | 393 | for prefix in prefixes: |
| ... | ... | @@ -315,6 +418,77 @@ def analyze_specific_prefixes(client, prefixes): |
| 315 | 418 | else: |
| 316 | 419 | print(f" 内存占用量: {format_bytes(total_memory)}") |
| 317 | 420 | |
| 421 | +def get_all_databases(): | |
| 422 | + """获取所有有数据的数据库列表""" | |
| 423 | + databases = [] | |
| 424 | + # Redis 默认有 16 个数据库(0-15) | |
| 425 | + for db_num in range(16): | |
| 426 | + try: | |
| 427 | + client = get_redis_client(db=db_num) | |
| 428 | + client.ping() | |
| 429 | + # 检查是否有 key | |
| 430 | + key_count = client.dbsize() | |
| 431 | + if key_count > 0: | |
| 432 | + databases.append(db_num) | |
| 433 | + except: | |
| 434 | + pass | |
| 435 | + return databases | |
| 436 | + | |
| 437 | +def analyze_all_databases(args): | |
| 438 | + """分析所有数据库""" | |
| 439 | + print("=" * 60) | |
| 440 | + print("扫描所有数据库...") | |
| 441 | + print("=" * 60) | |
| 442 | + | |
| 443 | + databases = get_all_databases() | |
| 444 | + | |
| 445 | + if not databases: | |
| 446 | + print("未找到有数据的数据库") | |
| 447 | + return | |
| 448 | + | |
| 449 | + print(f"发现 {len(databases)} 个有数据的数据库: {databases}\n") | |
| 450 | + | |
| 451 | + # 汇总统计 | |
| 452 | + total_stats_by_prefix = defaultdict(lambda: {'count': 0, 'memory': 0, 'dbs': []}) | |
| 453 | + total_keys_all_db = 0 | |
| 454 | + total_memory_all_db = 0 | |
| 455 | + | |
| 456 | + for db_num in databases: | |
| 457 | + try: | |
| 458 | + client = get_redis_client(db=db_num) | |
| 459 | + client.ping() | |
| 460 | + db_size = client.dbsize() | |
| 461 | + | |
| 462 | + print(f"\n{'='*60}") | |
| 463 | + print(f"数据库 {db_num} (共 {db_size:,} 个 key)") | |
| 464 | + print(f"{'='*60}") | |
| 465 | + | |
| 466 | + if args.prefix: | |
| 467 | + analyze_specific_prefixes(client, args.prefix, db_num=db_num) | |
| 468 | + else: | |
| 469 | + # 分析当前数据库 | |
| 470 | + analyze_cache_by_prefix(client, args, db_num=db_num) | |
| 471 | + | |
| 472 | + # 收集统计信息(简化版,只统计 key 数量) | |
| 473 | + total_keys_all_db += db_size | |
| 474 | + | |
| 475 | + except Exception as e: | |
| 476 | + print(f"❌ 数据库 {db_num} 分析失败: {e}") | |
| 477 | + import traceback | |
| 478 | + traceback.print_exc() | |
| 479 | + continue | |
| 480 | + | |
| 481 | + # 显示汇总统计 | |
| 482 | + if not args.prefix: | |
| 483 | + print("\n" + "=" * 60) | |
| 484 | + print("所有数据库汇总") | |
| 485 | + print("=" * 60) | |
| 486 | + print(f"有数据的数据库: {len(databases)} 个 ({', '.join(map(str, databases))})") | |
| 487 | + print(f"总 key 数量: {total_keys_all_db:,}") | |
| 488 | + print(f"\n提示: 要查看详细的内存统计,请分别运行每个数据库:") | |
| 489 | + for db_num in databases: | |
| 490 | + print(f" python scripts/redis/check_cache_stats.py --db {db_num}") | |
| 491 | + | |
| 318 | 492 | def main(): |
| 319 | 493 | """主函数""" |
| 320 | 494 | import argparse |
| ... | ... | @@ -322,6 +496,10 @@ def main(): |
| 322 | 496 | parser = argparse.ArgumentParser(description='统计 Redis 缓存的条目数和内存占用量') |
| 323 | 497 | parser.add_argument('--prefix', nargs='+', help='指定要分析的前缀(如: trans embedding)') |
| 324 | 498 | parser.add_argument('--all', action='store_true', help='分析所有前缀(默认)') |
| 499 | + parser.add_argument('--real', action='store_true', help='计算所有 key 的真实内存(很慢,但准确)') | |
| 500 | + parser.add_argument('--sample-size', type=int, default=100, help='采样大小(默认 100,仅当 key 数量 > 采样大小时使用)') | |
| 501 | + parser.add_argument('--db', type=int, help='指定数据库编号(0-15),默认只统计 db 0') | |
| 502 | + parser.add_argument('--all-db', action='store_true', help='统计所有数据库(0-15)') | |
| 325 | 503 | |
| 326 | 504 | args = parser.parse_args() |
| 327 | 505 | |
| ... | ... | @@ -330,22 +508,38 @@ def main(): |
| 330 | 508 | print(f"检查时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") |
| 331 | 509 | print() |
| 332 | 510 | |
| 511 | + # 如果指定了 --all-db,分析所有数据库 | |
| 512 | + if args.all_db: | |
| 513 | + analyze_all_databases(args) | |
| 514 | + print("\n" + "=" * 60) | |
| 515 | + print("统计完成") | |
| 516 | + print("=" * 60) | |
| 517 | + return | |
| 518 | + | |
| 519 | + # 否则分析指定或默认的数据库 | |
| 520 | + db_num = args.db if args.db is not None else 0 | |
| 521 | + | |
| 333 | 522 | try: |
| 334 | - client = get_redis_client() | |
| 523 | + client = get_redis_client(db=db_num) | |
| 335 | 524 | client.ping() |
| 336 | - print("✅ Redis 连接成功\n") | |
| 525 | + if db_num > 0: | |
| 526 | + print(f"✅ Redis 连接成功(数据库 {db_num})\n") | |
| 527 | + else: | |
| 528 | + print("✅ Redis 连接成功(默认数据库 0)\n") | |
| 337 | 529 | except Exception as e: |
| 338 | 530 | print(f"❌ Redis 连接失败: {e}") |
| 339 | 531 | print(f"\n请检查:") |
| 340 | 532 | print(f" - Host: {REDIS_CONFIG.get('host', 'localhost')}") |
| 341 | 533 | print(f" - Port: {REDIS_CONFIG.get('port', 6479)}") |
| 342 | 534 | print(f" - Password: {'已配置' if REDIS_CONFIG.get('password') else '未配置'}") |
| 535 | + print(f" - Database: {db_num}") | |
| 343 | 536 | return |
| 344 | 537 | |
| 345 | 538 | if args.prefix: |
| 346 | - analyze_specific_prefixes(client, args.prefix) | |
| 539 | + analyze_specific_prefixes(client, args.prefix, db_num=db_num) | |
| 347 | 540 | else: |
| 348 | - analyze_cache_by_prefix(client) | |
| 541 | + # 传递参数到分析函数 | |
| 542 | + analyze_cache_by_prefix(client, args, db_num=db_num) | |
| 349 | 543 | |
| 350 | 544 | print("\n" + "=" * 60) |
| 351 | 545 | print("统计完成") | ... | ... |
| ... | ... | @@ -0,0 +1,235 @@ |
| 1 | +#!/usr/bin/env python3 | |
| 2 | +""" | |
| 3 | +查找 Redis 中占用内存的主要 key | |
| 4 | + | |
| 5 | +分析为什么统计的缓存内存和总内存差异很大 | |
| 6 | +""" | |
| 7 | + | |
| 8 | +import redis | |
| 9 | +import os | |
| 10 | +import sys | |
| 11 | +from collections import defaultdict | |
| 12 | +from pathlib import Path | |
| 13 | +from datetime import datetime | |
| 14 | + | |
| 15 | +# 添加项目路径(文件在 scripts/redis/ 目录下,需要向上三级到项目根目录) | |
| 16 | +project_root = Path(__file__).parent.parent.parent | |
| 17 | +sys.path.insert(0, str(project_root)) | |
| 18 | + | |
| 19 | +from config.env_config import REDIS_CONFIG | |
| 20 | + | |
| 21 | +def get_redis_client(): | |
| 22 | + """获取 Redis 客户端""" | |
| 23 | + return redis.Redis( | |
| 24 | + host=REDIS_CONFIG.get('host', 'localhost'), | |
| 25 | + port=REDIS_CONFIG.get('port', 6479), | |
| 26 | + password=REDIS_CONFIG.get('password'), | |
| 27 | + decode_responses=True, | |
| 28 | + socket_timeout=10, | |
| 29 | + socket_connect_timeout=10, | |
| 30 | + ) | |
| 31 | + | |
| 32 | +def format_bytes(bytes_size): | |
| 33 | + """格式化字节数为可读格式""" | |
| 34 | + for unit in ['B', 'KB', 'MB', 'GB', 'TB']: | |
| 35 | + if bytes_size < 1024.0: | |
| 36 | + return f"{bytes_size:.2f} {unit}" | |
| 37 | + bytes_size /= 1024.0 | |
| 38 | + return f"{bytes_size:.2f} PB" | |
| 39 | + | |
| 40 | +def get_key_memory_usage(client, key): | |
| 41 | + """获取单个 key 的内存占用量(字节)""" | |
| 42 | + try: | |
| 43 | + # 使用 MEMORY USAGE 命令(Redis 4.0+) | |
| 44 | + try: | |
| 45 | + memory = client.execute_command('MEMORY', 'USAGE', key) | |
| 46 | + return memory if memory else 0 | |
| 47 | + except: | |
| 48 | + # 如果 MEMORY USAGE 不可用,使用估算方法 | |
| 49 | + key_size = len(key.encode('utf-8')) | |
| 50 | + value = client.get(key) | |
| 51 | + if value: | |
| 52 | + value_size = len(value.encode('utf-8')) | |
| 53 | + else: | |
| 54 | + value_size = 0 | |
| 55 | + overhead = 48 + 24 + 100 | |
| 56 | + return key_size + value_size + overhead | |
| 57 | + except Exception as e: | |
| 58 | + return 0 | |
| 59 | + | |
| 60 | +def analyze_all_keys(client, top_n=50): | |
| 61 | + """分析所有 key 的内存占用,找出占用最多的""" | |
| 62 | + print("=" * 60) | |
| 63 | + print("分析所有 key 的内存占用") | |
| 64 | + print("=" * 60) | |
| 65 | + | |
| 66 | + try: | |
| 67 | + # 获取总内存信息 | |
| 68 | + info = client.info('memory') | |
| 69 | + used_memory = info.get('used_memory', 0) | |
| 70 | + used_memory_human = info.get('used_memory_human', '0B') | |
| 71 | + | |
| 72 | + print(f"Redis 总使用内存: {used_memory_human} ({used_memory:,} bytes)\n") | |
| 73 | + | |
| 74 | + # 扫描所有 key | |
| 75 | + print("扫描所有 key...") | |
| 76 | + all_keys = [] | |
| 77 | + cursor = 0 | |
| 78 | + while True: | |
| 79 | + cursor, batch = client.scan(cursor, count=1000) | |
| 80 | + all_keys.extend(batch) | |
| 81 | + if cursor == 0: | |
| 82 | + break | |
| 83 | + | |
| 84 | + total_keys = len(all_keys) | |
| 85 | + print(f"总 key 数量: {total_keys:,}\n") | |
| 86 | + | |
| 87 | + # 分析 key 的命名模式 | |
| 88 | + print("分析 key 命名模式...") | |
| 89 | + no_prefix_count = 0 | |
| 90 | + prefix_patterns = defaultdict(int) | |
| 91 | + | |
| 92 | + for key in all_keys: | |
| 93 | + if ':' in key: | |
| 94 | + prefix = key.split(':', 1)[0] | |
| 95 | + prefix_patterns[prefix] += 1 | |
| 96 | + else: | |
| 97 | + no_prefix_count += 1 | |
| 98 | + | |
| 99 | + print(f" 无前缀的 key: {no_prefix_count:,}") | |
| 100 | + print(f" 有前缀的 key: {total_keys - no_prefix_count:,}") | |
| 101 | + print(f" 不同前缀数量: {len(prefix_patterns):,}\n") | |
| 102 | + | |
| 103 | + # 显示所有前缀 | |
| 104 | + print("所有前缀列表:") | |
| 105 | + sorted_prefixes = sorted(prefix_patterns.items(), key=lambda x: x[1], reverse=True) | |
| 106 | + for prefix, count in sorted_prefixes[:20]: | |
| 107 | + print(f" {prefix}:* - {count:,} 个 key") | |
| 108 | + if len(sorted_prefixes) > 20: | |
| 109 | + print(f" ... 还有 {len(sorted_prefixes) - 20} 个前缀") | |
| 110 | + | |
| 111 | + # 采样分析内存占用 | |
| 112 | + print(f"\n采样分析内存占用(采样前 {min(1000, total_keys)} 个 key)...") | |
| 113 | + | |
| 114 | + key_memories = [] | |
| 115 | + sample_size = min(1000, total_keys) | |
| 116 | + import random | |
| 117 | + sample_keys = random.sample(all_keys, sample_size) if total_keys > sample_size else all_keys | |
| 118 | + | |
| 119 | + processed = 0 | |
| 120 | + for key in sample_keys: | |
| 121 | + memory = get_key_memory_usage(client, key) | |
| 122 | + if memory > 0: | |
| 123 | + key_memories.append((key, memory)) | |
| 124 | + processed += 1 | |
| 125 | + if processed % 100 == 0: | |
| 126 | + print(f" 已处理: {processed}/{sample_size}") | |
| 127 | + | |
| 128 | + # 按内存排序 | |
| 129 | + key_memories.sort(key=lambda x: x[1], reverse=True) | |
| 130 | + | |
| 131 | + # 计算采样统计 | |
| 132 | + total_sample_memory = sum(mem for _, mem in key_memories) | |
| 133 | + avg_memory = total_sample_memory / len(key_memories) if key_memories else 0 | |
| 134 | + estimated_total_memory = avg_memory * total_keys | |
| 135 | + | |
| 136 | + print(f"\n采样统计:") | |
| 137 | + print(f" 采样 key 数量: {len(key_memories):,}") | |
| 138 | + print(f" 采样总内存: {format_bytes(total_sample_memory)}") | |
| 139 | + print(f" 平均每个 key 内存: {format_bytes(avg_memory)}") | |
| 140 | + print(f" 估算所有 key 总内存: {format_bytes(estimated_total_memory)}") | |
| 141 | + print(f" 实际 Redis 使用内存: {format_bytes(used_memory)}") | |
| 142 | + print(f" 差异: {format_bytes(used_memory - estimated_total_memory)}") | |
| 143 | + | |
| 144 | + # 显示占用内存最多的 key | |
| 145 | + print(f"\n占用内存最多的 {top_n} 个 key:") | |
| 146 | + print(f"{'排名':<6} {'内存':<15} {'Key'}") | |
| 147 | + print("-" * 80) | |
| 148 | + | |
| 149 | + for i, (key, memory) in enumerate(key_memories[:top_n], 1): | |
| 150 | + key_display = key[:60] + "..." if len(key) > 60 else key | |
| 151 | + print(f"{i:<6} {format_bytes(memory):<15} {key_display}") | |
| 152 | + | |
| 153 | + # 分析内存差异的原因 | |
| 154 | + print("\n" + "=" * 60) | |
| 155 | + print("内存差异分析") | |
| 156 | + print("=" * 60) | |
| 157 | + | |
| 158 | + difference = used_memory - estimated_total_memory | |
| 159 | + difference_percent = (difference / used_memory * 100) if used_memory > 0 else 0 | |
| 160 | + | |
| 161 | + print(f"实际内存: {format_bytes(used_memory)}") | |
| 162 | + print(f"估算 key 内存: {format_bytes(estimated_total_memory)}") | |
| 163 | + print(f"差异: {format_bytes(difference)} ({difference_percent:.1f}%)") | |
| 164 | + | |
| 165 | + print("\n可能的原因:") | |
| 166 | + print("1. Redis 内部数据结构开销(hash table、skiplist 等)") | |
| 167 | + print("2. 内存碎片") | |
| 168 | + print("3. Redis 进程本身的内存占用") | |
| 169 | + print("4. 其他数据结构(如 list、set、zset、hash)的内存开销更大") | |
| 170 | + print("5. 采样估算的误差") | |
| 171 | + | |
| 172 | + # 检查是否有大 value | |
| 173 | + print(f"\n检查是否有超大 value(> 1MB)...") | |
| 174 | + large_values = [] | |
| 175 | + for key, memory in key_memories[:100]: # 检查前 100 个最大的 | |
| 176 | + if memory > 1024 * 1024: # > 1MB | |
| 177 | + large_values.append((key, memory)) | |
| 178 | + | |
| 179 | + if large_values: | |
| 180 | + print(f"发现 {len(large_values)} 个超大 value (> 1MB):") | |
| 181 | + for key, memory in large_values[:10]: | |
| 182 | + key_display = key[:60] + "..." if len(key) > 60 else key | |
| 183 | + print(f" {format_bytes(memory):<15} {key_display}") | |
| 184 | + else: | |
| 185 | + print(" 未发现超大 value") | |
| 186 | + | |
| 187 | + # 检查 key 类型分布 | |
| 188 | + print(f"\n检查 key 类型分布(采样前 1000 个)...") | |
| 189 | + type_distribution = defaultdict(int) | |
| 190 | + for key in sample_keys[:1000]: | |
| 191 | + try: | |
| 192 | + key_type = client.type(key) | |
| 193 | + type_distribution[key_type] += 1 | |
| 194 | + except: | |
| 195 | + pass | |
| 196 | + | |
| 197 | + print("Key 类型分布:") | |
| 198 | + for key_type, count in sorted(type_distribution.items(), key=lambda x: x[1], reverse=True): | |
| 199 | + print(f" {key_type}: {count}") | |
| 200 | + | |
| 201 | + except Exception as e: | |
| 202 | + print(f"❌ 分析失败: {e}") | |
| 203 | + import traceback | |
| 204 | + traceback.print_exc() | |
| 205 | + | |
| 206 | +def main(): | |
| 207 | + """主函数""" | |
| 208 | + import argparse | |
| 209 | + | |
| 210 | + parser = argparse.ArgumentParser(description='查找 Redis 中占用内存的主要 key') | |
| 211 | + parser.add_argument('--top', type=int, default=50, help='显示占用内存最多的 N 个 key(默认 50)') | |
| 212 | + | |
| 213 | + args = parser.parse_args() | |
| 214 | + | |
| 215 | + print("Redis 内存占用分析工具") | |
| 216 | + print("=" * 60) | |
| 217 | + print(f"检查时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") | |
| 218 | + print() | |
| 219 | + | |
| 220 | + try: | |
| 221 | + client = get_redis_client() | |
| 222 | + client.ping() | |
| 223 | + print("✅ Redis 连接成功\n") | |
| 224 | + except Exception as e: | |
| 225 | + print(f"❌ Redis 连接失败: {e}") | |
| 226 | + return | |
| 227 | + | |
| 228 | + analyze_all_keys(client, top_n=args.top) | |
| 229 | + | |
| 230 | + print("\n" + "=" * 60) | |
| 231 | + print("分析完成") | |
| 232 | + print("=" * 60) | |
| 233 | + | |
| 234 | +if __name__ == "__main__": | |
| 235 | + main() | ... | ... |