add scripts :

scripts/redis/

add scripts :
scripts/redis/
tangwang
1 parent 153a592e
Showing 2 changed files with 486 additions and 57 deletions Show diff stats
scripts/check_cache_stats.py -> scripts/redis/check_cache_stats.py
scripts/redis/find_memory_usage.py
@@ -6,11 +6,20 @@
  
 使用方法：
  
-直接使用：
-python scripts/check_cache_stats.py 
+直接使用（默认数据库 0）：
+python scripts/redis/check_cache_stats.py 
  
-或者 只统计以下三种前缀：
-python scripts/check_cache_stats.py --prefix trans embedding product
+统计所有数据库：
+python scripts/redis/check_cache_stats.py --all-db
+
+统计指定数据库：
+python scripts/redis/check_cache_stats.py --db 1
+
+只统计以下三种前缀：
+python scripts/redis/check_cache_stats.py --prefix trans embedding product
+
+统计所有数据库的指定前缀：
+python scripts/redis/check_cache_stats.py --all-db --prefix trans embedding
  
  
  
@@ -36,18 +45,19 @@ from collections import defaultdict
 from pathlib import Path
 from datetime import datetime
  
-# 添加项目路径
-project_root = Path(__file__).parent.parent
+# 添加项目路径（文件在 scripts/redis/ 目录下，需要向上三级到项目根目录）
+project_root = Path(__file__).parent.parent.parent
 sys.path.insert(0, str(project_root))
  
 from config.env_config import REDIS_CONFIG
  
-def get_redis_client():
+def get_redis_client(db=0):
     """获取 Redis 客户端"""
     return redis.Redis(
         host=REDIS_CONFIG.get('host', 'localhost'),
         port=REDIS_CONFIG.get('port', 6479),
         password=REDIS_CONFIG.get('password'),
+        db=db,
         decode_responses=True,
         socket_timeout=10,
         socket_connect_timeout=10,
@@ -67,36 +77,51 @@ def format_bytes(bytes_size):
         bytes_size /= 1024.0
     return f"{bytes_size:.2f} PB"
  
-def get_key_memory_usage(client, key):
-    """获取单个 key 的内存占用量（字节）"""
+def get_key_memory_usage(client, key, use_real_memory=True):
+    """
+    获取单个 key 的内存占用量（字节）
+    
+    Args:
+        client: Redis 客户端
+        key: key 名称
+        use_real_memory: 是否使用真实的 MEMORY USAGE 命令（True=真实，False=估算）
+    
+    Returns:
+        内存占用量（字节）
+    """
     try:
-        # 使用 MEMORY USAGE 命令（Redis 4.0+）
-        try:
-            memory = client.execute_command('MEMORY', 'USAGE', key)
-            return memory if memory else 0
-        except:
-            # 如果 MEMORY USAGE 不可用，使用估算方法
-            # 获取 key 和 value 的大小
-            key_size = len(key.encode('utf-8'))
-            
-            # 获取 value
-            value = client.get(key)
-            if value:
-                value_size = len(value.encode('utf-8'))
-            else:
-                # 尝试获取其他类型
-                ttl = client.ttl(key)
-                if ttl == -2:  # key 不存在
-                    return 0
-                # 估算：key + 基础开销
-                value_size = 0
-            
-            # Redis 内存开销估算（粗略）
-            # key 对象开销: ~48 bytes
-            # value 对象开销: ~24 bytes
-            # 其他开销: ~100 bytes
-            overhead = 48 + 24 + 100
-            return key_size + value_size + overhead
+        if use_real_memory:
+            # 使用 MEMORY USAGE 命令（Redis 4.0+）- 这是真实的内存占用
+            try:
+                memory = client.execute_command('MEMORY', 'USAGE', key)
+                return memory if memory else 0
+            except:
+                # 如果 MEMORY USAGE 不可用，降级到估算方法
+                pass
+        
+        # 估算方法（不够准确，但速度快）
+        # 获取 key 和 value 的大小
+        key_size = len(key.encode('utf-8'))
+        
+        # 获取 value
+        value = client.get(key)
+        if value:
+            value_size = len(value.encode('utf-8'))
+        else:
+            # 尝试获取其他类型
+            ttl = client.ttl(key)
+            if ttl == -2:  # key 不存在
+                return 0
+            # 估算：key + 基础开销
+            value_size = 0
+        
+        # Redis 内存开销估算（粗略）
+        # key 对象开销: ~48 bytes
+        # value 对象开销: ~24 bytes
+        # 其他开销: ~100 bytes
+        # 注意：这个估算不准确，特别是对于复杂数据结构（hash、set、zset等）
+        overhead = 48 + 24 + 100
+        return key_size + value_size + overhead
     except Exception as e:
         return 0
  
@@ -111,8 +136,19 @@ def scan_all_keys(client, pattern="*"):
             break
     return keys
  
-def analyze_cache_by_prefix(client):
+def analyze_cache_by_prefix(client, args=None, db_num=0):
     """按前缀分析缓存"""
+    if args is None:
+        class Args:
+            real = False
+            sample_size = 100
+        args = Args()
+    
+    # 显示当前数据库
+    if db_num > 0:
+        print(f"\n{'='*60}")
+        print(f"数据库 {db_num}")
+        print(f"{'='*60}\n")
     print("=" * 60)
     print("扫描 Redis 中的所有 key...")
     print("=" * 60)
@@ -154,39 +190,67 @@ def analyze_cache_by_prefix(client):
         print("=" * 60)
         print("计算内存占用量...")
         print("=" * 60)
+        print("注意：")
+        print("  - 如果 key 数量 > 100，会采样前 100 个进行估算")
+        print("  - 优先使用 Redis MEMORY USAGE 命令（真实值）")
+        print("  - 如果 MEMORY USAGE 不可用，会使用估算方法（不准确）")
+        print("  - 估算方法只计算 key+value 大小，不包括 Redis 内部数据结构开销")
+        print()
+        
+        # 测试是否支持 MEMORY USAGE
+        test_key = all_keys[0] if all_keys else None
+        supports_memory_usage = False
+        if test_key:
+            try:
+                client.execute_command('MEMORY', 'USAGE', test_key)
+                supports_memory_usage = True
+                print("✅ Redis 支持 MEMORY USAGE 命令，将使用真实内存值")
+            except:
+                print("⚠️  Redis 不支持 MEMORY USAGE 命令，将使用估算方法（可能不准确）")
+        print()
  
         prefix_memory = {}
         for prefix, stats in prefix_stats.items():
             print(f"  计算 {prefix}:* 的内存...")
             total_memory = 0
-            sample_count = min(100, stats['count'])  # 采样前 100 个
+            # 如果指定了 --real，且数量不太大，计算全部
+            if args.real and stats['count'] <= 10000:
+                sample_count = stats['count']
+            else:
+                sample_count = min(args.sample_size, stats['count'])  # 采样
  
             # 如果数量较少，全部计算；否则采样计算
             if stats['count'] <= 100:
                 keys_to_check = stats['keys']
+                is_sampled = False
             else:
                 # 采样计算
                 import random
                 keys_to_check = random.sample(stats['keys'], sample_count)
+                is_sampled = True
  
             for key in keys_to_check:
-                memory = get_key_memory_usage(client, key)
+                memory = get_key_memory_usage(client, key, use_real_memory=supports_memory_usage)
                 total_memory += memory
  
             # 如果是采样，估算总内存
-            if stats['count'] > sample_count:
+            if is_sampled:
                 avg_memory = total_memory / sample_count
                 estimated_total = avg_memory * stats['count']
                 prefix_memory[prefix] = {
                     'memory': estimated_total,
                     'is_estimated': True,
-                    'sample_count': sample_count
+                    'is_sampled': True,
+                    'sample_count': sample_count,
+                    'uses_real_memory': supports_memory_usage
                 }
             else:
                 prefix_memory[prefix] = {
                     'memory': total_memory,
                     'is_estimated': False,
-                    'sample_count': stats['count']
+                    'is_sampled': False,
+                    'sample_count': stats['count'],
+                    'uses_real_memory': supports_memory_usage
                 }
  
         # 显示统计结果
@@ -203,15 +267,30 @@ def analyze_cache_by_prefix(client):
  
         total_memory_all = sum(pm['memory'] for pm in prefix_memory.values())
  
-        print(f"{'前缀':<20} {'条目数':>12} {'内存占用量':>20} {'占比':>10} {'说明'}")
-        print("-" * 80)
+        print(f"{'前缀':<20} {'条目数':>12} {'内存占用量和计算方式':>50} {'占比':>10} {'说明'}")
+        print("-" * 120)
  
         for prefix, stats in sorted_prefixes:
             memory_info = prefix_memory[prefix]
             memory = memory_info['memory']
-            memory_str = format_bytes(memory)
-            if memory_info['is_estimated']:
-                memory_str += f" (估算, 采样 {memory_info['sample_count']})"
+            
+            # 计算平均每条 key 的大小
+            avg_memory_per_key = memory / stats['count'] if stats['count'] > 0 else 0
+            avg_memory_str = format_bytes(avg_memory_per_key)
+            
+            # 标注内存计算方式和结果
+            if memory_info['is_sampled']:
+                if memory_info['uses_real_memory']:
+                    calc_method = f"采样估算(采样{memory_info['sample_count']}个, 使用真实MEMORY USAGE)"
+                else:
+                    calc_method = f"采样估算(采样{memory_info['sample_count']}个, 估算方法)"
+            else:
+                if memory_info['uses_real_memory']:
+                    calc_method = "真实值(全部计算, 使用MEMORY USAGE)"
+                else:
+                    calc_method = "估算值(全部计算, 估算方法)"
+            
+            memory_str = f"{format_bytes(memory)} | 每条: {avg_memory_str} | {calc_method}"
  
             percentage = (memory / total_memory_all * 100) if total_memory_all > 0 else 0
  
@@ -228,10 +307,14 @@ def analyze_cache_by_prefix(client):
             else:
                 description = "其他"
  
-            print(f"{prefix:<20} {stats['count']:>12,} {memory_str:>30} {percentage:>9.1f}%  {description}")
+            # 格式化输出，内存信息可能很长，需要适当处理
+            memory_display = memory_str[:70] + "..." if len(memory_str) > 70 else memory_str
+            print(f"{prefix:<20} {stats['count']:>12,} {memory_display:<70} {percentage:>9.1f}%  {description}")
  
-        print("-" * 80)
-        print(f"{'总计':<20} {total_keys:>12,} {format_bytes(total_memory_all):>30} {'100.0':>9}%")
+        print("-" * 120)
+        avg_total = total_memory_all / total_keys if total_keys > 0 else 0
+        total_display = f"{format_bytes(total_memory_all)} | 每条: {format_bytes(avg_total)}"
+        print(f"{'总计':<20} {total_keys:>12,} {total_display:<70} {'100.0':>9}%")
  
         # 显示详细信息
         print("\n" + "=" * 60)
@@ -239,8 +322,25 @@ def analyze_cache_by_prefix(client):
         print("=" * 60)
  
         for prefix, stats in sorted_prefixes[:10]:  # 只显示前 10 个
+            mem_info = prefix_memory[prefix]
+            avg_per_key = mem_info['memory'] / stats['count'] if stats['count'] > 0 else 0
+            
             print(f"\n{prefix}:* ({stats['count']:,} 个 key)")
-            print(f"  内存: {format_bytes(prefix_memory[prefix]['memory'])}")
+            print(f"  总内存: {format_bytes(mem_info['memory'])}")
+            print(f"  每条 key 平均: {format_bytes(avg_per_key)}")
+            
+            # 显示计算方式
+            if mem_info['is_sampled']:
+                if mem_info['uses_real_memory']:
+                    print(f"  计算方式: 采样估算（采样 {mem_info['sample_count']} 个，使用真实 MEMORY USAGE）")
+                else:
+                    print(f"  计算方式: 采样估算（采样 {mem_info['sample_count']} 个，使用估算方法）")
+            else:
+                if mem_info['uses_real_memory']:
+                    print(f"  计算方式: 真实值（全部计算，使用 MEMORY USAGE）")
+                else:
+                    print(f"  计算方式: 估算值（全部计算，使用估算方法）")
+            
             print(f"  示例 key:")
             for sample_key in stats['sample_keys'][:3]:
                 ttl = client.ttl(sample_key)
@@ -281,10 +381,13 @@ def analyze_cache_by_prefix(client):
         import traceback
         traceback.print_exc()
  
-def analyze_specific_prefixes(client, prefixes):
+def analyze_specific_prefixes(client, prefixes, db_num=0):
     """分析指定的前缀"""
     print("=" * 60)
-    print(f"分析指定前缀: {', '.join(prefixes)}")
+    if db_num > 0:
+        print(f"数据库 {db_num} - 分析指定前缀: {', '.join(prefixes)}")
+    else:
+        print(f"分析指定前缀: {', '.join(prefixes)}")
     print("=" * 60)
  
     for prefix in prefixes:
@@ -315,6 +418,77 @@ def analyze_specific_prefixes(client, prefixes):
         else:
             print(f"  内存占用量: {format_bytes(total_memory)}")
  
+def get_all_databases():
+    """Return the list of database numbers (probed over 0-15) that contain at least one key."""
+    databases = []
+    # Only indices 0-15 are probed (Redis's default database count). NOTE(review): a server configured with more databases is not fully covered.
+    for db_num in range(16):
+        try:
+            client = get_redis_client(db=db_num)
+            client.ping()
+            # Keep the database only when DBSIZE reports a non-zero key count.
+            key_count = client.dbsize()
+            if key_count > 0:
+                databases.append(db_num)
+        except:  # bare except: any failure on this index is silently treated as "no data here"
+            pass
+    return databases
+
+def analyze_all_databases(args):
+    """Run the per-database analysis on every non-empty database and print a key-count summary; reads args.prefix."""
+    print("=" * 60)
+    print("扫描所有数据库...")
+    print("=" * 60)
+    
+    databases = get_all_databases()
+    
+    if not databases:
+        print("未找到有数据的数据库")
+        return
+    
+    print(f"发现 {len(databases)} 个有数据的数据库: {databases}\n")
+    
+    # Cross-database accumulators.
+    total_stats_by_prefix = defaultdict(lambda: {'count': 0, 'memory': 0, 'dbs': []})  # NOTE(review): never read or updated below
+    total_keys_all_db = 0
+    total_memory_all_db = 0  # NOTE(review): never read or updated below
+    
+    for db_num in databases:
+        try:
+            client = get_redis_client(db=db_num)
+            client.ping()
+            db_size = client.dbsize()
+            
+            print(f"\n{'='*60}")
+            print(f"数据库 {db_num} (共 {db_size:,} 个 key)")
+            print(f"{'='*60}")
+            
+            if args.prefix:
+                analyze_specific_prefixes(client, args.prefix, db_num=db_num)
+            else:
+                # Full prefix breakdown for this database.
+                analyze_cache_by_prefix(client, args, db_num=db_num)
+                
+                # Simplified aggregation: only the key count is accumulated, and only on this (no --prefix) path.
+                total_keys_all_db += db_size
+                
+        except Exception as e:
+            print(f"❌ 数据库 {db_num} 分析失败: {e}")
+            import traceback
+            traceback.print_exc()
+            continue
+    
+    # The summary is printed only when no prefix filter was given.
+    if not args.prefix:
+        print("\n" + "=" * 60)
+        print("所有数据库汇总")
+        print("=" * 60)
+        print(f"有数据的数据库: {len(databases)} 个 ({', '.join(map(str, databases))})")
+        print(f"总 key 数量: {total_keys_all_db:,}")
+        print(f"\n提示: 要查看详细的内存统计，请分别运行每个数据库:")
+        for db_num in databases:
+            print(f"  python scripts/redis/check_cache_stats.py --db {db_num}")
+
 def main():
     """主函数"""
     import argparse
@@ -322,6 +496,10 @@ def main():
     parser = argparse.ArgumentParser(description='统计 Redis 缓存的条目数和内存占用量')
     parser.add_argument('--prefix', nargs='+', help='指定要分析的前缀（如: trans embedding）')
     parser.add_argument('--all', action='store_true', help='分析所有前缀（默认）')
+    parser.add_argument('--real', action='store_true', help='计算所有 key 的真实内存（很慢，但准确）')
+    parser.add_argument('--sample-size', type=int, default=100, help='采样大小（默认 100，仅当 key 数量 > 采样大小时使用）')
+    parser.add_argument('--db', type=int, help='指定数据库编号（0-15），默认只统计 db 0')
+    parser.add_argument('--all-db', action='store_true', help='统计所有数据库（0-15）')
  
     args = parser.parse_args()
  
@@ -330,22 +508,38 @@ def main():
     print(f"检查时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
     print()
  
+    # 如果指定了 --all-db，分析所有数据库
+    if args.all_db:
+        analyze_all_databases(args)
+        print("\n" + "=" * 60)
+        print("统计完成")
+        print("=" * 60)
+        return
+    
+    # 否则分析指定或默认的数据库
+    db_num = args.db if args.db is not None else 0
+    
     try:
-        client = get_redis_client()
+        client = get_redis_client(db=db_num)
         client.ping()
-        print("✅ Redis 连接成功\n")
+        if db_num > 0:
+            print(f"✅ Redis 连接成功（数据库 {db_num}）\n")
+        else:
+            print("✅ Redis 连接成功（默认数据库 0）\n")
     except Exception as e:
         print(f"❌ Redis 连接失败: {e}")
         print(f"\n请检查:")
         print(f"  - Host: {REDIS_CONFIG.get('host', 'localhost')}")
         print(f"  - Port: {REDIS_CONFIG.get('port', 6479)}")
         print(f"  - Password: {'已配置' if REDIS_CONFIG.get('password') else '未配置'}")
+        print(f"  - Database: {db_num}")
         return
  
     if args.prefix:
-        analyze_specific_prefixes(client, args.prefix)
+        analyze_specific_prefixes(client, args.prefix, db_num=db_num)
     else:
-        analyze_cache_by_prefix(client)
+        # 传递参数到分析函数
+        analyze_cache_by_prefix(client, args, db_num=db_num)
  
     print("\n" + "=" * 60)
     print("统计完成")
@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+"""
+Find the keys that account for most of the memory used in Redis.
+
+Helps explain why the measured cache memory differs greatly from the total memory.
+"""
+
+import redis
+import os
+import sys
+from collections import defaultdict
+from pathlib import Path
+from datetime import datetime
+
+# Put the project root on sys.path: this file sits in scripts/redis/, so the root is three .parent steps up from the file.
+project_root = Path(__file__).parent.parent.parent
+sys.path.insert(0, str(project_root))
+
+from config.env_config import REDIS_CONFIG
+
+def get_redis_client():
+    """Build a Redis client from REDIS_CONFIG; no db argument is passed, so the client library's default database is used."""
+    return redis.Redis(
+        host=REDIS_CONFIG.get('host', 'localhost'),
+        port=REDIS_CONFIG.get('port', 6479),  # NOTE(review): fallback is 6479, not Redis's usual 6379 — confirm intentional
+        password=REDIS_CONFIG.get('password'),
+        decode_responses=True,  # replies come back as str, which later code relies on (e.g. ':' in key)
+        socket_timeout=10,
+        socket_connect_timeout=10,
+    )
+
+def format_bytes(bytes_size):
+    """Format a byte count as a two-decimal string with a B/KB/MB/GB/TB/PB unit (1024-based)."""
+    for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
+        if bytes_size < 1024.0:  # a negative input always matches here and is reported in B, unscaled
+            return f"{bytes_size:.2f} {unit}"
+        bytes_size /= 1024.0
+    return f"{bytes_size:.2f} PB"
+
+def get_key_memory_usage(client, key):
+    """Return the memory footprint of one key in bytes, or 0 when it cannot be determined."""
+    try:
+        # Preferred path: ask the server with the MEMORY USAGE command (Redis 4.0+).
+        try:
+            memory = client.execute_command('MEMORY', 'USAGE', key)
+            return memory if memory else 0  # a falsy reply (None or 0) is reported as 0
+        except:
+            # Fallback when MEMORY USAGE fails: rough estimate from the UTF-8 lengths of key and value.
+            key_size = len(key.encode('utf-8'))
+            value = client.get(key)  # NOTE(review): GET presumably errors on non-string types, which the outer except turns into 0 — confirm
+            if value:
+                value_size = len(value.encode('utf-8'))
+            else:
+                value_size = 0
+            overhead = 48 + 24 + 100  # fixed overhead constant (bytes) added to every estimate
+            return key_size + value_size + overhead
+    except Exception as e:  # any failure in the fallback is reported as 0 bytes; e is unused
+        return 0
+
+def analyze_all_keys(client, top_n=50):
+    """Scan all keys, measure a sample of up to 1000, and print the top_n largest sampled keys plus a comparison with used_memory."""
+    print("=" * 60)
+    print("分析所有 key 的内存占用")
+    print("=" * 60)
+    
+    try:
+        # Read the server-wide memory figures from INFO memory.
+        info = client.info('memory')
+        used_memory = info.get('used_memory', 0)
+        used_memory_human = info.get('used_memory_human', '0B')
+        
+        print(f"Redis 总使用内存: {used_memory_human} ({used_memory:,} bytes)\n")
+        
+        # Collect every key name with SCAN (count hint 1000); the whole list is kept in memory.
+        print("扫描所有 key...")
+        all_keys = []
+        cursor = 0
+        while True:
+            cursor, batch = client.scan(cursor, count=1000)
+            all_keys.extend(batch)
+            if cursor == 0:
+                break
+        
+        total_keys = len(all_keys)
+        print(f"总 key 数量: {total_keys:,}\n")
+        
+        # Group keys by the text before the first ':'; keys without ':' are counted as unprefixed.
+        print("分析 key 命名模式...")
+        no_prefix_count = 0
+        prefix_patterns = defaultdict(int)
+        
+        for key in all_keys:
+            if ':' in key:
+                prefix = key.split(':', 1)[0]
+                prefix_patterns[prefix] += 1
+            else:
+                no_prefix_count += 1
+        
+        print(f"  无前缀的 key: {no_prefix_count:,}")
+        print(f"  有前缀的 key: {total_keys - no_prefix_count:,}")
+        print(f"  不同前缀数量: {len(prefix_patterns):,}\n")
+        
+        # Prefixes sorted by descending key count; only the first 20 are printed.
+        print("所有前缀列表:")
+        sorted_prefixes = sorted(prefix_patterns.items(), key=lambda x: x[1], reverse=True)
+        for prefix, count in sorted_prefixes[:20]:
+            print(f"  {prefix}:* - {count:,} 个 key")
+        if len(sorted_prefixes) > 20:
+            print(f"  ... 还有 {len(sorted_prefixes) - 20} 个前缀")
+        
+        # Measure memory on at most 1000 keys.
+        print(f"\n采样分析内存占用（采样前 {min(1000, total_keys)} 个 key）...")
+        
+        key_memories = []
+        sample_size = min(1000, total_keys)
+        import random
+        sample_keys = random.sample(all_keys, sample_size) if total_keys > sample_size else all_keys  # a random sample, although the message above says "first N"
+        
+        processed = 0
+        for key in sample_keys:
+            memory = get_key_memory_usage(client, key)
+            if memory > 0:
+                key_memories.append((key, memory))
+            processed += 1
+            if processed % 100 == 0:
+                print(f"  已处理: {processed}/{sample_size}")
+        
+        # Largest first.
+        key_memories.sort(key=lambda x: x[1], reverse=True)
+        
+        # Extrapolate the sample average to all keys; keys that measured 0 are excluded from the average.
+        total_sample_memory = sum(mem for _, mem in key_memories)
+        avg_memory = total_sample_memory / len(key_memories) if key_memories else 0
+        estimated_total_memory = avg_memory * total_keys
+        
+        print(f"\n采样统计:")
+        print(f"  采样 key 数量: {len(key_memories):,}")
+        print(f"  采样总内存: {format_bytes(total_sample_memory)}")
+        print(f"  平均每个 key 内存: {format_bytes(avg_memory)}")
+        print(f"  估算所有 key 总内存: {format_bytes(estimated_total_memory)}")
+        print(f"  实际 Redis 使用内存: {format_bytes(used_memory)}")
+        print(f"  差异: {format_bytes(used_memory - estimated_total_memory)}")
+        
+        # Top top_n keys among the measured sample (not among all keys).
+        print(f"\n占用内存最多的 {top_n} 个 key:")
+        print(f"{'排名':<6} {'内存':<15} {'Key'}")
+        print("-" * 80)
+        
+        for i, (key, memory) in enumerate(key_memories[:top_n], 1):
+            key_display = key[:60] + "..." if len(key) > 60 else key
+            print(f"{i:<6} {format_bytes(memory):<15} {key_display}")
+        
+        # Report the gap between used_memory and the extrapolated key memory.
+        print("\n" + "=" * 60)
+        print("内存差异分析")
+        print("=" * 60)
+        
+        difference = used_memory - estimated_total_memory
+        difference_percent = (difference / used_memory * 100) if used_memory > 0 else 0
+        
+        print(f"实际内存: {format_bytes(used_memory)}")
+        print(f"估算 key 内存: {format_bytes(estimated_total_memory)}")
+        print(f"差异: {format_bytes(difference)} ({difference_percent:.1f}%)")
+        
+        print("\n可能的原因:")
+        print("1. Redis 内部数据结构开销（hash table、skiplist 等）")
+        print("2. 内存碎片")
+        print("3. Redis 进程本身的内存占用")
+        print("4. 其他数据结构（如 list、set、zset、hash）的内存开销更大")
+        print("5. 采样估算的误差")
+        
+        # Look for very large entries among the measured keys.
+        print(f"\n检查是否有超大 value（> 1MB）...")
+        large_values = []
+        for key, memory in key_memories[:100]:  # only the 100 largest measured keys are checked
+            if memory > 1024 * 1024:  # > 1MB
+                large_values.append((key, memory))
+        
+        if large_values:
+            print(f"发现 {len(large_values)} 个超大 value (> 1MB):")
+            for key, memory in large_values[:10]:
+                key_display = key[:60] + "..." if len(key) > 60 else key
+                print(f"  {format_bytes(memory):<15} {key_display}")
+        else:
+            print("  未发现超大 value")
+        
+        # Type distribution over the same sample (one TYPE call per key; failures are skipped).
+        print(f"\n检查 key 类型分布（采样前 1000 个）...")
+        type_distribution = defaultdict(int)
+        for key in sample_keys[:1000]:
+            try:
+                key_type = client.type(key)
+                type_distribution[key_type] += 1
+            except:
+                pass
+        
+        print("Key 类型分布:")
+        for key_type, count in sorted(type_distribution.items(), key=lambda x: x[1], reverse=True):
+            print(f"  {key_type}: {count}")
+        
+    except Exception as e:
+        print(f"❌ 分析失败: {e}")
+        import traceback
+        traceback.print_exc()
+
+def main():
+    """Parse --top, connect to Redis, and run analyze_all_keys; returns early if the connection check fails."""
+    import argparse
+    
+    parser = argparse.ArgumentParser(description='查找 Redis 中占用内存的主要 key')
+    parser.add_argument('--top', type=int, default=50, help='显示占用内存最多的 N 个 key（默认 50）')
+    
+    args = parser.parse_args()
+    
+    print("Redis 内存占用分析工具")
+    print("=" * 60)
+    print(f"检查时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    print()
+    
+    try:
+        client = get_redis_client()
+        client.ping()  # fail fast before the analysis starts
+        print("✅ Redis 连接成功\n")
+    except Exception as e:
+        print(f"❌ Redis 连接失败: {e}")
+        return
+    
+    analyze_all_keys(client, top_n=args.top)
+    
+    print("\n" + "=" * 60)
+    print("分析完成")
+    print("=" * 60)
+
+if __name__ == "__main__":
+    main()