Commit 153a592eca2170d509a46d9a4d8c0a93b3b9dc93

Authored by tangwang
1 parent a866b688

redis统计脚本

query/translator.py
@@ -681,7 +681,7 @@ class Translator: @@ -681,7 +681,7 @@ class Translator:
681 try: 681 try:
682 cache_key = f"{self.cache_prefix}:{target_lang.upper()}:{text}" 682 cache_key = f"{self.cache_prefix}:{target_lang.upper()}:{text}"
683 self.redis_client.setex(cache_key, self.expire_seconds, translation) 683 self.redis_client.setex(cache_key, self.expire_seconds, translation)
684 - logger.debug( 684 + logger.info(
685 f"[Translator] Redis cache write | Original text: '{text}' | Target language: {target_lang} | " 685 f"[Translator] Redis cache write | Original text: '{text}' | Target language: {target_lang} | "
686 f"Cache key: {cache_key} | Translation result: '{translation}'" 686 f"Cache key: {cache_key} | Translation result: '{translation}'"
687 ) 687 )
scripts/check_cache_stats.py 0 → 100755
@@ -0,0 +1,355 @@ @@ -0,0 +1,355 @@
  1 +#!/usr/bin/env python3
  2 +"""
  3 +统计各种缓存的条目数和内存占用量
  4 +
  5 +按 key 前缀分类统计,帮助了解不同缓存的使用情况
  6 +
  7 +使用方法:
  8 +
  9 +直接使用:
  10 +python scripts/check_cache_stats.py
  11 +
  12 +或者 只统计以下三种前缀:
  13 +python scripts/check_cache_stats.py --prefix trans embedding product
  14 +
  15 +
  16 +
  17 +
  18 +其他简单的统计方法(不依赖本脚本,直接使用 redis-cli 命令;密码通过环境变量 REDIS_PASSWORD 提供,请勿将明文密码写入源码):
  19 +
  20 +# 查看所有 key 的前缀分布(快速但不准确)
  21 +redis-cli -h localhost -p 6479 -a "$REDIS_PASSWORD" --no-auth-warning --scan --pattern "*" | cut -d: -f1 | sort | uniq -c | sort -rn
  22 +
  23 +# 统计特定前缀的数量
  24 +redis-cli -h localhost -p 6479 -a "$REDIS_PASSWORD" --no-auth-warning --scan --pattern "trans:*" | wc -l
  25 +redis-cli -h localhost -p 6479 -a "$REDIS_PASSWORD" --no-auth-warning --scan --pattern "embedding:*" | wc -l
  26 +
  27 +# 查看内存统计 ( Redis MEMORY STATS )
  28 +redis-cli -h localhost -p 6479 -a "$REDIS_PASSWORD" --no-auth-warning MEMORY STATS
  29 +
  30 +"""
  31 +
  32 +import redis
  33 +import os
  34 +import sys
  35 +from collections import defaultdict
  36 +from pathlib import Path
  37 +from datetime import datetime
  38 +
  39 +# 添加项目路径
  40 +project_root = Path(__file__).parent.parent
  41 +sys.path.insert(0, str(project_root))
  42 +
  43 +from config.env_config import REDIS_CONFIG
  44 +
def get_redis_client():
    """Build a Redis client from the project's ``REDIS_CONFIG`` mapping.

    Host and port fall back to ``localhost`` / ``6479`` when absent from the
    config; the password is passed through as-is (``None`` when missing).
    Responses are decoded to ``str`` and both the connect and the read
    timeout are 10 seconds.
    """
    connection_options = {
        'host': REDIS_CONFIG.get('host', 'localhost'),
        'port': REDIS_CONFIG.get('port', 6479),
        'password': REDIS_CONFIG.get('password'),
        'decode_responses': True,
        'socket_timeout': 10,
        'socket_connect_timeout': 10,
    }
    return redis.Redis(**connection_options)
  55 +
def get_key_prefix(key):
    """Return the part of ``key`` before its first colon.

    A key that contains no colon is returned unchanged.
    """
    # partition() yields the whole string as the head when ':' is absent,
    # which matches the "no colon -> return key" contract.
    head, _separator, _rest = key.partition(':')
    return head
  61 +
def format_bytes(bytes_size):
    """Render a byte count as a human-readable string with two decimals.

    The value is divided by 1024 until it drops below 1024 or the unit list
    (B, KB, MB, GB, TB) is exhausted; anything larger is reported in PB.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB')
    size = bytes_size
    index = 0
    while index < len(units):
        if size < 1024.0:
            return f"{size:.2f} {units[index]}"
        size /= 1024.0
        index += 1
    # Ran past TB: whatever is left is expressed in petabytes.
    return f"{size:.2f} PB"
  69 +
def _byte_length(data):
    """Return the encoded size of ``data``: raw length for bytes, UTF-8 length for str."""
    if isinstance(data, (bytes, bytearray)):
        return len(data)
    return len(data.encode('utf-8'))


def get_key_memory_usage(client, key):
    """Return the memory footprint of a single key, in bytes.

    The ``MEMORY USAGE`` command is tried first. If that command fails, the
    size is estimated as ``len(key) + len(value) + 172`` bytes, where the
    value is read with ``GET``.

    Args:
        client: Redis client exposing ``execute_command``, ``get`` and ``ttl``.
        key: Key name (``str``, or ``bytes`` when the client does not decode
            responses).

    Returns:
        The reported or estimated size in bytes. ``0`` when ``MEMORY USAGE``
        returns a falsy value, when the key no longer exists, or when the
        fallback estimation itself fails.
    """
    try:
        memory = client.execute_command('MEMORY', 'USAGE', key)
        return memory if memory else 0
    except Exception:
        # Deliberate fall-through: MEMORY USAGE is unavailable or failed, so
        # estimate below. Only ``Exception`` is caught so that Ctrl+C
        # (KeyboardInterrupt) and SystemExit still propagate.
        pass

    try:
        key_size = _byte_length(key)

        value = client.get(key)
        if value:
            value_size = _byte_length(value)
        else:
            # Empty or missing value: TTL == -2 means the key does not exist.
            if client.ttl(key) == -2:
                return 0
            # Key exists but has no readable string payload: count only the
            # key name plus the fixed overhead.
            value_size = 0

        # Rough per-entry overhead: ~48 bytes for the key object, ~24 bytes
        # for the value object, ~100 bytes for everything else.
        overhead = 48 + 24 + 100
        return key_size + value_size + overhead
    except Exception:
        # Best-effort statistic: a key that cannot be measured counts as 0.
        return 0
  102 +
def scan_all_keys(client, pattern="*"):
    """Collect every key matching ``pattern`` by paging through ``SCAN``.

    Each call asks for batches of roughly 1000 keys and iteration stops when
    the server hands back cursor 0. All matching keys are returned in one
    list, in the order the batches arrived.
    """
    collected = []
    position, page = client.scan(0, match=pattern, count=1000)
    collected += page
    # A zero cursor signals that the scan has completed.
    while position != 0:
        position, page = client.scan(position, match=pattern, count=1000)
        collected += page
    return collected
  113 +
def _estimate_prefix_memory(client, keys, sample_limit=100):
    """Measure (or estimate from a random sample) the total memory of ``keys``."""
    import random

    count = len(keys)
    sample_count = min(sample_limit, count)
    # Small groups are measured exhaustively; large ones through a random sample.
    sampled = keys if count <= sample_limit else random.sample(keys, sample_count)
    measured = sum(get_key_memory_usage(client, key) for key in sampled)

    if count > sample_count:
        # Extrapolate the sample average to the whole group.
        return {
            'memory': measured / sample_count * count,
            'is_estimated': True,
            'sample_count': sample_count,
        }
    return {
        'memory': measured,
        'is_estimated': False,
        'sample_count': count,
    }


def _describe_prefix(prefix):
    """Map a key prefix to the human-readable cache category shown in the table."""
    if prefix == 'trans':
        return "翻译缓存"
    if prefix.startswith(('embedding', 'emb')):
        return "向量化缓存"
    if prefix.startswith(('session', 'user')):
        return "会话/用户缓存"
    if prefix.startswith(('product', 'item')):
        return "商品缓存"
    return "其他"


def _format_ttl(ttl):
    """Render a TTL reply: -1 means no expiry, -2 means key gone, otherwise days."""
    if ttl == -1:
        return "无过期时间"
    if ttl == -2:
        return "已过期"
    return f"{ttl/86400:.1f} 天"


def analyze_cache_by_prefix(client):
    """Scan every key, group by prefix, and print count/memory statistics.

    The report has four parts: a scan progress log, a per-prefix table sorted
    by memory (largest first), sample keys with their TTL for the ten largest
    prefixes, and the server's own memory figures from ``INFO memory``.

    Memory for a prefix with more than 100 keys is extrapolated from a random
    sample of 100 keys and flagged as an estimate in the table.

    Any exception raised during the analysis is reported on stdout together
    with a traceback; nothing is re-raised and nothing is returned.

    Args:
        client: Redis client exposing ``scan``, ``ttl``, ``info`` and the
            calls used by ``get_key_memory_usage``.
    """
    print("=" * 60)
    print("扫描 Redis 中的所有 key...")
    print("=" * 60)

    try:
        all_keys = scan_all_keys(client)
        total_keys = len(all_keys)

        print(f"总 key 数量: {total_keys:,}")
        print("开始分析...\n")

        # prefix -> counters, the full key list, and up to 5 keys for display.
        prefix_stats = defaultdict(lambda: {
            'count': 0,
            'memory': 0,
            'keys': [],
            'sample_keys': [],
        })

        for processed, key in enumerate(all_keys, start=1):
            stats = prefix_stats[get_key_prefix(key)]
            stats['count'] += 1
            stats['keys'].append(key)

            if len(stats['sample_keys']) < 5:
                stats['sample_keys'].append(key)

            if processed % 1000 == 0:
                print(f" 已处理: {processed:,} / {total_keys:,} ({processed*100//total_keys}%)")

        print(f" 完成: {total_keys:,} / {total_keys:,}\n")

        print("=" * 60)
        print("计算内存占用量...")
        print("=" * 60)

        prefix_memory = {}
        for prefix, stats in prefix_stats.items():
            print(f" 计算 {prefix}:* 的内存...")
            prefix_memory[prefix] = _estimate_prefix_memory(client, stats['keys'])

        print("\n" + "=" * 60)
        print("缓存统计结果(按前缀分类)")
        print("=" * 60)

        # Largest memory consumers first.
        sorted_prefixes = sorted(
            prefix_stats.items(),
            key=lambda item: prefix_memory[item[0]]['memory'],
            reverse=True,
        )

        total_memory_all = sum(pm['memory'] for pm in prefix_memory.values())

        # The memory column is 30 wide here so the header lines up with the
        # data rows and the total row below, which both use width 30.
        print(f"{'前缀':<20} {'条目数':>12} {'内存占用量':>30} {'占比':>10} {'说明'}")
        print("-" * 80)

        for prefix, stats in sorted_prefixes:
            memory_info = prefix_memory[prefix]
            memory = memory_info['memory']
            memory_str = format_bytes(memory)
            if memory_info['is_estimated']:
                memory_str += f" (估算, 采样 {memory_info['sample_count']})"

            percentage = (memory / total_memory_all * 100) if total_memory_all > 0 else 0
            description = _describe_prefix(prefix)

            print(f"{prefix:<20} {stats['count']:>12,} {memory_str:>30} {percentage:>9.1f}% {description}")

        print("-" * 80)
        print(f"{'总计':<20} {total_keys:>12,} {format_bytes(total_memory_all):>30} {'100.0':>9}%")

        print("\n" + "=" * 60)
        print("详细信息(每个前缀的示例 key)")
        print("=" * 60)

        # Only the ten largest prefixes, three sample keys each.
        for prefix, stats in sorted_prefixes[:10]:
            print(f"\n{prefix}:* ({stats['count']:,} 个 key)")
            print(f" 内存: {format_bytes(prefix_memory[prefix]['memory'])}")
            print(" 示例 key:")
            for sample_key in stats['sample_keys'][:3]:
                ttl_str = _format_ttl(client.ttl(sample_key))
                # Long key names are truncated to 60 characters for display.
                key_display = sample_key[:60] + "..." if len(sample_key) > 60 else sample_key
                print(f" - {key_display} (TTL: {ttl_str})")

        print("\n" + "=" * 60)
        print("Redis 内存使用情况")
        print("=" * 60)

        try:
            info = client.info('memory')
            used_memory = info.get('used_memory', 0)
            used_memory_human = info.get('used_memory_human', '0B')
            maxmemory = info.get('maxmemory', 0)
            maxmemory_human = info.get('maxmemory_human', '0B')

            print(f"Redis 总使用内存: {used_memory_human} ({used_memory:,} bytes)")
            print(f"统计的缓存内存: {format_bytes(total_memory_all)}")
            print(f"内存占比: {(total_memory_all / used_memory * 100) if used_memory > 0 else 0:.1f}%")

            # The usage ratio is only printed when maxmemory is greater than 0.
            if maxmemory > 0:
                print(f"最大内存限制: {maxmemory_human} ({maxmemory:,} bytes)")
                usage_percent = (used_memory / maxmemory) * 100
                print(f"内存使用率: {usage_percent:.2f}%")
        except Exception as e:
            print(f"获取内存信息失败: {e}")

    except Exception as e:
        print(f"❌ 分析失败: {e}")
        import traceback
        traceback.print_exc()
  283 +
def analyze_specific_prefixes(client, prefixes):
    """Print key count and memory usage for each requested prefix.

    For every prefix the keys matching ``<prefix>:*`` are scanned. Groups of
    at most 100 keys are measured exhaustively; larger groups are estimated
    by extrapolating the average of a random sample of 100 keys.
    """
    import random

    banner = "=" * 60
    print(banner)
    print(f"分析指定前缀: {', '.join(prefixes)}")
    print(banner)

    for prefix in prefixes:
        matched = scan_all_keys(client, pattern=f"{prefix}:*")

        if not matched:
            print(f"\n{prefix}:* - 未找到 key")
            continue

        key_count = len(matched)
        print(f"\n{prefix}:*")
        print(f" 条目数: {key_count:,}")

        # Decide between exhaustive measurement and sampling.
        sample_size = min(100, key_count)
        is_sampled = key_count > sample_size
        probed = random.sample(matched, sample_size) if is_sampled else matched

        measured = sum(get_key_memory_usage(client, name) for name in probed)

        if is_sampled:
            per_key = measured / sample_size
            projected = per_key * key_count
            print(f" 内存占用量: {format_bytes(projected)} (估算, 采样 {sample_size})")
        else:
            print(f" 内存占用量: {format_bytes(measured)}")
  317 +
def main():
    """Command-line entry point for the cache statistics tool.

    With ``--prefix`` only the listed prefixes are analysed; otherwise every
    key is scanned and grouped by prefix. ``--all`` is accepted but not read,
    because a full analysis is already the default.

    Returns:
        Process exit status: ``0`` after the report has been printed, ``1``
        when the Redis connection could not be established, so that shells
        and schedulers can detect the failure.
    """
    import argparse

    parser = argparse.ArgumentParser(description='统计 Redis 缓存的条目数和内存占用量')
    parser.add_argument('--prefix', nargs='+', help='指定要分析的前缀(如: trans embedding)')
    parser.add_argument('--all', action='store_true', help='分析所有前缀(默认)')

    args = parser.parse_args()

    print("Redis 缓存统计工具")
    print("=" * 60)
    print(f"检查时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print()

    try:
        client = get_redis_client()
        # ping() forces a round trip so connection/auth errors surface here.
        client.ping()
        print("✅ Redis 连接成功\n")
    except Exception as e:
        print(f"❌ Redis 连接失败: {e}")
        print("\n请检查:")
        print(f" - Host: {REDIS_CONFIG.get('host', 'localhost')}")
        print(f" - Port: {REDIS_CONFIG.get('port', 6479)}")
        # Only report whether a password is configured, never its value.
        print(f" - Password: {'已配置' if REDIS_CONFIG.get('password') else '未配置'}")
        return 1

    if args.prefix:
        analyze_specific_prefixes(client, args.prefix)
    else:
        analyze_cache_by_prefix(client)

    print("\n" + "=" * 60)
    print("统计完成")
    print("=" * 60)
    return 0


if __name__ == "__main__":
    # Propagate main()'s status as the process exit code.
    sys.exit(main())
scripts/monitor_eviction.py 0 → 100755
@@ -0,0 +1,89 @@ @@ -0,0 +1,89 @@
  1 +#!/usr/bin/env python3
  2 +"""
  3 +实时监控 Redis 缓存淘汰事件
  4 +
  5 +持续监控 evicted_keys 统计,当有新的淘汰发生时发出警告
  6 +"""
  7 +
  8 +import redis
  9 +import time
  10 +import sys
  11 +from pathlib import Path
  12 +from datetime import datetime
  13 +
  14 +# 添加项目路径
  15 +project_root = Path(__file__).parent.parent
  16 +sys.path.insert(0, str(project_root))
  17 +
  18 +from config.env_config import REDIS_CONFIG
  19 +
def get_redis_client():
    """Build a Redis client from the project's ``REDIS_CONFIG`` mapping.

    Host and port fall back to ``localhost`` / ``6479`` when absent from the
    config; the password is passed through as-is (``None`` when missing).
    Responses are decoded to ``str`` and both the connect and the read
    timeout are 5 seconds.
    """
    connection_options = {
        'host': REDIS_CONFIG.get('host', 'localhost'),
        'port': REDIS_CONFIG.get('port', 6479),
        'password': REDIS_CONFIG.get('password'),
        'decode_responses': True,
        'socket_timeout': 5,
        'socket_connect_timeout': 5,
    }
    return redis.Redis(**connection_options)
  30 +
def monitor_eviction(interval=5):
    """Poll Redis and warn whenever the ``evicted_keys`` counter grows.

    The first poll only establishes a baseline: ``evicted_keys`` is a
    cumulative counter, so evictions that happened before monitoring started
    are not reported as new. The baseline is refreshed on every poll, so a
    counter that drops does not hide later evictions.

    Runs until interrupted with Ctrl+C. Connection failures and unexpected
    errors are printed to stdout; nothing is raised to the caller.

    Args:
        interval: Seconds to sleep between polls.
    """
    print("=" * 60)
    print("Redis 缓存淘汰实时监控")
    print("=" * 60)
    print(f"监控间隔: {interval} 秒")
    print("按 Ctrl+C 停止监控")
    print("=" * 60)
    print()

    try:
        client = get_redis_client()
        client.ping()
    except Exception as e:
        print(f"❌ Redis 连接失败: {e}")
        return

    # None until the first poll has recorded the baseline.
    last_evicted = None

    try:
        while True:
            info = client.info('stats')
            current_evicted = info.get('evicted_keys', 0)
            timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

            if last_evicted is not None and current_evicted > last_evicted:
                new_evictions = current_evicted - last_evicted
                print(f"[{timestamp}] ⚠️ 检测到 {new_evictions} 个新的淘汰事件!")
                print(f" 累计淘汰总数: {current_evicted:,}")

                # Show the current memory usage ratio alongside the alert;
                # it is only printed when maxmemory is greater than 0.
                mem_info = client.info('memory')
                maxmemory = mem_info.get('maxmemory', 0)
                used_memory = mem_info.get('used_memory', 0)
                if maxmemory > 0:
                    usage_percent = (used_memory / maxmemory) * 100
                    print(f" 当前内存使用率: {usage_percent:.2f}%")
            else:
                print(f"[{timestamp}] ✅ 无新淘汰事件 (累计: {current_evicted:,})")

            # Always track the latest value, including after a counter drop.
            last_evicted = current_evicted

            time.sleep(interval)

    except KeyboardInterrupt:
        print("\n\n监控已停止")
    except Exception as e:
        print(f"\n❌ 监控出错: {e}")
        import traceback
        traceback.print_exc()
  82 +
if __name__ == "__main__":
    import argparse

    # Script entry: read the polling interval from the command line and start monitoring.
    cli = argparse.ArgumentParser(description='实时监控 Redis 缓存淘汰事件')
    cli.add_argument('--interval', type=int, default=5, help='监控间隔(秒),默认 5 秒')
    options = cli.parse_args()
    monitor_eviction(interval=options.interval)