# diagnose_issues.py 7.74 KB
#!/usr/bin/env python3
"""
诊断翻译和向量生成问题
"""

import sys
import os
import traceback

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

def diagnose_translation_issue():
    """Diagnose the translation feature and print the findings.

    Imports the project translator, looks up the DeepL API key, builds a
    ``Translator`` and runs several sample translations of a fixed test
    phrase, printing every result.  Any ``Exception`` raised along the way
    is reported on stdout together with a traceback instead of being
    propagated to the caller.
    """
    print("🔍 诊断翻译功能...")
    print("-" * 50)

    try:
        from query.translator import Translator
        from config.env_config import get_deepl_key

        # Look up the API key.  A failing lookup is not fatal: the
        # diagnosis simply continues without a key.
        try:
            api_key = get_deepl_key()
        except Exception as e:
            print(f"❌ DeepL API密钥配置失败: {e}")
            api_key = None
        else:
            if api_key:
                # Print a short mask only; never echo the key itself.
                print(f"✅ DeepL API密钥已配置: {'*' * len(api_key[:8])}")
            else:
                # A missing/empty key must not be reported as configured.
                print("⚠️  DeepL API密钥未配置")

        # Build the translator.
        translator = Translator(api_key=api_key, use_cache=True)
        print(f"✅ 翻译器创建成功,API密钥状态: {'已配置' if api_key else '未配置'}")

        # Sample text used for every translation check below.
        test_text = "推车"
        print(f"\n📝 测试翻译文本: '{test_text}'")

        # Single-target translation to English.
        result_en = translator.translate(test_text, "en", "zh")
        print(f"🇺🇸 英文翻译结果: {result_en}")

        # Single-target translation to Russian.
        result_ru = translator.translate(test_text, "ru", "zh")
        print(f"🇷🇺 俄文翻译结果: {result_ru}")

        # Multi-target translation in one call.
        results = translator.translate_multi(test_text, ["en", "ru"], "zh")
        print(f"🌍 多语言翻译结果: {results}")

        # Ask the translator which translations it considers necessary.
        needs = translator.get_translation_needs("zh", ["en", "ru"])
        print(f"🎯 翻译需求分析: {needs}")

        if api_key:
            print("\n✅ 翻译功能配置正确,可能的问题:")
            print("  1. 网络连接问题")
            print("  2. API限额或配额问题")
            print("  3. DeepL服务暂时不可用")
        else:
            print("\n⚠️  翻译功能处于模拟模式(无API密钥)")
            print("  这会导致翻译返回原始文本或None")

    except Exception as e:
        print(f"❌ 翻译功能诊断失败: {e}")
        traceback.print_exc()

def diagnose_embedding_issue():
    """Diagnose text-embedding generation and print the findings.

    Reports CUDA availability, tries to construct a ``BgeEncoder`` and then
    encodes a fixed test phrase on the CPU and, when CUDA is available, on
    the GPU.  Failures are reported on stdout; nothing is returned.
    """
    print("\n🔍 诊断向量生成功能...")
    print("-" * 50)

    try:
        from embeddings.text_encoder import BgeEncoder
        import torch

        # Report whether CUDA can be used, plus device details if it can.
        has_cuda = torch.cuda.is_available()
        if has_cuda:
            cuda_label = '是'
        else:
            cuda_label = '否'
        print(f"🔧 CUDA可用性: {cuda_label}")
        if has_cuda:
            print(f"🔧 CUDA设备数量: {torch.cuda.device_count()}")
            print(f"🔧 当前CUDA设备: {torch.cuda.current_device()}")

        # Without an encoder the remaining checks cannot run, so a
        # construction failure ends the diagnosis early.
        print("\n📦 尝试创建BGE编码器...")
        try:
            bge = BgeEncoder()
            print("✅ BGE编码器创建成功")
        except Exception as err:
            print(f"❌ BGE编码器创建失败: {err}")
            for hint in (
                "可能的原因:",
                "  1. 模型文件未下载",
                "  2. 内存不足",
                "  3. 依赖包未正确安装",
            ):
                print(hint)
            return

        # Encode the sample phrase on each available device.
        sample = "推车"
        print(f"\n📝 测试向量生成文本: '{sample}'")

        try:
            # CPU first.
            print("🔄 尝试CPU模式...")
            cpu_vec = bge.encode(sample, device='cpu')
            print(f"✅ CPU模式向量生成成功,形状: {cpu_vec.shape}")

            # GPU only when CUDA was reported as available.
            if not has_cuda:
                print("⚠️  CUDA不可用,跳过GPU测试")
            else:
                print("🔄 尝试CUDA模式...")
                gpu_vec = bge.encode(sample, device='cuda')
                print(f"✅ CUDA模式向量生成成功,形状: {gpu_vec.shape}")

        except Exception as err:
            print(f"❌ 向量生成失败: {err}")
            for hint in (
                "可能的原因:",
                "  1. 模型加载问题",
                "  2. 内存不足",
                "  3. 设备配置问题",
            ):
                print(hint)

    except Exception as err:
        print(f"❌ 向量生成功能诊断失败: {err}")
        traceback.print_exc()

def diagnose_config_issue():
    """Diagnose configuration loading and print the findings.

    Loads the configuration for ``"customer1"``, prints the query-related
    switches it contains and reports whether a DeepL API key is available.
    Any ``Exception`` is reported on stdout together with a traceback
    instead of being propagated to the caller.
    """
    print("\n🔍 诊断配置问题...")
    print("-" * 50)

    try:
        # CustomerConfig is not used below; the import is kept so that a
        # broken import still shows up as a diagnosis failure.
        from config import CustomerConfig
        from config.config_loader import load_customer_config

        # Load the configuration.
        config = load_customer_config("customer1")
        print(f"✅ 配置加载成功: {config.customer_id}")

        # Report the query-related settings.
        query_config = config.query_config
        print(f"📝 翻译功能启用: {query_config.enable_translation}")
        print(f"🔤 向量生成启用: {query_config.enable_text_embedding}")
        print(f"🌍 支持的语言: {query_config.supported_languages}")

        # Report the API-key status.  Only ``Exception`` is caught so that
        # KeyboardInterrupt/SystemExit still propagate, and the cause is
        # printed rather than silently discarded.
        try:
            from config.env_config import get_deepl_key
            api_key = get_deepl_key()
            print(f"🔑 DeepL API密钥: {'已配置' if api_key else '未配置'}")
        except Exception as e:
            print(f"🔑 DeepL API密钥: 配置加载失败 ({e})")

    except Exception as e:
        print(f"❌ 配置诊断失败: {e}")
        traceback.print_exc()

def simulate_query_parsing():
    """Simulate parsing a sample query and print the outcome.

    Loads the ``"customer1"`` configuration, builds a ``QueryParser`` and a
    request context, reports which features and helper objects are active
    and finally parses a fixed test query, printing the parsed fields.
    Any ``Exception`` is reported on stdout together with a traceback.
    """
    print("\n🔍 模拟查询解析过程...")
    print("-" * 50)

    try:
        from context.request_context import create_request_context
        from query.query_parser import QueryParser
        from config import CustomerConfig
        from config.config_loader import load_customer_config

        # Set up configuration, parser and request context.
        loaded = load_customer_config("customer1")
        query_parser = QueryParser(loaded)
        request_ctx = create_request_context("test_diagnosis", "diagnosis_user")

        # The query that is parsed further down.
        print("📝 开始解析查询: '推车'")

        # Feature switches taken from the configuration.
        flags = loaded.query_config
        print("  - 翻译功能: " + ('启用' if flags.enable_translation else '禁用'))
        print("  - 向量功能: " + ('启用' if flags.enable_text_embedding else '禁用'))

        # State of the parser's translator, if it has one.
        active_translator = getattr(query_parser, '_translator', None)
        if not active_translator:
            print("  - 翻译器状态: 未初始化")
        else:
            key_label = '有' if active_translator.api_key else '无'
            print(f"  - 翻译器API密钥: {key_label}")

        # State of the parser's text encoder, if it has one.
        if getattr(query_parser, '_text_encoder', None):
            print("  - 向量编码器: 已初始化")
        else:
            print("  - 向量编码器: 未初始化")

        # Run the parse itself.
        outcome = query_parser.parse(
            "推车",
            context=request_ctx,
            generate_vector=flags.enable_text_embedding,
        )

        print("\n📊 解析结果:")
        # Printed one by one, in this order, reading each attribute lazily.
        for label, attr in (
            ("原查询", "original_query"),
            ("标准化", "normalized_query"),
            ("重写后", "rewritten_query"),
            ("检测语言", "detected_language"),
            ("域", "domain"),
            ("翻译结果", "translations"),
        ):
            print(f"  {label}: {getattr(outcome, attr)}")

        vector = outcome.query_vector
        if vector is None:
            print("  向量: 无")
        else:
            print("  向量: 有")
            print(f"  向量形状: {vector.shape}")

    except Exception as err:
        print(f"❌ 查询解析模拟失败: {err}")
        traceback.print_exc()

if __name__ == "__main__":
    # Script entry point: opening banner, every diagnostic in a fixed
    # order, then the closing banner.
    print("🧪 开始系统诊断...")
    print("=" * 60)

    for run_step in (
        diagnose_translation_issue,
        diagnose_embedding_issue,
        diagnose_config_issue,
        simulate_query_parsing,
    ):
        run_step()

    print("\n" + "=" * 60)
    print("🏁 诊断完成!请查看上述结果找出问题原因。")