"""Time embedding requests against a local OpenAI-compatible server.

Each model in ``MODELS`` is exercised twice: once through the ``openai``
SDK client and once through a raw HTTP POST to the ``/v1/embeddings``
endpoint. Elapsed time (and, for the HTTP path, a short summary of the
response) is printed to stdout.
"""

import json
import time

import openai
import requests

# Address of the server under test (presumably an Xinference instance,
# judging by the constant names -- confirm against the deployment).
XINFERENCE_HOST = "127.0.0.1"
XINFERENCE_PORT = "9997"
base_url = f"http://{XINFERENCE_HOST}:{XINFERENCE_PORT}/v1/embeddings"

# Models to exercise, in the order they are reported.
MODELS = ("bge-m3", "Qwen3-Embedding-0.6B")

# Text sent to every model.
PROMPT = "What is the capital of China?"

# Upper bound, in seconds, for a single raw HTTP request. Without a timeout
# ``requests`` waits indefinitely on a stalled server.
HTTP_TIMEOUT_SECONDS = 120

# The key value is a placeholder; its own text notes that it must be non-empty.
client = openai.Client(
    api_key="cannot be empty",
    base_url=f"http://{XINFERENCE_HOST}:{XINFERENCE_PORT}/v1",
)


def _embedding_dimension(result):
    """Return the length of the first embedding in *result*, or 0 if absent.

    Tolerates a missing, null, or empty ``data`` list and a missing or null
    ``embedding`` entry instead of raising.
    """
    data = result.get("data") or [{}]
    return len(data[0].get("embedding") or [])


def time_sdk_embedding(model):
    """Request one embedding for ``PROMPT`` via the SDK client and print the time.

    Args:
        model: Name of the embedding model to query.

    Returns:
        Elapsed seconds for the request as a float.
    """
    # perf_counter is monotonic, so the interval is immune to clock changes.
    started = time.perf_counter()
    client.embeddings.create(model=model, input=[PROMPT])
    elapsed = time.perf_counter() - started
    print(f"\n耗时: {elapsed:.4f} 秒")
    return elapsed


def check_http_embedding(model):
    """POST one embedding request for ``PROMPT`` over raw HTTP and print a report.

    On HTTP 200 the status code, reported model name, total token usage,
    embedding dimension, and elapsed time are printed; on any other status
    the status code and response body are printed.

    Args:
        model: Name of the embedding model to query.

    Returns:
        The ``requests.Response`` object received from the server.

    Raises:
        requests.RequestException: If the request cannot be completed,
            including when it exceeds ``HTTP_TIMEOUT_SECONDS``.
    """
    print(f"\n测试模型: {model}")
    started = time.perf_counter()
    response = requests.post(
        base_url,
        headers={
            "accept": "application/json",
            "Content-Type": "application/json",
        },
        json={"model": model, "input": PROMPT},
        timeout=HTTP_TIMEOUT_SECONDS,
    )
    elapsed = time.perf_counter() - started

    if response.status_code != 200:
        print(f"请求失败,状态码: {response.status_code}")
        print(f"错误信息: {response.text}")
        return response

    result = response.json()
    usage = result.get("usage") or {}
    print(f"状态码: {response.status_code}")
    print(f"模型: {result.get('model', 'N/A')}")
    print(f"使用token数: {usage.get('total_tokens', 'N/A')}")
    print(f"嵌入向量维度: {_embedding_dimension(result)}")
    print(f"耗时: {elapsed:.4f} 秒")
    return response


def main():
    """Run the SDK timings, then the raw HTTP checks, for every model."""
    for model in MODELS:
        time_sdk_embedding(model)

    # ========== HTTP API test ==========
    print("\n" + "=" * 50)
    print("HTTP API 测试")
    print("=" * 50)

    for model in MODELS:
        check_http_embedding(model)


if __name__ == "__main__":
    main()