Commit 8810a6fa9c3779e2fa48766c049a8296618d496f

Authored by tangwang
1 parent e7f2b240

重构

1 # ==================== 1 # ====================
2 # OpenAI Configuration 2 # OpenAI Configuration
3 # ==================== 3 # ====================
4 -OPENAI_API_KEY=  
5 -OPENAI_MODEL=gpt-4o-mini  
6 -OPENAI_EMBEDDING_MODEL=text-embedding-3-small 4 +OPENAI_API_KEY=<REDACTED — a real DashScope key was committed here; revoke/rotate it and keep .env.example key-less>
  5 +OPENAI_MODEL=qwen-plus
  6 +# Base URL for Qwen/DashScope (OpenAI-compatible API)
  7 +# 北京: https://dashscope.aliyuncs.com/compatible-mode/v1
  8 +# 弗吉尼亚: https://dashscope-us.aliyuncs.com/compatible-mode/v1
  9 +# 新加坡: https://dashscope-intl.aliyuncs.com/compatible-mode/v1
  10 +OPENAI_API_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
7 OPENAI_TEMPERATURE=1 11 OPENAI_TEMPERATURE=1
8 OPENAI_MAX_TOKENS=1000 12 OPENAI_MAX_TOKENS=1000
9 13
10 # ==================== 14 # ====================
11 -# CLIP Server Configuration  
12 -# ====================  
13 -CLIP_SERVER_URL=grpc://localhost:51000  
14 -  
15 -# ====================  
16 -# Milvus Configuration  
17 -# ====================  
18 -MILVUS_HOST=localhost  
19 -MILVUS_PORT=19530  
20 -  
21 -# Collection settings  
22 -TEXT_COLLECTION_NAME=text_embeddings  
23 -IMAGE_COLLECTION_NAME=image_embeddings  
24 -TEXT_DIM=1536  
25 -IMAGE_DIM=512  
26 -  
27 -# ====================  
28 # Search Configuration 15 # Search Configuration
29 # ==================== 16 # ====================
30 TOP_K_RESULTS=30 17 TOP_K_RESULTS=30
31 SIMILARITY_THRESHOLD=0.6 18 SIMILARITY_THRESHOLD=0.6
32 19
  20 +# Search API (see docs/搜索API对接指南.md)
  21 +SEARCH_API_BASE_URL=http://120.76.41.98:6002
  22 +SEARCH_API_TENANT_ID=162
  23 +
33 # ==================== 24 # ====================
34 # Application Configuration 25 # Application Configuration
35 # ==================== 26 # ====================
@@ -53,7 +53,6 @@ data/** @@ -53,7 +53,6 @@ data/**
53 *.db 53 *.db
54 *.sqlite 54 *.sqlite
55 *.sqlite3 55 *.sqlite3
56 -data/milvus_lite.db  
57 56
58 # Docker volumes 57 # Docker volumes
59 volumes/ 58 volumes/
@@ -12,9 +12,9 @@ OmniShopAgent autonomously decides which tools to call, maintains conversation s @@ -12,9 +12,9 @@ OmniShopAgent autonomously decides which tools to call, maintains conversation s
12 12
13 **Key Features:** 13 **Key Features:**
14 - Autonomous tool selection and execution 14 - Autonomous tool selection and execution
15 -- Multi-modal search (text + image) 15 +- Text search via Search API
16 - Conversational context awareness 16 - Conversational context awareness
17 -- Real-time visual analysis 17 +- Real-time visual analysis (style extraction from images)
18 18
19 ## Tech Stack 19 ## Tech Stack
20 20
@@ -22,9 +22,7 @@ OmniShopAgent autonomously decides which tools to call, maintains conversation s @@ -22,9 +22,7 @@ OmniShopAgent autonomously decides which tools to call, maintains conversation s
22 |-----------|-----------| 22 |-----------|-----------|
23 | **Agent Framework** | LangGraph | 23 | **Agent Framework** | LangGraph |
24 | **LLM** | any LLM supported by LangChain | 24 | **LLM** | any LLM supported by LangChain |
25 -| **Text Embedding** | text-embedding-3-small |  
26 -| **Image Embedding** | CLIP ViT-B/32 |  
27 -| **Vector Database** | Milvus | 25 +| **Search** | Search API (HTTP) |
28 | **Frontend** | Streamlit | 26 | **Frontend** | Streamlit |
29 | **Dataset** | Kaggle Fashion Products | 27 | **Dataset** | Kaggle Fashion Products |
30 28
@@ -52,8 +50,7 @@ graph LR @@ -52,8 +50,7 @@ graph LR
52 ``` 50 ```
53 51
54 **Available Tools:** 52 **Available Tools:**
55 -- `search_products(query)` - Text-based semantic search  
56 -- `search_by_image(image_path)` - Visual similarity search 53 +- `search_products(query)` - Text-based product search via Search API
57 - `analyze_image_style(image_path)` - VLM style analysis 54 - `analyze_image_style(image_path)` - VLM style analysis
58 55
59 56
@@ -66,12 +63,6 @@ User: "winter coats for women" @@ -66,12 +63,6 @@ User: "winter coats for women"
66 Agent: search_products("winter coats women") → Returns 5 products 63 Agent: search_products("winter coats women") → Returns 5 products
67 ``` 64 ```
68 65
69 -**Image Upload:**  
70 -```  
71 -User: [uploads sneaker photo] "find similar"  
72 -Agent: search_by_image(path) → Returns visually similar shoes  
73 -```  
74 -  
75 **Style Analysis + Search:** 66 **Style Analysis + Search:**
76 ``` 67 ```
77 User: [uploads vintage jacket] "what style is this? find matching pants" 68 User: [uploads vintage jacket] "what style is this? find matching pants"
@@ -93,6 +84,8 @@ Agent: [remembers context] → search_products("red formal dresses") → Results @@ -93,6 +84,8 @@ Agent: [remembers context] → search_products("red formal dresses") → Results
93 User: [uploads office outfit] "I like the shirt but need something more casual" 84 User: [uploads office outfit] "I like the shirt but need something more casual"
94 Agent: analyze_image_style(path) → Extracts shirt details 85 Agent: analyze_image_style(path) → Extracts shirt details
95 search_products("casual shirt [color] [style]") → Returns casual alternatives 86 search_products("casual shirt [color] [style]") → Returns casual alternatives
  87 +
  88 +**Note:** For image uploads "find similar", use analyze_image_style first to extract attributes, then search_products with the description.
96 ``` 89 ```
97 90
98 ## Installation 91 ## Installation
@@ -100,7 +93,6 @@ Agent: analyze_image_style(path) → Extracts shirt details @@ -100,7 +93,6 @@ Agent: analyze_image_style(path) → Extracts shirt details
100 **Prerequisites:** 93 **Prerequisites:**
101 - Python 3.12+ (LangChain 1.x 要求 Python 3.10+) 94 - Python 3.12+ (LangChain 1.x 要求 Python 3.10+)
102 - OpenAI API Key 95 - OpenAI API Key
103 -- Docker & Docker Compose  
104 96
105 ### 1. Setup Environment 97 ### 1. Setup Environment
106 ```bash 98 ```bash
@@ -116,38 +108,14 @@ cp .env.example .env @@ -116,38 +108,14 @@ cp .env.example .env
116 # Edit .env and add your OPENAI_API_KEY 108 # Edit .env and add your OPENAI_API_KEY
117 ``` 109 ```
118 110
119 -### 2. Download Dataset  
120 -Download the [Fashion Product Images Dataset](https://www.kaggle.com/datasets/paramaggarwal/fashion-product-images-dataset) from Kaggle and extract to `./data/`:  
121 -  
122 -```python  
123 -python scripts/download_dataset.py  
124 -```  
125 -  
126 -Expected structure:  
127 -```  
128 -data/  
129 -├── images/ # ~44k product images  
130 -├── styles.csv # Product metadata  
131 -└── images.csv # Image filenames  
132 -```  
133 -  
134 -### 3. Start Services  
135 -  
136 -```bash  
137 -docker-compose up  
138 -python -m clip_server  
139 -```  
140 -  
141 -  
142 -### 4. Index Data 111 +### 2. (Optional) Download Dataset
  112 +For image style analysis, you may download the [Fashion Product Images Dataset](https://www.kaggle.com/datasets/paramaggarwal/fashion-product-images-dataset) from Kaggle:
143 113
144 ```bash 114 ```bash
145 -python scripts/index_data.py 115 +python scripts/download_dataset.py
146 ``` 116 ```
147 117
148 -This generates and stores text/image embeddings for all 44k products in Milvus.  
149 -  
150 -### 5. Launch Application 118 +### 3. Launch Application
151 ```bash 119 ```bash
152 # 使用启动脚本(推荐) 120 # 使用启动脚本(推荐)
153 ./scripts/start.sh 121 ./scripts/start.sh
@@ -155,6 +123,9 @@ This generates and stores text/image embeddings for all 44k products in Milvus. @@ -155,6 +123,9 @@ This generates and stores text/image embeddings for all 44k products in Milvus.
155 # 或直接运行 123 # 或直接运行
156 streamlit run app.py 124 streamlit run app.py
157 ``` 125 ```
  126 +
  127 +Product search uses the external Search API. Configure `SEARCH_API_BASE_URL` and `SEARCH_API_TENANT_ID` in `.env` if needed.
  128 +
158 Opens at `http://localhost:8501` 129 Opens at `http://localhost:8501`
159 130
160 ### CentOS 8 部署 131 ### CentOS 8 部署
app/agents/shopping_agent.py
@@ -52,11 +52,14 @@ class ShoppingAgent: @@ -52,11 +52,14 @@ class ShoppingAgent:
52 self.session_id = session_id or "default" 52 self.session_id = session_id or "default"
53 53
54 # Initialize LLM 54 # Initialize LLM
55 - self.llm = ChatOpenAI( 55 + llm_kwargs = dict(
56 model=settings.openai_model, 56 model=settings.openai_model,
57 temperature=settings.openai_temperature, 57 temperature=settings.openai_temperature,
58 api_key=settings.openai_api_key, 58 api_key=settings.openai_api_key,
59 ) 59 )
  60 + if settings.openai_api_base_url:
  61 + llm_kwargs["base_url"] = settings.openai_api_base_url
  62 + self.llm = ChatOpenAI(**llm_kwargs)
60 63
61 # Get tools and bind to model 64 # Get tools and bind to model
62 self.tools = get_all_tools() 65 self.tools = get_all_tools()
@@ -73,12 +76,11 @@ class ShoppingAgent: @@ -73,12 +76,11 @@ class ShoppingAgent:
73 # System prompt for the agent 76 # System prompt for the agent
74 system_prompt = """You are an intelligent fashion shopping assistant. You can: 77 system_prompt = """You are an intelligent fashion shopping assistant. You can:
75 1. Search for products by text description (use search_products) 78 1. Search for products by text description (use search_products)
76 -2. Find visually similar products from images (use search_by_image)  
77 -3. Analyze image style and attributes (use analyze_image_style) 79 +2. Analyze image style and attributes (use analyze_image_style)
78 80
79 When a user asks about products: 81 When a user asks about products:
80 - For text queries: use search_products directly 82 - For text queries: use search_products directly
81 -- For image uploads: decide if you need to analyze_image_style first, then search 83 +- For image uploads: use analyze_image_style first to understand the product, then use search_products with the extracted description
82 - You can call multiple tools in sequence if needed 84 - You can call multiple tools in sequence if needed
83 - Always provide helpful, friendly responses 85 - Always provide helpful, friendly responses
84 86
@@ -4,6 +4,7 @@ Loads environment variables and provides configuration objects @@ -4,6 +4,7 @@ Loads environment variables and provides configuration objects
4 """ 4 """
5 5
6 import os 6 import os
  7 +from typing import Optional
7 8
8 from pydantic_settings import BaseSettings 9 from pydantic_settings import BaseSettings
9 10
@@ -17,47 +18,20 @@ class Settings(BaseSettings): @@ -17,47 +18,20 @@ class Settings(BaseSettings):
17 # OpenAI Configuration 18 # OpenAI Configuration
18 openai_api_key: str 19 openai_api_key: str
19 openai_model: str = "gpt-4o-mini" 20 openai_model: str = "gpt-4o-mini"
20 - openai_embedding_model: str = "text-embedding-3-small"  
21 openai_temperature: float = 0.7 21 openai_temperature: float = 0.7
22 openai_max_tokens: int = 1000 22 openai_max_tokens: int = 1000
23 -  
24 - # CLIP Server Configuration  
25 - clip_server_url: str = "grpc://localhost:51000"  
26 -  
27 - # Milvus Configuration  
28 - milvus_uri: str = "http://localhost:19530"  
29 - milvus_host: str = "localhost"  
30 - milvus_port: int = 19530  
31 - text_collection_name: str = "text_embeddings"  
32 - image_collection_name: str = "image_embeddings"  
33 - text_dim: int = 1536  
34 - image_dim: int = 512  
35 -  
36 - @property  
37 - def milvus_uri_absolute(self) -> str:  
38 - """Get absolute path for Milvus URI  
39 -  
40 - Returns:  
41 - - For http/https URIs: returns as-is (Milvus Standalone)  
42 - - For file paths starting with ./: converts to absolute path (Milvus Lite)  
43 - - For other paths: returns as-is  
44 - """  
45 - import os  
46 -  
47 - # If it's a network URI, return as-is (Milvus Standalone)  
48 - if self.milvus_uri.startswith(("http://", "https://")):  
49 - return self.milvus_uri  
50 - # If it's a relative path, convert to absolute (Milvus Lite)  
51 - if self.milvus_uri.startswith("./"):  
52 - base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))  
53 - return os.path.join(base_dir, self.milvus_uri[2:])  
54 - # Otherwise return as-is  
55 - return self.milvus_uri 23 + # Base URL for OpenAI-compatible APIs (e.g. Qwen/DashScope)
  24 + # Qwen 北京: https://dashscope.aliyuncs.com/compatible-mode/v1
  25 + openai_api_base_url: Optional[str] = None
56 26
57 # Search Configuration 27 # Search Configuration
58 top_k_results: int = 10 28 top_k_results: int = 10
59 similarity_threshold: float = 0.6 29 similarity_threshold: float = 0.6
60 30
  31 + # Search API (see docs/搜索API对接指南.md)
  32 + search_api_base_url: str = "http://120.76.41.98:6002"
  33 + search_api_tenant_id: str = "162"
  34 +
61 # Application Configuration 35 # Application Configuration
62 app_host: str = "0.0.0.0" 36 app_host: str = "0.0.0.0"
63 app_port: int = 8000 37 app_port: int = 8000
@@ -73,6 +47,7 @@ class Settings(BaseSettings): @@ -73,6 +47,7 @@ class Settings(BaseSettings):
73 env_file = ".env" 47 env_file = ".env"
74 env_file_encoding = "utf-8" 48 env_file_encoding = "utf-8"
75 case_sensitive = False 49 case_sensitive = False
  50 + extra = "ignore"
76 51
77 52
78 # Global settings instance 53 # Global settings instance
app/services/__init__.py
1 """ 1 """
2 Services Module 2 Services Module
3 -Provides database and embedding services for the application  
4 """ 3 """
5 -  
6 -from app.services.embedding_service import EmbeddingService, get_embedding_service  
7 -from app.services.milvus_service import MilvusService, get_milvus_service  
8 -  
9 -__all__ = [  
10 - "EmbeddingService",  
11 - "get_embedding_service",  
12 - "MilvusService",  
13 - "get_milvus_service",  
14 -]  
app/services/embedding_service.py deleted
@@ -1,293 +0,0 @@ @@ -1,293 +0,0 @@
1 -"""  
2 -Embedding Service for Text and Image Embeddings  
3 -Supports OpenAI text embeddings and CLIP image embeddings  
4 -"""  
5 -  
6 -import logging  
7 -from pathlib import Path  
8 -from typing import List, Optional, Union  
9 -  
10 -import numpy as np  
11 -from clip_client import Client as ClipClient  
12 -from openai import OpenAI  
13 -  
14 -from app.config import settings  
15 -  
16 -logger = logging.getLogger(__name__)  
17 -  
18 -  
19 -class EmbeddingService:  
20 - """Service for generating text and image embeddings"""  
21 -  
22 - def __init__(  
23 - self,  
24 - openai_api_key: Optional[str] = None,  
25 - clip_server_url: Optional[str] = None,  
26 - ):  
27 - """Initialize embedding service  
28 -  
29 - Args:  
30 - openai_api_key: OpenAI API key. If None, uses settings.openai_api_key  
31 - clip_server_url: CLIP server URL. If None, uses settings.clip_server_url  
32 - """  
33 - # Initialize OpenAI client for text embeddings  
34 - self.openai_api_key = openai_api_key or settings.openai_api_key  
35 - self.openai_client = OpenAI(api_key=self.openai_api_key)  
36 - self.text_embedding_model = settings.openai_embedding_model  
37 -  
38 - # Initialize CLIP client for image embeddings  
39 - self.clip_server_url = clip_server_url or settings.clip_server_url  
40 - self.clip_client: Optional[ClipClient] = None  
41 -  
42 - logger.info("Embedding service initialized")  
43 -  
44 - def connect_clip(self) -> None:  
45 - """Connect to CLIP server"""  
46 - try:  
47 - self.clip_client = ClipClient(server=self.clip_server_url)  
48 - logger.info(f"Connected to CLIP server at {self.clip_server_url}")  
49 - except Exception as e:  
50 - logger.error(f"Failed to connect to CLIP server: {e}")  
51 - raise  
52 -  
53 - def disconnect_clip(self) -> None:  
54 - """Disconnect from CLIP server"""  
55 - if self.clip_client:  
56 - # Note: clip_client doesn't have explicit close method  
57 - self.clip_client = None  
58 - logger.info("Disconnected from CLIP server")  
59 -  
60 - def get_text_embedding(self, text: str) -> List[float]:  
61 - """Get embedding for a single text  
62 -  
63 - Args:  
64 - text: Input text  
65 -  
66 - Returns:  
67 - Embedding vector as list of floats  
68 - """  
69 - try:  
70 - response = self.openai_client.embeddings.create(  
71 - input=text, model=self.text_embedding_model  
72 - )  
73 - embedding = response.data[0].embedding  
74 - logger.debug(f"Generated text embedding for: {text[:50]}...")  
75 - return embedding  
76 - except Exception as e:  
77 - logger.error(f"Failed to generate text embedding: {e}")  
78 - raise  
79 -  
80 - def get_text_embeddings_batch(  
81 - self, texts: List[str], batch_size: int = 100  
82 - ) -> List[List[float]]:  
83 - """Get embeddings for multiple texts in batches  
84 -  
85 - Args:  
86 - texts: List of input texts  
87 - batch_size: Number of texts to process at once  
88 -  
89 - Returns:  
90 - List of embedding vectors  
91 - """  
92 - all_embeddings = []  
93 -  
94 - for i in range(0, len(texts), batch_size):  
95 - batch = texts[i : i + batch_size]  
96 -  
97 - try:  
98 - response = self.openai_client.embeddings.create(  
99 - input=batch, model=self.text_embedding_model  
100 - )  
101 -  
102 - # Extract embeddings in the correct order  
103 - embeddings = [item.embedding for item in response.data]  
104 - all_embeddings.extend(embeddings)  
105 -  
106 - logger.info(  
107 - f"Generated text embeddings for batch {i // batch_size + 1}: {len(embeddings)} embeddings"  
108 - )  
109 -  
110 - except Exception as e:  
111 - logger.error(  
112 - f"Failed to generate text embeddings for batch {i // batch_size + 1}: {e}"  
113 - )  
114 - raise  
115 -  
116 - return all_embeddings  
117 -  
118 - def get_image_embedding(self, image_path: Union[str, Path]) -> List[float]:  
119 - """Get CLIP embedding for a single image  
120 -  
121 - Args:  
122 - image_path: Path to image file  
123 -  
124 - Returns:  
125 - Embedding vector as list of floats  
126 - """  
127 - if not self.clip_client:  
128 - raise RuntimeError("CLIP client not connected. Call connect_clip() first.")  
129 -  
130 - image_path = Path(image_path)  
131 - if not image_path.exists():  
132 - raise FileNotFoundError(f"Image not found: {image_path}")  
133 -  
134 - try:  
135 - # Get embedding from CLIP server using image path (as string)  
136 - result = self.clip_client.encode([str(image_path)])  
137 -  
138 - # Extract embedding - result is numpy array  
139 - import numpy as np  
140 -  
141 - if isinstance(result, np.ndarray):  
142 - # If result is numpy array, use first element  
143 - embedding = (  
144 - result[0].tolist() if len(result.shape) > 1 else result.tolist()  
145 - )  
146 - else:  
147 - # If result is DocumentArray  
148 - embedding = result[0].embedding.tolist()  
149 -  
150 - logger.debug(f"Generated image embedding for: {image_path.name}")  
151 - return embedding  
152 -  
153 - except Exception as e:  
154 - logger.error(f"Failed to generate image embedding for {image_path}: {e}")  
155 - raise  
156 -  
157 - def get_image_embeddings_batch(  
158 - self, image_paths: List[Union[str, Path]], batch_size: int = 32  
159 - ) -> List[Optional[List[float]]]:  
160 - """Get CLIP embeddings for multiple images in batches  
161 -  
162 - Args:  
163 - image_paths: List of paths to image files  
164 - batch_size: Number of images to process at once  
165 -  
166 - Returns:  
167 - List of embedding vectors (None for failed images)  
168 - """  
169 - if not self.clip_client:  
170 - raise RuntimeError("CLIP client not connected. Call connect_clip() first.")  
171 -  
172 - all_embeddings = []  
173 -  
174 - for i in range(0, len(image_paths), batch_size):  
175 - batch_paths = image_paths[i : i + batch_size]  
176 - valid_paths = []  
177 - valid_indices = []  
178 -  
179 - # Check which images exist  
180 - for idx, path in enumerate(batch_paths):  
181 - path = Path(path)  
182 - if path.exists():  
183 - valid_paths.append(str(path))  
184 - valid_indices.append(idx)  
185 - else:  
186 - logger.warning(f"Image not found: {path}")  
187 -  
188 - # Get embeddings for valid images  
189 - if valid_paths:  
190 - try:  
191 - # Send paths as strings to CLIP server  
192 - result = self.clip_client.encode(valid_paths)  
193 -  
194 - # Create embeddings list with None for missing images  
195 - batch_embeddings = [None] * len(batch_paths)  
196 -  
197 - # Handle result format - could be numpy array or DocumentArray  
198 - import numpy as np  
199 -  
200 - if isinstance(result, np.ndarray):  
201 - # Result is numpy array - shape (n_images, embedding_dim)  
202 - for idx in range(len(result)):  
203 - original_idx = valid_indices[idx]  
204 - batch_embeddings[original_idx] = result[idx].tolist()  
205 - else:  
206 - # Result is DocumentArray  
207 - for idx, doc in enumerate(result):  
208 - original_idx = valid_indices[idx]  
209 - batch_embeddings[original_idx] = doc.embedding.tolist()  
210 -  
211 - all_embeddings.extend(batch_embeddings)  
212 -  
213 - logger.info(  
214 - f"Generated image embeddings for batch {i // batch_size + 1}: "  
215 - f"{len(valid_paths)}/{len(batch_paths)} successful"  
216 - )  
217 -  
218 - except Exception as e:  
219 - logger.error(  
220 - f"Failed to generate image embeddings for batch {i // batch_size + 1}: {e}"  
221 - )  
222 - # Add None for all images in failed batch  
223 - all_embeddings.extend([None] * len(batch_paths))  
224 - else:  
225 - # All images in batch failed to load  
226 - all_embeddings.extend([None] * len(batch_paths))  
227 -  
228 - return all_embeddings  
229 -  
230 - def get_text_embedding_from_image(  
231 - self, image_path: Union[str, Path]  
232 - ) -> List[float]:  
233 - """Get text-based embedding by describing the image  
234 - This is useful for cross-modal search  
235 -  
236 - Note: This is a placeholder for future implementation  
237 - that could use vision models to generate text descriptions  
238 -  
239 - Args:  
240 - image_path: Path to image file  
241 -  
242 - Returns:  
243 - Text embedding vector  
244 - """  
245 - # For now, we just return the image embedding  
246 - # In the future, this could use a vision-language model to generate  
247 - # a text description and then embed that  
248 - raise NotImplementedError("Text embedding from image not yet implemented")  
249 -  
250 - def cosine_similarity(  
251 - self, embedding1: List[float], embedding2: List[float]  
252 - ) -> float:  
253 - """Calculate cosine similarity between two embeddings  
254 -  
255 - Args:  
256 - embedding1: First embedding vector  
257 - embedding2: Second embedding vector  
258 -  
259 - Returns:  
260 - Cosine similarity score (0-1)  
261 - """  
262 - vec1 = np.array(embedding1)  
263 - vec2 = np.array(embedding2)  
264 -  
265 - # Normalize vectors  
266 - vec1_norm = vec1 / np.linalg.norm(vec1)  
267 - vec2_norm = vec2 / np.linalg.norm(vec2)  
268 -  
269 - # Calculate cosine similarity  
270 - similarity = np.dot(vec1_norm, vec2_norm)  
271 -  
272 - return float(similarity)  
273 -  
274 - def get_embedding_dimensions(self) -> dict:  
275 - """Get the dimensions of text and image embeddings  
276 -  
277 - Returns:  
278 - Dictionary with text_dim and image_dim  
279 - """  
280 - return {"text_dim": settings.text_dim, "image_dim": settings.image_dim}  
281 -  
282 -  
283 -# Global instance  
284 -_embedding_service: Optional[EmbeddingService] = None  
285 -  
286 -  
287 -def get_embedding_service() -> EmbeddingService:  
288 - """Get or create the global embedding service instance"""  
289 - global _embedding_service  
290 - if _embedding_service is None:  
291 - _embedding_service = EmbeddingService()  
292 - _embedding_service.connect_clip()  
293 - return _embedding_service  
app/services/milvus_service.py deleted
@@ -1,480 +0,0 @@ @@ -1,480 +0,0 @@
1 -"""  
2 -Milvus Service for Vector Storage and Similarity Search  
3 -Manages text and image embeddings in separate collections  
4 -"""  
5 -  
6 -import logging  
7 -from typing import Any, Dict, List, Optional  
8 -  
9 -from pymilvus import (  
10 - DataType,  
11 - MilvusClient,  
12 -)  
13 -  
14 -from app.config import settings  
15 -  
16 -logger = logging.getLogger(__name__)  
17 -  
18 -  
19 -class MilvusService:  
20 - """Service for managing vector embeddings in Milvus"""  
21 -  
22 - def __init__(self, uri: Optional[str] = None):  
23 - """Initialize Milvus service  
24 -  
25 - Args:  
26 - uri: Milvus connection URI. If None, uses settings.milvus_uri  
27 - """  
28 - if uri:  
29 - self.uri = uri  
30 - else:  
31 - # Use absolute path for Milvus Lite  
32 - self.uri = settings.milvus_uri_absolute  
33 - self.text_collection_name = settings.text_collection_name  
34 - self.image_collection_name = settings.image_collection_name  
35 - self.text_dim = settings.text_dim  
36 - self.image_dim = settings.image_dim  
37 -  
38 - # Use MilvusClient for simplified operations  
39 - self._client: Optional[MilvusClient] = None  
40 -  
41 - logger.info(f"Initializing Milvus service with URI: {self.uri}")  
42 -  
43 - def is_connected(self) -> bool:  
44 - """Check if connected to Milvus"""  
45 - return self._client is not None  
46 -  
47 - def connect(self) -> None:  
48 - """Connect to Milvus"""  
49 - if self.is_connected():  
50 - return  
51 - try:  
52 - self._client = MilvusClient(uri=self.uri)  
53 - logger.info(f"Connected to Milvus at {self.uri}")  
54 - except Exception as e:  
55 - logger.error(f"Failed to connect to Milvus: {e}")  
56 - raise  
57 -  
58 - def disconnect(self) -> None:  
59 - """Disconnect from Milvus"""  
60 - if self._client:  
61 - self._client.close()  
62 - self._client = None  
63 - logger.info("Disconnected from Milvus")  
64 -  
65 - @property  
66 - def client(self) -> MilvusClient:  
67 - """Get the Milvus client"""  
68 - if not self._client:  
69 - raise RuntimeError("Milvus not connected. Call connect() first.")  
70 - return self._client  
71 -  
72 - def create_text_collection(self, recreate: bool = False) -> None:  
73 - """Create collection for text embeddings with product metadata  
74 -  
75 - Args:  
76 - recreate: If True, drop existing collection and recreate  
77 - """  
78 - if recreate and self.client.has_collection(self.text_collection_name):  
79 - self.client.drop_collection(self.text_collection_name)  
80 - logger.info(f"Dropped existing collection: {self.text_collection_name}")  
81 -  
82 - if self.client.has_collection(self.text_collection_name):  
83 - logger.info(f"Text collection already exists: {self.text_collection_name}")  
84 - return  
85 -  
86 - # Create collection with schema (includes metadata fields)  
87 - schema = MilvusClient.create_schema(  
88 - auto_id=False,  
89 - enable_dynamic_field=True, # Allow additional metadata fields  
90 - )  
91 -  
92 - # Core fields  
93 - schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)  
94 - schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=2000)  
95 - schema.add_field(  
96 - field_name="embedding", datatype=DataType.FLOAT_VECTOR, dim=self.text_dim  
97 - )  
98 -  
99 - # Product metadata fields  
100 - schema.add_field(  
101 - field_name="productDisplayName", datatype=DataType.VARCHAR, max_length=500  
102 - )  
103 - schema.add_field(field_name="gender", datatype=DataType.VARCHAR, max_length=50)  
104 - schema.add_field(  
105 - field_name="masterCategory", datatype=DataType.VARCHAR, max_length=100  
106 - )  
107 - schema.add_field(  
108 - field_name="subCategory", datatype=DataType.VARCHAR, max_length=100  
109 - )  
110 - schema.add_field(  
111 - field_name="articleType", datatype=DataType.VARCHAR, max_length=100  
112 - )  
113 - schema.add_field(  
114 - field_name="baseColour", datatype=DataType.VARCHAR, max_length=50  
115 - )  
116 - schema.add_field(field_name="season", datatype=DataType.VARCHAR, max_length=50)  
117 - schema.add_field(field_name="usage", datatype=DataType.VARCHAR, max_length=50)  
118 -  
119 - # Create index parameters  
120 - index_params = self.client.prepare_index_params()  
121 - index_params.add_index(  
122 - field_name="embedding",  
123 - index_type="AUTOINDEX",  
124 - metric_type="COSINE",  
125 - )  
126 -  
127 - # Create collection  
128 - self.client.create_collection(  
129 - collection_name=self.text_collection_name,  
130 - schema=schema,  
131 - index_params=index_params,  
132 - )  
133 -  
134 - logger.info(  
135 - f"Created text collection with metadata: {self.text_collection_name}"  
136 - )  
137 -  
138 - def create_image_collection(self, recreate: bool = False) -> None:  
139 - """Create collection for image embeddings with product metadata  
140 -  
141 - Args:  
142 - recreate: If True, drop existing collection and recreate  
143 - """  
144 - if recreate and self.client.has_collection(self.image_collection_name):  
145 - self.client.drop_collection(self.image_collection_name)  
146 - logger.info(f"Dropped existing collection: {self.image_collection_name}")  
147 -  
148 - if self.client.has_collection(self.image_collection_name):  
149 - logger.info(  
150 - f"Image collection already exists: {self.image_collection_name}"  
151 - )  
152 - return  
153 -  
154 - # Create collection with schema (includes metadata fields)  
155 - schema = MilvusClient.create_schema(  
156 - auto_id=False,  
157 - enable_dynamic_field=True, # Allow additional metadata fields  
158 - )  
159 -  
160 - # Core fields  
161 - schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)  
162 - schema.add_field(  
163 - field_name="image_path", datatype=DataType.VARCHAR, max_length=500  
164 - )  
165 - schema.add_field(  
166 - field_name="embedding", datatype=DataType.FLOAT_VECTOR, dim=self.image_dim  
167 - )  
168 -  
169 - # Product metadata fields  
170 - schema.add_field(  
171 - field_name="productDisplayName", datatype=DataType.VARCHAR, max_length=500  
172 - )  
173 - schema.add_field(field_name="gender", datatype=DataType.VARCHAR, max_length=50)  
174 - schema.add_field(  
175 - field_name="masterCategory", datatype=DataType.VARCHAR, max_length=100  
176 - )  
177 - schema.add_field(  
178 - field_name="subCategory", datatype=DataType.VARCHAR, max_length=100  
179 - )  
180 - schema.add_field(  
181 - field_name="articleType", datatype=DataType.VARCHAR, max_length=100  
182 - )  
183 - schema.add_field(  
184 - field_name="baseColour", datatype=DataType.VARCHAR, max_length=50  
185 - )  
186 - schema.add_field(field_name="season", datatype=DataType.VARCHAR, max_length=50)  
187 - schema.add_field(field_name="usage", datatype=DataType.VARCHAR, max_length=50)  
188 -  
189 - # Create index parameters  
190 - index_params = self.client.prepare_index_params()  
191 - index_params.add_index(  
192 - field_name="embedding",  
193 - index_type="AUTOINDEX",  
194 - metric_type="COSINE",  
195 - )  
196 -  
197 - # Create collection  
198 - self.client.create_collection(  
199 - collection_name=self.image_collection_name,  
200 - schema=schema,  
201 - index_params=index_params,  
202 - )  
203 -  
204 - logger.info(  
205 - f"Created image collection with metadata: {self.image_collection_name}"  
206 - )  
207 -  
208 - def insert_text_embeddings(  
209 - self,  
210 - embeddings: List[Dict[str, Any]],  
211 - ) -> int:  
212 - """Insert text embeddings with metadata into collection  
213 -  
214 - Args:  
215 - embeddings: List of dictionaries with keys:  
216 - - id: unique ID (product ID)  
217 - - text: the text that was embedded  
218 - - embedding: the embedding vector  
219 - - productDisplayName, gender, masterCategory, etc. (metadata)  
220 -  
221 - Returns:  
222 - Number of inserted embeddings  
223 - """  
224 - if not embeddings:  
225 - return 0  
226 -  
227 - try:  
228 - # Insert data directly (all fields including metadata)  
229 - # Milvus will accept all fields defined in schema + dynamic fields  
230 - data = embeddings  
231 -  
232 - # Insert data  
233 - result = self.client.insert(  
234 - collection_name=self.text_collection_name,  
235 - data=data,  
236 - )  
237 -  
238 - logger.info(f"Inserted {len(data)} text embeddings")  
239 - return len(data)  
240 -  
241 - except Exception as e:  
242 - logger.error(f"Failed to insert text embeddings: {e}")  
243 - raise  
244 -  
245 - def insert_image_embeddings(  
246 - self,  
247 - embeddings: List[Dict[str, Any]],  
248 - ) -> int:  
249 - """Insert image embeddings with metadata into collection  
250 -  
251 - Args:  
252 - embeddings: List of dictionaries with keys:  
253 - - id: unique ID (product ID)  
254 - - image_path: path to the image file  
255 - - embedding: the embedding vector  
256 - - productDisplayName, gender, masterCategory, etc. (metadata)  
257 -  
258 - Returns:  
259 - Number of inserted embeddings  
260 - """  
261 - if not embeddings:  
262 - return 0  
263 -  
264 - try:  
265 - # Insert data directly (all fields including metadata)  
266 - # Milvus will accept all fields defined in schema + dynamic fields  
267 - data = embeddings  
268 -  
269 - # Insert data  
270 - result = self.client.insert(  
271 - collection_name=self.image_collection_name,  
272 - data=data,  
273 - )  
274 -  
275 - logger.info(f"Inserted {len(data)} image embeddings")  
276 - return len(data)  
277 -  
278 - except Exception as e:  
279 - logger.error(f"Failed to insert image embeddings: {e}")  
280 - raise  
281 -  
282 - def search_similar_text(  
283 - self,  
284 - query_embedding: List[float],  
285 - limit: int = 10,  
286 - filters: Optional[str] = None,  
287 - output_fields: Optional[List[str]] = None,  
288 - ) -> List[Dict[str, Any]]:  
289 - """Search for similar text embeddings  
290 -  
291 - Args:  
292 - query_embedding: Query embedding vector  
293 - limit: Maximum number of results  
294 - filters: Filter expression (e.g., "product_id in [1, 2, 3]")  
295 - output_fields: List of fields to return  
296 -  
297 - Returns:  
298 - List of search results with fields:  
299 - - id: embedding ID  
300 - - distance: similarity distance  
301 - - entity: the matched entity with requested fields  
302 - """  
303 - try:  
304 - if output_fields is None:  
305 - output_fields = [  
306 - "id",  
307 - "text",  
308 - "productDisplayName",  
309 - "gender",  
310 - "masterCategory",  
311 - "subCategory",  
312 - "articleType",  
313 - "baseColour",  
314 - ]  
315 -  
316 - search_params = {}  
317 - if filters:  
318 - search_params["expr"] = filters  
319 -  
320 - results = self.client.search(  
321 - collection_name=self.text_collection_name,  
322 - data=[query_embedding],  
323 - limit=limit,  
324 - output_fields=output_fields,  
325 - search_params=search_params,  
326 - )  
327 -  
328 - # Format results  
329 - formatted_results = []  
330 - if results and len(results) > 0:  
331 - for hit in results[0]:  
332 - result = {"id": hit.get("id"), "distance": hit.get("distance")}  
333 - # Extract fields from entity  
334 - entity = hit.get("entity", {})  
335 - for field in output_fields:  
336 - if field in entity:  
337 - result[field] = entity.get(field)  
338 - formatted_results.append(result)  
339 -  
340 - logger.debug(f"Found {len(formatted_results)} similar text embeddings")  
341 - return formatted_results  
342 -  
343 - except Exception as e:  
344 - logger.error(f"Failed to search similar text: {e}")  
345 - raise  
346 -  
347 - def search_similar_images(  
348 - self,  
349 - query_embedding: List[float],  
350 - limit: int = 10,  
351 - filters: Optional[str] = None,  
352 - output_fields: Optional[List[str]] = None,  
353 - ) -> List[Dict[str, Any]]:  
354 - """Search for similar image embeddings  
355 -  
356 - Args:  
357 - query_embedding: Query embedding vector  
358 - limit: Maximum number of results  
359 - filters: Filter expression (e.g., "product_id in [1, 2, 3]")  
360 - output_fields: List of fields to return  
361 -  
362 - Returns:  
363 - List of search results with fields:  
364 - - id: embedding ID  
365 - - distance: similarity distance  
366 - - entity: the matched entity with requested fields  
367 - """  
368 - try:  
369 - if output_fields is None:  
370 - output_fields = [  
371 - "id",  
372 - "image_path",  
373 - "productDisplayName",  
374 - "gender",  
375 - "masterCategory",  
376 - "subCategory",  
377 - "articleType",  
378 - "baseColour",  
379 - ]  
380 -  
381 - search_params = {}  
382 - if filters:  
383 - search_params["expr"] = filters  
384 -  
385 - results = self.client.search(  
386 - collection_name=self.image_collection_name,  
387 - data=[query_embedding],  
388 - limit=limit,  
389 - output_fields=output_fields,  
390 - search_params=search_params,  
391 - )  
392 -  
393 - # Format results  
394 - formatted_results = []  
395 - if results and len(results) > 0:  
396 - for hit in results[0]:  
397 - result = {"id": hit.get("id"), "distance": hit.get("distance")}  
398 - # Extract fields from entity  
399 - entity = hit.get("entity", {})  
400 - for field in output_fields:  
401 - if field in entity:  
402 - result[field] = entity.get(field)  
403 - formatted_results.append(result)  
404 -  
405 - logger.debug(f"Found {len(formatted_results)} similar image embeddings")  
406 - return formatted_results  
407 -  
408 - except Exception as e:  
409 - logger.error(f"Failed to search similar images: {e}")  
410 - raise  
411 -  
412 - def get_collection_stats(self, collection_name: str) -> Dict[str, Any]:  
413 - """Get statistics for a collection  
414 -  
415 - Args:  
416 - collection_name: Name of the collection  
417 -  
418 - Returns:  
419 - Dictionary with collection statistics  
420 - """  
421 - try:  
422 - stats = self.client.get_collection_stats(collection_name)  
423 - return {  
424 - "collection_name": collection_name,  
425 - "row_count": stats.get("row_count", 0),  
426 - }  
427 - except Exception as e:  
428 - logger.error(f"Failed to get collection stats: {e}")  
429 - return {"collection_name": collection_name, "row_count": 0}  
430 -  
431 - def delete_by_ids(self, collection_name: str, ids: List[int]) -> int:  
432 - """Delete embeddings by IDs  
433 -  
434 - Args:  
435 - collection_name: Name of the collection  
436 - ids: List of IDs to delete  
437 -  
438 - Returns:  
439 - Number of deleted embeddings  
440 - """  
441 - if not ids:  
442 - return 0  
443 -  
444 - try:  
445 - self.client.delete(  
446 - collection_name=collection_name,  
447 - ids=ids,  
448 - )  
449 - logger.info(f"Deleted {len(ids)} embeddings from {collection_name}")  
450 - return len(ids)  
451 - except Exception as e:  
452 - logger.error(f"Failed to delete embeddings: {e}")  
453 - raise  
454 -  
455 - def clear_collection(self, collection_name: str) -> None:  
456 - """Clear all data from a collection  
457 -  
458 - Args:  
459 - collection_name: Name of the collection  
460 - """  
461 - try:  
462 - if self.client.has_collection(collection_name):  
463 - self.client.drop_collection(collection_name)  
464 - logger.info(f"Dropped collection: {collection_name}")  
465 - except Exception as e:  
466 - logger.error(f"Failed to clear collection: {e}")  
467 - raise  
468 -  
469 -  
470 -# Global instance  
471 -_milvus_service: Optional[MilvusService] = None  
472 -  
473 -  
474 -def get_milvus_service() -> MilvusService:  
475 - """Get or create the global Milvus service instance"""  
476 - global _milvus_service  
477 - if _milvus_service is None:  
478 - _milvus_service = MilvusService()  
479 - _milvus_service.connect()  
480 - return _milvus_service  
app/tools/__init__.py
@@ -5,13 +5,11 @@ LangChain Tools for Product Search and Discovery @@ -5,13 +5,11 @@ LangChain Tools for Product Search and Discovery
5 from app.tools.search_tools import ( 5 from app.tools.search_tools import (
6 analyze_image_style, 6 analyze_image_style,
7 get_all_tools, 7 get_all_tools,
8 - search_by_image,  
9 search_products, 8 search_products,
10 ) 9 )
11 10
12 __all__ = [ 11 __all__ = [
13 "search_products", 12 "search_products",
14 - "search_by_image",  
15 "analyze_image_style", 13 "analyze_image_style",
16 "get_all_tools", 14 "get_all_tools",
17 ] 15 ]
app/tools/search_tools.py
1 """ 1 """
2 Search Tools for Product Discovery 2 Search Tools for Product Discovery
3 -Provides text-based, image-based, and VLM reasoning capabilities 3 +Provides text-based search via Search API and VLM style analysis
4 """ 4 """
5 5
6 import base64 6 import base64
@@ -8,40 +8,24 @@ import logging @@ -8,40 +8,24 @@ import logging
8 from pathlib import Path 8 from pathlib import Path
9 from typing import Optional 9 from typing import Optional
10 10
  11 +import requests
11 from langchain_core.tools import tool 12 from langchain_core.tools import tool
12 from openai import OpenAI 13 from openai import OpenAI
13 14
14 from app.config import settings 15 from app.config import settings
15 -from app.services.embedding_service import EmbeddingService  
16 -from app.services.milvus_service import MilvusService  
17 16
18 logger = logging.getLogger(__name__) 17 logger = logging.getLogger(__name__)
19 18
20 -# Initialize services as singletons  
21 -_embedding_service: Optional[EmbeddingService] = None  
22 -_milvus_service: Optional[MilvusService] = None  
23 _openai_client: Optional[OpenAI] = None 19 _openai_client: Optional[OpenAI] = None
24 20
25 21
26 -def get_embedding_service() -> EmbeddingService:  
27 - global _embedding_service  
28 - if _embedding_service is None:  
29 - _embedding_service = EmbeddingService()  
30 - return _embedding_service  
31 -  
32 -  
33 -def get_milvus_service() -> MilvusService:  
34 - global _milvus_service  
35 - if _milvus_service is None:  
36 - _milvus_service = MilvusService()  
37 - _milvus_service.connect()  
38 - return _milvus_service  
39 -  
40 -  
41 def get_openai_client() -> OpenAI: 22 def get_openai_client() -> OpenAI:
42 global _openai_client 23 global _openai_client
43 if _openai_client is None: 24 if _openai_client is None:
44 - _openai_client = OpenAI(api_key=settings.openai_api_key) 25 + kwargs = {"api_key": settings.openai_api_key}
  26 + if settings.openai_api_base_url:
  27 + kwargs["base_url"] = settings.openai_api_base_url
  28 + _openai_client = OpenAI(**kwargs)
45 return _openai_client 29 return _openai_client
46 30
47 31
@@ -64,30 +48,26 @@ def search_products(query: str, limit: int = 5) -> str: @@ -64,30 +48,26 @@ def search_products(query: str, limit: int = 5) -> str:
64 try: 48 try:
65 logger.info(f"Searching products: '{query}', limit: {limit}") 49 logger.info(f"Searching products: '{query}', limit: {limit}")
66 50
67 - embedding_service = get_embedding_service()  
68 - milvus_service = get_milvus_service()  
69 -  
70 - if not milvus_service.is_connected():  
71 - milvus_service.connect()  
72 -  
73 - query_embedding = embedding_service.get_text_embedding(query)  
74 -  
75 - results = milvus_service.search_similar_text(  
76 - query_embedding=query_embedding,  
77 - limit=min(limit, 20),  
78 - filters=None,  
79 - output_fields=[  
80 - "id",  
81 - "productDisplayName",  
82 - "gender",  
83 - "masterCategory",  
84 - "subCategory",  
85 - "articleType",  
86 - "baseColour",  
87 - "season",  
88 - "usage",  
89 - ],  
90 - ) 51 + url = f"{settings.search_api_base_url.rstrip('/')}/search/"
  52 + headers = {
  53 + "Content-Type": "application/json",
  54 + "X-Tenant-ID": settings.search_api_tenant_id,
  55 + }
  56 + payload = {
  57 + "query": query,
  58 + "size": min(limit, 20),
  59 + "from": 0,
  60 + "language": "zh",
  61 + }
  62 +
  63 + response = requests.post(url, json=payload, headers=headers, timeout=60)
  64 +
  65 + if response.status_code != 200:
  66 + logger.error(f"Search API error: {response.status_code} - {response.text}")
  67 + return f"Error searching products: API returned {response.status_code}"
  68 +
  69 + data = response.json()
  70 + results = data.get("results", [])
91 71
92 if not results: 72 if not results:
93 return "No products found matching your search." 73 return "No products found matching your search."
@@ -95,131 +75,40 @@ def search_products(query: str, limit: int = 5) -> str: @@ -95,131 +75,40 @@ def search_products(query: str, limit: int = 5) -> str:
95 output = f"Found {len(results)} product(s):\n\n" 75 output = f"Found {len(results)} product(s):\n\n"
96 76
97 for idx, product in enumerate(results, 1): 77 for idx, product in enumerate(results, 1):
98 - output += f"{idx}. {product.get('productDisplayName', 'Unknown Product')}\n"  
99 - output += f" ID: {product.get('id', 'N/A')}\n"  
100 - output += f" Category: {product.get('masterCategory', 'N/A')} > {product.get('subCategory', 'N/A')} > {product.get('articleType', 'N/A')}\n"  
101 - output += f" Color: {product.get('baseColour', 'N/A')}\n"  
102 - output += f" Gender: {product.get('gender', 'N/A')}\n"  
103 -  
104 - if product.get("season"):  
105 - output += f" Season: {product.get('season')}\n"  
106 - if product.get("usage"):  
107 - output += f" Usage: {product.get('usage')}\n"  
108 -  
109 - if "distance" in product:  
110 - similarity = 1 - product["distance"]  
111 - output += f" Relevance: {similarity:.2%}\n" 78 + output += f"{idx}. {product.get('title', 'Unknown Product')}\n"
  79 + output += f" ID: {product.get('spu_id', 'N/A')}\n"
  80 + output += f" Category: {product.get('category_path', product.get('category_name', 'N/A'))}\n"
  81 + if product.get("vendor"):
  82 + output += f" Brand: {product.get('vendor')}\n"
  83 + if product.get("price") is not None:
  84 + output += f" Price: {product.get('price')}\n"
  85 +
  86 + # 规格/颜色信息
  87 + specs = product.get("specifications", [])
  88 + if specs:
  89 + color_spec = next(
  90 + (s for s in specs if s.get("name") == "color"),
  91 + None,
  92 + )
  93 + if color_spec:
  94 + output += f" Color: {color_spec.get('value', 'N/A')}\n"
  95 +
  96 + if product.get("relevance_score") is not None:
  97 + output += f" Relevance: {product['relevance_score']:.2f}\n"
112 98
113 output += "\n" 99 output += "\n"
114 100
115 return output.strip() 101 return output.strip()
116 102
  103 + except requests.exceptions.RequestException as e:
  104 + logger.error(f"Error searching products (network): {e}", exc_info=True)
  105 + return f"Error searching products: {str(e)}"
117 except Exception as e: 106 except Exception as e:
118 logger.error(f"Error searching products: {e}", exc_info=True) 107 logger.error(f"Error searching products: {e}", exc_info=True)
119 return f"Error searching products: {str(e)}" 108 return f"Error searching products: {str(e)}"
120 109
121 110
122 @tool 111 @tool
123 -def search_by_image(image_path: str, limit: int = 5) -> str:  
124 - """Find similar fashion products using an image.  
125 -  
126 - Use when users want visually similar items:  
127 - - User uploads an image and asks "find similar items"  
128 - - "Show me products that look like this"  
129 -  
130 - Args:  
131 - image_path: Path to the image file  
132 - limit: Maximum number of results (1-20)  
133 -  
134 - Returns:  
135 - Formatted string with similar products  
136 - """  
137 - try:  
138 - logger.info(f"Image search: '{image_path}', limit: {limit}")  
139 -  
140 - img_path = Path(image_path)  
141 - if not img_path.exists():  
142 - return f"Error: Image file not found at '{image_path}'"  
143 -  
144 - embedding_service = get_embedding_service()  
145 - milvus_service = get_milvus_service()  
146 -  
147 - if not milvus_service.is_connected():  
148 - milvus_service.connect()  
149 -  
150 - if (  
151 - not hasattr(embedding_service, "clip_client")  
152 - or embedding_service.clip_client is None  
153 - ):  
154 - embedding_service.connect_clip()  
155 -  
156 - image_embedding = embedding_service.get_image_embedding(image_path)  
157 -  
158 - if image_embedding is None:  
159 - return "Error: Failed to generate embedding for image"  
160 -  
161 - results = milvus_service.search_similar_images(  
162 - query_embedding=image_embedding,  
163 - limit=min(limit + 1, 21),  
164 - filters=None,  
165 - output_fields=[  
166 - "id",  
167 - "image_path",  
168 - "productDisplayName",  
169 - "gender",  
170 - "masterCategory",  
171 - "subCategory",  
172 - "articleType",  
173 - "baseColour",  
174 - "season",  
175 - "usage",  
176 - ],  
177 - )  
178 -  
179 - if not results:  
180 - return "No similar products found."  
181 -  
182 - # Filter out the query image itself  
183 - query_id = img_path.stem  
184 - filtered_results = []  
185 - for result in results:  
186 - result_path = result.get("image_path", "")  
187 - if Path(result_path).stem != query_id:  
188 - filtered_results.append(result)  
189 - if len(filtered_results) >= limit:  
190 - break  
191 -  
192 - if not filtered_results:  
193 - return "No similar products found."  
194 -  
195 - output = f"Found {len(filtered_results)} visually similar product(s):\n\n"  
196 -  
197 - for idx, product in enumerate(filtered_results, 1):  
198 - output += f"{idx}. {product.get('productDisplayName', 'Unknown Product')}\n"  
199 - output += f" ID: {product.get('id', 'N/A')}\n"  
200 - output += f" Category: {product.get('masterCategory', 'N/A')} > {product.get('subCategory', 'N/A')} > {product.get('articleType', 'N/A')}\n"  
201 - output += f" Color: {product.get('baseColour', 'N/A')}\n"  
202 - output += f" Gender: {product.get('gender', 'N/A')}\n"  
203 -  
204 - if product.get("season"):  
205 - output += f" Season: {product.get('season')}\n"  
206 - if product.get("usage"):  
207 - output += f" Usage: {product.get('usage')}\n"  
208 -  
209 - if "distance" in product:  
210 - similarity = 1 - product["distance"]  
211 - output += f" Visual Similarity: {similarity:.2%}\n"  
212 -  
213 - output += "\n"  
214 -  
215 - return output.strip()  
216 -  
217 - except Exception as e:  
218 - logger.error(f"Error in image search: {e}", exc_info=True)  
219 - return f"Error searching by image: {str(e)}"  
220 -  
221 -  
222 -@tool  
223 def analyze_image_style(image_path: str) -> str: 112 def analyze_image_style(image_path: str) -> str:
224 """Analyze a fashion product image using AI vision to extract detailed style information. 113 """Analyze a fashion product image using AI vision to extract detailed style information.
225 114
@@ -291,4 +180,4 @@ Provide a comprehensive yet concise description (3-4 sentences).""" @@ -291,4 +180,4 @@ Provide a comprehensive yet concise description (3-4 sentences)."""
291 180
292 def get_all_tools(): 181 def get_all_tools():
293 """Get all available tools for the agent""" 182 """Get all available tools for the agent"""
294 - return [search_products, search_by_image, analyze_image_style] 183 + return [search_products, analyze_image_style]
docker-compose.yml deleted
@@ -1,76 +0,0 @@ @@ -1,76 +0,0 @@
1 -version: '3.5'  
2 -  
3 -services:  
4 - etcd:  
5 - container_name: milvus-etcd  
6 - image: quay.io/coreos/etcd:v3.5.5  
7 - environment:  
8 - - ETCD_AUTO_COMPACTION_MODE=revision  
9 - - ETCD_AUTO_COMPACTION_RETENTION=1000  
10 - - ETCD_QUOTA_BACKEND_BYTES=4294967296  
11 - - ETCD_SNAPSHOT_COUNT=50000  
12 - volumes:  
13 - - ./volumes/etcd:/etcd  
14 - command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd  
15 - healthcheck:  
16 - test: ["CMD", "etcdctl", "endpoint", "health"]  
17 - interval: 30s  
18 - timeout: 20s  
19 - retries: 3  
20 -  
21 - minio:  
22 - container_name: milvus-minio  
23 - image: minio/minio:RELEASE.2023-03-20T20-16-18Z  
24 - environment:  
25 - MINIO_ACCESS_KEY: minioadmin  
26 - MINIO_SECRET_KEY: minioadmin  
27 - ports:  
28 - - "9001:9001"  
29 - - "9000:9000"  
30 - volumes:  
31 - - ./volumes/minio:/minio_data  
32 - command: minio server /minio_data --console-address ":9001"  
33 - healthcheck:  
34 - test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]  
35 - interval: 30s  
36 - timeout: 20s  
37 - retries: 3  
38 -  
39 - standalone:  
40 - container_name: milvus-standalone  
41 - image: milvusdb/milvus:v2.4.0  
42 - command: ["milvus", "run", "standalone"]  
43 - security_opt:  
44 - - seccomp:unconfined  
45 - environment:  
46 - ETCD_ENDPOINTS: etcd:2379  
47 - MINIO_ADDRESS: minio:9000  
48 - volumes:  
49 - - ./volumes/milvus:/var/lib/milvus  
50 - healthcheck:  
51 - test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]  
52 - interval: 30s  
53 - start_period: 90s  
54 - timeout: 20s  
55 - retries: 3  
56 - ports:  
57 - - "19530:19530"  
58 - - "9091:9091"  
59 - depends_on:  
60 - - "etcd"  
61 - - "minio"  
62 -  
63 - attu:  
64 - container_name: milvus-attu  
65 - image: zilliz/attu:v2.4  
66 - environment:  
67 - MILVUS_URL: milvus-standalone:19530  
68 - ports:  
69 - - "8000:3000"  
70 - depends_on:  
71 - - "standalone"  
72 -  
73 -networks:  
74 - default:  
75 - name: milvus  
76 -  
docs/DEPLOY_CENTOS8.md
1 -# OmniShopAgent centOS 8 部署指南 1 +# OmniShopAgent CentOS 8 部署指南
2 2
3 ## 一、环境要求 3 ## 一、环境要求
4 4
@@ -6,8 +6,8 @@ @@ -6,8 +6,8 @@
6 |------|------| 6 |------|------|
7 | 操作系统 | CentOS 8.x | 7 | 操作系统 | CentOS 8.x |
8 | Python | 3.12+(LangChain 1.x 要求 3.10+) | 8 | Python | 3.12+(LangChain 1.x 要求 3.10+) |
9 -| 内存 | 建议 8GB+(Milvus + CLIP 较占内存) |  
10 -| 磁盘 | 建议 20GB+(含数据集) | 9 +| 内存 | 建议 4GB+ |
  10 +| 磁盘 | 建议 10GB+ |
11 11
12 ## 二、快速部署步骤 12 ## 二、快速部署步骤
13 13
@@ -21,7 +21,6 @@ chmod +x scripts/*.sh @@ -21,7 +21,6 @@ chmod +x scripts/*.sh
21 21
22 该脚本会: 22 该脚本会:
23 - 安装系统依赖(gcc、openssl-devel 等) 23 - 安装系统依赖(gcc、openssl-devel 等)
24 -- 安装 Docker(用于 Milvus)  
25 - 安装 Python 3.12(conda 或源码编译) 24 - 安装 Python 3.12(conda 或源码编译)
26 - 创建虚拟环境并安装 requirements.txt 25 - 创建虚拟环境并安装 requirements.txt
27 26
@@ -59,17 +58,7 @@ make -j $(nproc) @@ -59,17 +58,7 @@ make -j $(nproc)
59 sudo make altinstall 58 sudo make altinstall
60 ``` 59 ```
61 60
62 -#### 步骤 3:安装 Docker  
63 -  
64 -```bash  
65 -sudo dnf config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo  
66 -sudo dnf install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin  
67 -sudo systemctl enable docker && sudo systemctl start docker  
68 -sudo usermod -aG docker $USER  
69 -# 执行 newgrp docker 或重新登录  
70 -```  
71 -  
72 -#### 步骤 4:创建虚拟环境并安装依赖 61 +#### 步骤 3:创建虚拟环境并安装依赖
73 62
74 ```bash 63 ```bash
75 cd /path/to/shop_agent 64 cd /path/to/shop_agent
@@ -79,46 +68,35 @@ pip install -U pip @@ -79,46 +68,35 @@ pip install -U pip
79 pip install -r requirements.txt 68 pip install -r requirements.txt
80 ``` 69 ```
81 70
82 -#### 步骤 5:配置环境变量 71 +#### 步骤 4:配置环境变量
83 72
84 ```bash 73 ```bash
85 cp .env.example .env 74 cp .env.example .env
86 # 编辑 .env,至少配置: 75 # 编辑 .env,至少配置:
87 # OPENAI_API_KEY=sk-xxx 76 # OPENAI_API_KEY=sk-xxx
88 -# MILVUS_HOST=localhost  
89 -# MILVUS_PORT=19530  
90 -# CLIP_SERVER_URL=grpc://localhost:51000 77 +# SEARCH_API_BASE_URL=http://120.76.41.98:6002
  78 +# SEARCH_API_TENANT_ID=162
91 ``` 79 ```
92 80
93 -## 三、数据准备 81 +## 三、数据准备(可选)
94 82
95 ### 3.1 下载数据集 83 ### 3.1 下载数据集
96 84
  85 +如需图片风格分析功能,可下载 Kaggle 数据集:
  86 +
97 ```bash 87 ```bash
98 # 需先配置 Kaggle API:~/.kaggle/kaggle.json 88 # 需先配置 Kaggle API:~/.kaggle/kaggle.json
99 python scripts/download_dataset.py 89 python scripts/download_dataset.py
100 ``` 90 ```
101 91
102 -### 3.2 启动 Milvus 并索引数据  
103 -  
104 -```bash  
105 -# 启动 Milvus  
106 -./scripts/run_milvus.sh  
107 -  
108 -# 等待就绪后,创建索引  
109 -python scripts/index_data.py  
110 -```  
111 -  
112 ## 四、启动服务 92 ## 四、启动服务
113 93
114 ### 4.1 启动脚本说明 94 ### 4.1 启动脚本说明
115 95
116 | 脚本 | 用途 | 96 | 脚本 | 用途 |
117 |------|------| 97 |------|------|
118 -| `start.sh` | 主启动脚本:启动 Milvus + Streamlit |  
119 -| `stop.sh` | 停止所有服务 |  
120 -| `run_milvus.sh` | 仅启动 Milvus |  
121 -| `run_clip.sh` | 仅启动 CLIP(图像搜索需此服务) | 98 +| `start.sh` | 主启动脚本:启动 Streamlit |
  99 +| `stop.sh` | 停止 Streamlit |
122 | `check_services.sh` | 健康检查 | 100 | `check_services.sh` | 健康检查 |
123 101
124 ### 4.2 启动应用 102 ### 4.2 启动应用
@@ -127,14 +105,7 @@ python scripts/index_data.py @@ -127,14 +105,7 @@ python scripts/index_data.py
127 # 方式 1:使用 start.sh(推荐) 105 # 方式 1:使用 start.sh(推荐)
128 ./scripts/start.sh 106 ./scripts/start.sh
129 107
130 -# 方式 2:分步启动  
131 -# 终端 1:Milvus  
132 -./scripts/run_milvus.sh  
133 -  
134 -# 终端 2:CLIP(图像搜索需要)  
135 -./scripts/run_clip.sh  
136 -  
137 -# 终端 3:Streamlit 108 +# 方式 2:直接运行
138 source venv/bin/activate 109 source venv/bin/activate
139 streamlit run app.py --server.port=8501 --server.address=0.0.0.0 110 streamlit run app.py --server.port=8501 --server.address=0.0.0.0
140 ``` 111 ```
@@ -142,7 +113,6 @@ streamlit run app.py --server.port=8501 --server.address=0.0.0.0 @@ -142,7 +113,6 @@ streamlit run app.py --server.port=8501 --server.address=0.0.0.0
142 ### 4.3 访问地址 113 ### 4.3 访问地址
143 114
144 - **Streamlit 应用**:http://服务器IP:8501 115 - **Streamlit 应用**:http://服务器IP:8501
145 -- **Milvus Attu 管理界面**:http://服务器IP:8000  
146 116
147 ## 五、生产部署建议 117 ## 五、生产部署建议
148 118
@@ -153,7 +123,7 @@ streamlit run app.py --server.port=8501 --server.address=0.0.0.0 @@ -153,7 +123,7 @@ streamlit run app.py --server.port=8501 --server.address=0.0.0.0
153 ```ini 123 ```ini
154 [Unit] 124 [Unit]
155 Description=OmniShopAgent Streamlit App 125 Description=OmniShopAgent Streamlit App
156 -After=network.target docker.service 126 +After=network.target
157 127
158 [Service] 128 [Service]
159 Type=simple 129 Type=simple
@@ -194,7 +164,6 @@ server { @@ -194,7 +164,6 @@ server {
194 164
195 ```bash 165 ```bash
196 sudo firewall-cmd --permanent --add-port=8501/tcp 166 sudo firewall-cmd --permanent --add-port=8501/tcp
197 -sudo firewall-cmd --permanent --add-port=19530/tcp  
198 sudo firewall-cmd --reload 167 sudo firewall-cmd --reload
199 ``` 168 ```
200 169
@@ -203,14 +172,8 @@ sudo firewall-cmd --reload @@ -203,14 +172,8 @@ sudo firewall-cmd --reload
203 ### Q: Python 3.12 编译失败? 172 ### Q: Python 3.12 编译失败?
204 A: 确保已安装 `openssl-devel`、`libffi-devel`,或直接使用 Miniconda。 173 A: 确保已安装 `openssl-devel`、`libffi-devel`,或直接使用 Miniconda。
205 174
206 -### Q: Docker 权限不足?  
207 -A: 执行 `sudo usermod -aG docker $USER` 后重新登录。  
208 -  
209 -### Q: Milvus 启动超时?  
210 -A: 首次启动需拉取镜像,可能较慢。可检查 `docker compose logs -f standalone`。  
211 -  
212 -### Q: 图像搜索不可用?  
213 -A: 需单独启动 CLIP 服务:`./scripts/run_clip.sh`。 175 +### Q: Search API 连接失败?
  176 +A: 检查 `.env` 中 `SEARCH_API_BASE_URL` 和 `SEARCH_API_TENANT_ID` 配置,确保网络可访问搜索服务。
214 177
215 ### Q: 健康检查? 178 ### Q: 健康检查?
216 A: 执行 `./scripts/check_services.sh` 查看各组件状态。 179 A: 执行 `./scripts/check_services.sh` 查看各组件状态。
docs/Skills实现方案-LangChain1.0.md
@@ -7,7 +7,7 @@ Agent 鍦 system prompt 涓彧鐪嬪埌鎶鑳芥憳瑕侊紝鎸夐渶鍔犺浇璇︾粏鎶鑳藉唴瀹 @@ -7,7 +7,7 @@ Agent 鍦 system prompt 涓彧鐪嬪埌鎶鑳芥憳瑕侊紝鎸夐渶鍔犺浇璇︾粏鎶鑳藉唴瀹
7 7
8 | 鎶鑳 | 鑻辨枃鏍囪瘑 | 鑱岃矗 | 8 | 鎶鑳 | 鑻辨枃鏍囪瘑 | 鑱岃矗 |
9 |------|----------|------| 9 |------|----------|------|
10 -| 鏌ユ壘鐩稿叧鍟嗗搧 | lookup_related | 鍩轰簬鏂囨湰/鍥剧墖鏌ユ壘鐩镐技鎴栫浉鍏冲晢鍝 | 10 +| 鏌ユ壘鐩稿叧鍟嗗搧 | lookup_related | 鍩轰簬鏂囨湰/鍥剧墖鏌ユ壘鐩镐技鎴栫浉鍏冲晢鍝侊紙鍥剧墖闇鍏堝垎鏋愰鏍硷級 |
11 | 鎼滅储鍟嗗搧 | search_products | 鎸夎嚜鐒惰瑷鎻忚堪鎼滅储鍟嗗搧 | 11 | 鎼滅储鍟嗗搧 | search_products | 鎸夎嚜鐒惰瑷鎻忚堪鎼滅储鍟嗗搧 |
12 | 妫楠屽晢鍝 | check_product | 妫楠屽晢鍝佹槸鍚︾鍚堢敤鎴疯姹 | 12 | 妫楠屽晢鍝 | check_product | 妫楠屽晢鍝佹槸鍚︾鍚堢敤鎴疯姹 |
13 | 缁撴灉鍖呰 | result_packaging | 鏍煎紡鍖栥佹帓搴忋佺瓫閫夊苟鍛堢幇缁撴灉 | 13 | 缁撴灉鍖呰 | result_packaging | 鏍煎紡鍖栥佹帓搴忋佺瓫閫夊苟鍛堢幇缁撴灉 |
@@ -24,7 +24,7 @@ Agent 鍦 system prompt 涓彧鐪嬪埌鎶鑳芥憳瑕侊紝鎸夐渶鍔犺浇璇︾粏鎶鑳藉唴瀹 @@ -24,7 +24,7 @@ Agent 鍦 system prompt 涓彧鐪嬪埌鎶鑳芥憳瑕侊紝鎸夐渶鍔犺浇璇︾粏鎶鑳藉唴瀹
24 | **鏂瑰紡 A锛歝reate_agent + 鑷畾涔 Skill 涓棿浠** | 璐墿瀵艰喘绛変笟鍔 Agent | `langchain>=1.0`銆乣langgraph>=1.0` | 24 | **鏂瑰紡 A锛歝reate_agent + 鑷畾涔 Skill 涓棿浠** | 璐墿瀵艰喘绛変笟鍔 Agent | `langchain>=1.0`銆乣langgraph>=1.0` |
25 | **鏂瑰紡 B锛欴eep Agents + SKILL.md** | 渚濊禆鏂囦欢绯荤粺銆佸鎶鑳界洰褰 | `deepagents` | 25 | **鏂瑰紡 B锛欴eep Agents + SKILL.md** | 渚濊禆鏂囦欢绯荤粺銆佸鎶鑳界洰褰 | `deepagents` |
26 26
27 -璐墿瀵艰喘鍦烘櫙鎺ㄨ崘**鏂瑰紡 A**锛屾洿鏄撲笌鐜版湁 Milvus銆丆LIP 绛夋湇鍔¢泦鎴愩 27 +璐墿瀵艰喘鍦烘櫙鎺ㄨ崘**鏂瑰紡 A**锛屾洿鏄撲笌鐜版湁 Search API 绛夋湇鍔¢泦鎴愩
28 28
29 ### 2.2 鏍稿績鎬濊矾锛歅rogressive Disclosure 29 ### 2.2 鏍稿績鎬濊矾锛歅rogressive Disclosure
30 30
@@ -58,7 +58,7 @@ class Skill(TypedDict): @@ -58,7 +58,7 @@ class Skill(TypedDict):
58 SKILLS: list[Skill] = [ 58 SKILLS: list[Skill] = [
59 { 59 {
60 "name": "lookup_related", 60 "name": "lookup_related",
61 - "description": "鏌ユ壘涓庢煇鍟嗗搧鐩稿叧鐨勫叾浠栧晢鍝侊紝鏀寔浠ュ浘鎼滃浘銆佹枃鏈浉浼笺佸悓鍝佺被鎺ㄨ崘銆", 61 + "description": "鏌ユ壘涓庢煇鍟嗗搧鐩稿叧鐨勫叾浠栧晢鍝侊紝鏀寔鏂囨湰鐩镐技銆佸悓鍝佺被鎺ㄨ崘銆",
62 "content": """# 鏌ユ壘鐩稿叧鍟嗗搧 62 "content": """# 鏌ユ壘鐩稿叧鍟嗗搧
63 63
64 ## 閫傜敤鍦烘櫙 64 ## 閫傜敤鍦烘櫙
@@ -67,12 +67,11 @@ SKILLS: list[Skill] = [ @@ -67,12 +67,11 @@ SKILLS: list[Skill] = [
67 - 鐢ㄦ埛宸叉湁涓浠跺晢鍝侊紝鎯虫壘鐩稿叧娆 67 - 鐢ㄦ埛宸叉湁涓浠跺晢鍝侊紝鎯虫壘鐩稿叧娆
68 68
69 ## 鎿嶄綔姝ラ 69 ## 鎿嶄綔姝ラ
70 -1. **鏈夊浘鐗**锛氬厛璋冪敤 `analyze_image_style` 鐞嗚В椋庢牸锛屽啀璋冪敤 `search_by_image` 鎴 `search_products` 70 +1. **鏈夊浘鐗**锛氬厛璋冪敤 `analyze_image_style` 鐞嗚В椋庢牸锛屽啀璋冪敤 `search_products` 鐢ㄦ弿杩版悳绱
71 2. **鏃犲浘鐗**锛氱敤 `search_products` 鎻忚堪鍝佺被+椋庢牸+棰滆壊 71 2. **鏃犲浘鐗**锛氱敤 `search_products` 鎻忚堪鍝佺被+椋庢牸+棰滆壊
72 3. 鍙粨鍚堜笂涓嬫枃涓殑鍟嗗搧 ID銆佸搧绫诲仛鍚屽搧绫绘帹鑽 72 3. 鍙粨鍚堜笂涓嬫枃涓殑鍟嗗搧 ID銆佸搧绫诲仛鍚屽搧绫绘帹鑽
73 73
74 ## 鍙敤宸ュ叿 74 ## 鍙敤宸ュ叿
75 -- `search_by_image(image_path, limit)`锛氫互鍥炬悳鍥  
76 - `search_products(query, limit)`锛氭枃鏈悳绱 75 - `search_products(query, limit)`锛氭枃鏈悳绱
77 - `analyze_image_style(image_path)`锛氬垎鏋愬浘鐗囬鏍""", 76 - `analyze_image_style(image_path)`锛氬垎鏋愬浘鐗囬鏍""",
78 }, 77 },
@@ -225,15 +224,14 @@ class ShoppingSkillMiddleware(AgentMiddleware): @@ -225,15 +224,14 @@ class ShoppingSkillMiddleware(AgentMiddleware):
225 from langchain.agents import create_agent 224 from langchain.agents import create_agent
226 from langgraph.checkpoint.memory import MemorySaver 225 from langgraph.checkpoint.memory import MemorySaver
227 226
228 -# 鍩虹宸ュ叿锛堟悳绱€佷互鍥炬悳鍥俱侀鏍煎垎鏋愮瓑锛  
229 -from app.tools.search_tools import search_products, search_by_image, analyze_image_style 227 +# 鍩虹宸ュ叿锛堟悳绱€侀鏍煎垎鏋愮瓑锛
  228 +from app.tools.search_tools import search_products, analyze_image_style
230 229
231 agent = create_agent( 230 agent = create_agent(
232 model="gpt-4o-mini", 231 model="gpt-4o-mini",
233 tools=[ 232 tools=[
234 load_skill, # 鎶鑳藉姞杞 233 load_skill, # 鎶鑳藉姞杞
235 search_products, 234 search_products,
236 - search_by_image,  
237 analyze_image_style, 235 analyze_image_style,
238 ], 236 ],
239 system_prompt="""浣犳槸鏅鸿兘鏃跺皻璐墿鍔╂墜銆傛牴鎹敤鎴烽渶姹傦紝鍏堝垽鏂娇鐢ㄥ摢涓妧鑳斤紝蹇呰鏃剁敤 load_skill 鍔犺浇鎶鑳借鎯呫 237 system_prompt="""浣犳槸鏅鸿兘鏃跺皻璐墿鍔╂墜銆傛牴鎹敤鎴烽渶姹傦紝鍏堝垽鏂娇鐢ㄥ摢涓妧鑳斤紝蹇呰鏃剁敤 load_skill 鍔犺浇鎶鑳借鎯呫
@@ -250,7 +248,7 @@ agent = create_agent( @@ -250,7 +248,7 @@ agent = create_agent(
250 248
251 | 鑳藉姏 | 鎶鑳 | 宸ュ叿 | 249 | 鑳藉姏 | 鎶鑳 | 宸ュ叿 |
252 |------|------|------| 250 |------|------|------|
253 -| 鏌ユ壘鐩稿叧 | lookup_related | search_by_image, search_products, analyze_image_style | 251 +| 鏌ユ壘鐩稿叧 | lookup_related | search_products, analyze_image_style |
254 | 鎼滅储鍟嗗搧 | search_products | search_products | 252 | 鎼滅储鍟嗗搧 | search_products | search_products |
255 | 妫楠屽晢鍝 | check_product | search_products锛堢敤 query 琛ㄨ揪绾︽潫锛 | 253 | 妫楠屽晢鍝 | check_product | search_products锛堢敤 query 琛ㄨ揪绾︽潫锛 |
256 | 缁撴灉鍖呰 | result_packaging | 鏃狅紙绾 prompt 绾︽潫锛 | 254 | 缁撴灉鍖呰 | result_packaging | 鏃狅紙绾 prompt 绾︽潫锛 |
技术实现报告.md renamed to docs/技术实现报告.md
@@ -7,7 +7,7 @@ OmniShopAgent 是一个基于 **LangGraph** 和 **ReAct 模式** 的自主多模 @@ -7,7 +7,7 @@ OmniShopAgent 是一个基于 **LangGraph** 和 **ReAct 模式** 的自主多模
7 ### 核心特性 7 ### 核心特性
8 8
9 - **自主工具选择与执行**:Agent 根据用户意图自主选择并调用工具 9 - **自主工具选择与执行**:Agent 根据用户意图自主选择并调用工具
10 -- **多模态搜索**:支持文本搜索 + 图像搜索 10 +- **文本搜索**:通过 Search API 进行商品搜索
11 - **对话上下文感知**:多轮对话中保持上下文记忆 11 - **对话上下文感知**:多轮对话中保持上下文记忆
12 - **实时视觉分析**:基于 VLM 的图片风格分析 12 - **实时视觉分析**:基于 VLM 的图片风格分析
13 13
@@ -20,9 +20,7 @@ OmniShopAgent 是一个基于 **LangGraph** 和 **ReAct 模式** 的自主多模 @@ -20,9 +20,7 @@ OmniShopAgent 是一个基于 **LangGraph** 和 **ReAct 模式** 的自主多模
20 | 运行环境 | Python 3.12 | 20 | 运行环境 | Python 3.12 |
21 | Agent 框架 | LangGraph 1.x | 21 | Agent 框架 | LangGraph 1.x |
22 | LLM 框架 | LangChain 1.x(支持任意 LLM,默认 gpt-4o-mini) | 22 | LLM 框架 | LangChain 1.x(支持任意 LLM,默认 gpt-4o-mini) |
23 -| 文本向量 | text-embedding-3-small |  
24 -| 图像向量 | CLIP ViT-B/32 |  
25 -| 向量数据库 | Milvus | 23 +| 搜索服务 | Search API (HTTP) |
26 | 前端 | Streamlit | 24 | 前端 | Streamlit |
27 | 数据集 | Kaggle Fashion Products | 25 | 数据集 | Kaggle Fashion Products |
28 26
@@ -45,23 +43,21 @@ OmniShopAgent 是一个基于 **LangGraph** 和 **ReAct 模式** 的自主多模 @@ -45,23 +43,21 @@ OmniShopAgent 是一个基于 **LangGraph** 和 **ReAct 模式** 的自主多模
45 │ │ START → Agent → [Has tool_calls?] → Tools → Agent → END │ │ 43 │ │ START → Agent → [Has tool_calls?] → Tools → Agent → END │ │
46 │ └───────────────────────────────────────────────────────────┘ │ 44 │ └───────────────────────────────────────────────────────────┘ │
47 └─────────────────────────────────────────────────────────────────┘ 45 └─────────────────────────────────────────────────────────────────┘
48 - │ │ │  
49 - ▼ ▼ ▼  
50 -┌──────────────┐ ┌──────────────────┐ ┌─────────────────────┐  
51 -│ search_ │ │ search_by_image │ │ analyze_image_style │  
52 -│ products │ │ │ │ (OpenAI Vision) │  
53 -└──────┬───────┘ └────────┬─────────┘ └──────────┬───────────┘  
54 - │ │ │  
55 - ▼ ▼ ▼ 46 + │ │
  47 + ▼ ▼
  48 +┌──────────────┐ ┌─────────────────────┐
  49 +│ search_ │ │ analyze_image_style │
  50 +│ products │ │ (OpenAI Vision) │
  51 +└──────┬───────┘ └──────────┬──────────┘
  52 + │ │
  53 + ▼ │
  54 +┌──────────────────┐ │
  55 +│ Search API │ │
  56 +│ (HTTP POST) │ │
  57 +└──────────────────┘ │
  58 + ▼
56 ┌─────────────────────────────────────────────────────────────────┐ 59 ┌─────────────────────────────────────────────────────────────────┐
57 -│ EmbeddingService (embedding_service.py) │  
58 -│ OpenAI API (文本) │ CLIP Server (图像) │  
59 -└─────────────────────────────────────────────────────────────────┘  
60 - │  
61 - ▼  
62 -┌─────────────────────────────────────────────────────────────────┐  
63 -│ MilvusService (milvus_service.py) │  
64 -│ text_embeddings 集合 │ image_embeddings 集合 │ 60 +│ OpenAI API (VLM 风格分析) │
65 └─────────────────────────────────────────────────────────────────┘ 61 └─────────────────────────────────────────────────────────────────┘
66 ``` 62 ```
67 63
@@ -140,12 +136,11 @@ def _build_graph(self): @@ -140,12 +136,11 @@ def _build_graph(self):
140 ```python 136 ```python
141 system_prompt = """You are an intelligent fashion shopping assistant. You can: 137 system_prompt = """You are an intelligent fashion shopping assistant. You can:
142 1. Search for products by text description (use search_products) 138 1. Search for products by text description (use search_products)
143 -2. Find visually similar products from images (use search_by_image)  
144 -3. Analyze image style and attributes (use analyze_image_style) 139 +2. Analyze image style and attributes (use analyze_image_style)
145 140
146 When a user asks about products: 141 When a user asks about products:
147 - For text queries: use search_products directly 142 - For text queries: use search_products directly
148 -- For image uploads: decide if you need to analyze_image_style first, then search 143 +- For image uploads: use analyze_image_style first to understand the product, then use search_products with the extracted description
149 - You can call multiple tools in sequence if needed 144 - You can call multiple tools in sequence if needed
150 - Always provide helpful, friendly responses 145 - Always provide helpful, friendly responses
151 146
@@ -198,41 +193,38 @@ def chat(self, query: str, image_path: Optional[str] = None) -> dict: @@ -198,41 +193,38 @@ def chat(self, query: str, image_path: Optional[str] = None) -> dict:
198 193
199 ### 4.2 搜索工具实现(search_tools.py) 194 ### 4.2 搜索工具实现(search_tools.py)
200 195
201 -#### 4.2.1 文本语义搜索 196 +#### 4.2.1 文本搜索(Search API)
202 197
203 ```python 198 ```python
204 @tool 199 @tool
205 def search_products(query: str, limit: int = 5) -> str: 200 def search_products(query: str, limit: int = 5) -> str:
206 """Search for fashion products using natural language descriptions.""" 201 """Search for fashion products using natural language descriptions."""
207 try: 202 try:
208 - embedding_service = get_embedding_service()  
209 - milvus_service = get_milvus_service()  
210 -  
211 - query_embedding = embedding_service.get_text_embedding(query)  
212 -  
213 - results = milvus_service.search_similar_text(  
214 - query_embedding=query_embedding,  
215 - limit=min(limit, 20),  
216 - filters=None,  
217 - output_fields=[  
218 - "id", "productDisplayName", "gender", "masterCategory",  
219 - "subCategory", "articleType", "baseColour", "season", "usage",  
220 - ],  
221 - ) 203 + url = f"{settings.search_api_base_url.rstrip('/')}/search/"
  204 + headers = {
  205 + "Content-Type": "application/json",
  206 + "X-Tenant-ID": settings.search_api_tenant_id,
  207 + }
  208 + payload = {
  209 + "query": query,
  210 + "size": min(limit, 20),
  211 + "from": 0,
  212 + "language": "zh",
  213 + }
  214 +
  215 + response = requests.post(url, json=payload, headers=headers, timeout=60)
  216 + data = response.json()
  217 + results = data.get("results", [])
222 218
223 if not results: 219 if not results:
224 return "No products found matching your search." 220 return "No products found matching your search."
225 221
226 output = f"Found {len(results)} product(s):\n\n" 222 output = f"Found {len(results)} product(s):\n\n"
227 for idx, product in enumerate(results, 1): 223 for idx, product in enumerate(results, 1):
228 - output += f"{idx}. {product.get('productDisplayName', 'Unknown Product')}\n"  
229 - output += f" ID: {product.get('id', 'N/A')}\n"  
230 - output += f" Category: {product.get('masterCategory')} > {product.get('subCategory')} > {product.get('articleType')}\n"  
231 - output += f" Color: {product.get('baseColour')}\n"  
232 - output += f" Gender: {product.get('gender')}\n"  
233 - if "distance" in product:  
234 - similarity = 1 - product["distance"]  
235 - output += f" Relevance: {similarity:.2%}\n" 224 + output += f"{idx}. {product.get('title', 'Unknown Product')}\n"
  225 + output += f" ID: {product.get('spu_id', 'N/A')}\n"
  226 + output += f" Category: {product.get('category_path', 'N/A')}\n"
  227 + output += f" Price: {product.get('price')}\n"
236 output += "\n" 228 output += "\n"
237 229
238 return output.strip() 230 return output.strip()
@@ -240,38 +232,7 @@ def search_products(query: str, limit: int = 5) -> str: @@ -240,38 +232,7 @@ def search_products(query: str, limit: int = 5) -> str:
240 return f"Error searching products: {str(e)}" 232 return f"Error searching products: {str(e)}"
241 ``` 233 ```
242 234
243 -#### 4.2.2 图像相似度搜索  
244 -  
245 -```python  
246 -@tool  
247 -def search_by_image(image_path: str, limit: int = 5) -> str:  
248 - """Find similar fashion products using an image."""  
249 - if not Path(image_path).exists():  
250 - return f"Error: Image file not found at '{image_path}'"  
251 -  
252 - embedding_service = get_embedding_service()  
253 - milvus_service = get_milvus_service()  
254 -  
255 - if not embedding_service.clip_client:  
256 - embedding_service.connect_clip()  
257 -  
258 - image_embedding = embedding_service.get_image_embedding(image_path)  
259 -  
260 - results = milvus_service.search_similar_images(  
261 - query_embedding=image_embedding,  
262 - limit=min(limit + 1, 21),  
263 - output_fields=[...],  
264 - )  
265 -  
266 - # 过滤掉查询图像本身(如上传的是商品库中的图)  
267 - query_id = Path(image_path).stem  
268 - filtered_results = [r for r in results if Path(r.get("image_path", "")).stem != query_id]  
269 - filtered_results = filtered_results[:limit]  
270 -  
271 -  
272 -```  
273 -  
274 -#### 4.2.3 视觉分析(VLM) 235 +#### 4.2.2 视觉分析(VLM)
275 236
276 ```python 237 ```python
277 @tool 238 @tool
@@ -310,161 +271,9 @@ Provide a comprehensive yet concise description (3-4 sentences).""" @@ -310,161 +271,9 @@ Provide a comprehensive yet concise description (3-4 sentences)."""
310 271
311 --- 272 ---
312 273
313 -### 4.3 向量服务实现  
314 -  
315 -#### 4.3.1 EmbeddingService(embedding_service.py)  
316 -  
317 -```python  
318 -class EmbeddingService:  
319 - def get_text_embedding(self, text: str) -> List[float]:  
320 - """OpenAI text-embedding-3-small"""  
321 - response = self.openai_client.embeddings.create(  
322 - input=text, model=self.text_embedding_model  
323 - )  
324 - return response.data[0].embedding  
325 -  
326 - def get_image_embedding(self, image_path: Union[str, Path]) -> List[float]:  
327 - """CLIP 图像向量"""  
328 - if not self.clip_client:  
329 - raise RuntimeError("CLIP client not connected. Call connect_clip() first.")  
330 - result = self.clip_client.encode([str(image_path)])  
331 - if isinstance(result, np.ndarray):  
332 - embedding = result[0].tolist() if len(result.shape) > 1 else result.tolist()  
333 - else:  
334 - embedding = result[0].embedding.tolist()  
335 - return embedding  
336 -  
337 - def get_text_embeddings_batch(self, texts: List[str], batch_size: int = 100) -> List[List[float]]:  
338 - """批量文本嵌入,用于索引"""  
339 - for i in range(0, len(texts), batch_size):  
340 - batch = texts[i : i + batch_size]  
341 - response = self.openai_client.embeddings.create(input=batch, ...)  
342 - embeddings = [item.embedding for item in response.data]  
343 - all_embeddings.extend(embeddings)  
344 - return all_embeddings  
345 -```  
346 -  
347 -#### 4.3.2 MilvusService(milvus_service.py) 274 +### 4.3 Streamlit 前端(app.py)
348 275
349 -**文本集合 Schema:**  
350 -  
351 -```python  
352 -schema = MilvusClient.create_schema(auto_id=False, enable_dynamic_field=True)  
353 -schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)  
354 -schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=2000)  
355 -schema.add_field(field_name="embedding", datatype=DataType.FLOAT_VECTOR, dim=self.text_dim) # 1536  
356 -schema.add_field(field_name="productDisplayName", datatype=DataType.VARCHAR, max_length=500)  
357 -schema.add_field(field_name="gender", datatype=DataType.VARCHAR, max_length=50)  
358 -schema.add_field(field_name="masterCategory", datatype=DataType.VARCHAR, max_length=100)  
359 -# ... 更多元数据字段  
360 -```  
361 -  
362 -**图像集合 Schema:**  
363 -  
364 -```python  
365 -schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)  
366 -schema.add_field(field_name="image_path", datatype=DataType.VARCHAR, max_length=500)  
367 -schema.add_field(field_name="embedding", datatype=DataType.FLOAT_VECTOR, dim=self.image_dim) # 512  
368 -# ... 产品元数据  
369 -```  
370 -  
371 -**相似度搜索:**  
372 -  
373 -```python  
374 -def search_similar_text(self, query_embedding, limit=10, output_fields=None):  
375 - results = self.client.search(  
376 - collection_name=self.text_collection_name,  
377 - data=[query_embedding],  
378 - limit=limit,  
379 - output_fields=output_fields,  
380 - )  
381 - formatted_results = []  
382 - for hit in results[0]:  
383 - result = {"id": hit.get("id"), "distance": hit.get("distance")}  
384 - entity = hit.get("entity", {})  
385 - for field in output_fields:  
386 - if field in entity:  
387 - result[field] = entity.get(field)  
388 - formatted_results.append(result)  
389 - return formatted_results  
390 -```  
391 -  
392 ----  
393 -  
394 -### 4.4 数据索引脚本(index_data.py)  
395 -  
396 -#### 4.4.1 产品数据加载  
397 -  
398 -```python  
399 -def _load_products_from_csv(self) -> Dict[int, Dict[str, Any]]:  
400 - products = {}  
401 - # 加载 images.csv 映射  
402 - with open(self.images_csv, "r") as f:  
403 - images_dict = {int(row["filename"].split(".")[0]): row["link"] for row in csv.DictReader(f)}  
404 -  
405 - # 加载 styles.csv  
406 - with open(self.styles_csv, "r") as f:  
407 - for row in csv.DictReader(f):  
408 - product_id = int(row["id"])  
409 - products[product_id] = {  
410 - "id": product_id,  
411 - "gender": row.get("gender", ""),  
412 - "masterCategory": row.get("masterCategory", ""),  
413 - "subCategory": row.get("subCategory", ""),  
414 - "articleType": row.get("articleType", ""),  
415 - "baseColour": row.get("baseColour", ""),  
416 - "season": row.get("season", ""),  
417 - "usage": row.get("usage", ""),  
418 - "productDisplayName": row.get("productDisplayName", ""),  
419 - "imagePath": f"{product_id}.jpg",  
420 - }  
421 - return products  
422 -```  
423 -  
424 -#### 4.4.2 文本索引  
425 -  
426 -```python  
427 -def _create_product_text(self, product: Dict[str, Any]) -> str:  
428 - """构造产品文本用于 embedding"""  
429 - parts = [  
430 - product.get("productDisplayName", ""),  
431 - f"Gender: {product.get('gender', '')}",  
432 - f"Category: {product.get('masterCategory', '')} > {product.get('subCategory', '')}",  
433 - f"Type: {product.get('articleType', '')}",  
434 - f"Color: {product.get('baseColour', '')}",  
435 - f"Season: {product.get('season', '')}",  
436 - f"Usage: {product.get('usage', '')}",  
437 - ]  
438 - return " | ".join([p for p in parts if p and p != "Gender: " and p != "Color: "])  
439 -```  
440 -  
441 -#### 4.4.3 批量索引流程  
442 -  
443 -```python  
444 -# 文本索引  
445 -texts = [self._create_product_text(p) for p in products]  
446 -embeddings = self.embedding_service.get_text_embeddings_batch(texts, batch_size=50)  
447 -milvus_data = [{  
448 - "id": product_id,  
449 - "text": text[:2000],  
450 - "embedding": embedding,  
451 - "productDisplayName": product["productDisplayName"][:500],  
452 - "gender": product["gender"][:50],  
453 - # ... 其他元数据  
454 -} for product_id, text, embedding in zip(...)]  
455 -self.milvus_service.insert_text_embeddings(milvus_data)  
456 -  
457 -# 图像索引  
458 -image_paths = [self.image_dir / p["imagePath"] for p in products]  
459 -embeddings = self.embedding_service.get_image_embeddings_batch(image_paths, batch_size=32)  
460 -# 类似插入 image_embeddings 集合  
461 -```  
462 -  
463 ----  
464 -  
465 -### 4.5 Streamlit 前端(app.py)  
466 -  
467 -#### 4.5.1 会话与 Agent 初始化 276 +#### 4.3.1 会话与 Agent 初始化
468 277
469 ```python 278 ```python
470 def initialize_session(): 279 def initialize_session():
@@ -478,7 +287,7 @@ def initialize_session(): @@ -478,7 +287,7 @@ def initialize_session():
478 st.session_state.uploaded_image = None 287 st.session_state.uploaded_image = None
479 ``` 288 ```
480 289
481 -#### 4.5.2 产品信息解析 290 +#### 4.3.2 产品信息解析
482 291
483 ```python 292 ```python
484 def extract_products_from_response(response: str) -> list: 293 def extract_products_from_response(response: str) -> list:
@@ -501,7 +310,7 @@ def extract_products_from_response(response: str) -> list: @@ -501,7 +310,7 @@ def extract_products_from_response(response: str) -> list:
501 return products 310 return products
502 ``` 311 ```
503 312
504 -#### 4.5.3 多轮对话中的图片引用 313 +#### 4.3.3 多轮对话中的图片引用
505 314
506 ```python 315 ```python
507 # 用户输入 "make them formal" 时,若上一条消息有图片,则引用该图片 316 # 用户输入 "make them formal" 时,若上一条消息有图片,则引用该图片
@@ -514,28 +323,14 @@ if any(ref in query_lower for ref in ["this", "that", "the image", "it"]): @@ -514,28 +323,14 @@ if any(ref in query_lower for ref in ["this", "that", "the image", "it"]):
514 323
515 --- 324 ---
516 325
517 -### 4.6 配置管理(config.py) 326 +### 4.4 配置管理(config.py)
518 327
519 ```python 328 ```python
520 class Settings(BaseSettings): 329 class Settings(BaseSettings):
521 openai_api_key: str 330 openai_api_key: str
522 openai_model: str = "gpt-4o-mini" 331 openai_model: str = "gpt-4o-mini"
523 - openai_embedding_model: str = "text-embedding-3-small"  
524 - clip_server_url: str = "grpc://localhost:51000"  
525 - milvus_uri: str = "http://localhost:19530"  
526 - text_collection_name: str = "text_embeddings"  
527 - image_collection_name: str = "image_embeddings"  
528 - text_dim: int = 1536  
529 - image_dim: int = 512  
530 -  
531 - @property  
532 - def milvus_uri_absolute(self) -> str:  
533 - """支持 Milvus Standalone 和 Milvus Lite"""  
534 - if self.milvus_uri.startswith(("http://", "https://")):  
535 - return self.milvus_uri  
536 - if self.milvus_uri.startswith("./"):  
537 - return os.path.join(base_dir, self.milvus_uri[2:])  
538 - return self.milvus_uri 332 + search_api_base_url: str = "http://120.76.41.98:6002"
  333 + search_api_tenant_id: str = "162"
539 334
540 class Config: 335 class Config:
541 env_file = ".env" 336 env_file = ".env"
@@ -547,35 +342,22 @@ class Settings(BaseSettings): @@ -547,35 +342,22 @@ class Settings(BaseSettings):
547 342
548 ### 5.1 依赖服务 343 ### 5.1 依赖服务
549 344
550 -```yaml  
551 -# docker-compose.yml 提供  
552 -- etcd: 元数据存储  
553 -- minio: 对象存储  
554 -- milvus-standalone: 向量数据库  
555 -- attu: Milvus 管理界面  
556 -``` 345 +- **Search API**:外部搜索服务(HTTP)
  346 +- **OpenAI API**:LLM 与 VLM 图像分析
557 347
558 ### 5.2 启动流程 348 ### 5.2 启动流程
559 349
560 ```bash 350 ```bash
561 # 1. 环境 351 # 1. 环境
562 pip install -r requirements.txt 352 pip install -r requirements.txt
563 -cp .env.example .env # 配置 OPENAI_API_KEY 353 +cp .env.example .env # 配置 OPENAI_API_KEY、SEARCH_API_* 等
564 354
565 -# 2. 下载数据 355 +# 2. (可选)下载数据
566 python scripts/download_dataset.py # Kaggle Fashion Product Images Dataset 356 python scripts/download_dataset.py # Kaggle Fashion Product Images Dataset
567 357
568 -# 3. 启动 CLIP 服务(需单独运行)  
569 -python -m clip_server  
570 -  
571 -# 4. 启动 Milvus  
572 -docker-compose up  
573 -  
574 -# 5. 索引数据  
575 -python scripts/index_data.py  
576 -  
577 -# 6. 启动应用 358 +# 3. 启动应用
578 streamlit run app.py 359 streamlit run app.py
  360 +# 或 ./scripts/start.sh
579 ``` 361 ```
580 362
581 --- 363 ---
@@ -585,7 +367,6 @@ streamlit run app.py @@ -585,7 +367,6 @@ streamlit run app.py
585 | 场景 | 用户输入 | Agent 行为 | 工具调用 | 367 | 场景 | 用户输入 | Agent 行为 | 工具调用 |
586 |------|----------|------------|----------| 368 |------|----------|------------|----------|
587 | 文本搜索 | "winter coats for women" | 直接文本搜索 | `search_products("winter coats women")` | 369 | 文本搜索 | "winter coats for women" | 直接文本搜索 | `search_products("winter coats women")` |
588 -| 图像搜索 | [上传图片] "find similar" | 图像相似度搜索 | `search_by_image(path)` |  
589 | 风格分析+搜索 | [上传复古夹克] "what style? find matching pants" | 先分析风格再搜索 | `analyze_image_style(path)` → `search_products("vintage pants casual")` | 370 | 风格分析+搜索 | [上传复古夹克] "what style? find matching pants" | 先分析风格再搜索 | `analyze_image_style(path)` → `search_products("vintage pants casual")` |
590 | 多轮上下文 | [第1轮] "show me red dresses"<br>[第2轮] "make them formal" | 结合上下文 | `search_products("red formal dresses")` | 371 | 多轮上下文 | [第1轮] "show me red dresses"<br>[第2轮] "make them formal" | 结合上下文 | `search_products("red formal dresses")` |
591 372
@@ -595,10 +376,9 @@ streamlit run app.py @@ -595,10 +376,9 @@ streamlit run app.py
595 376
596 1. **ReAct 模式**:Agent 自主决定何时调用工具、调用哪些工具、是否继续调用。 377 1. **ReAct 模式**:Agent 自主决定何时调用工具、调用哪些工具、是否继续调用。
597 2. **LangGraph 状态图**:`START → Agent → [条件] → Tools → Agent → END`,支持多轮工具调用。 378 2. **LangGraph 状态图**:`START → Agent → [条件] → Tools → Agent → END`,支持多轮工具调用。
598 -3. **多模态**:文本 + 图像 + VLM 分析,覆盖文本搜索、以图搜图、风格理解。  
599 -4. **双向量集合**:Milvus 中 text_embeddings / image_embeddings 分别存储,支持不同模态的检索。  
600 -5. **会话持久化**:`MemorySaver` + `thread_id` 实现多轮对话记忆。  
601 -6. **格式约束**:System prompt 严格限制产品输出格式,便于前端解析和展示。 379 +3. **搜索与风格分析**:Search API 文本搜索 + VLM 图像风格分析。
  380 +4. **会话持久化**:`MemorySaver` + `thread_id` 实现多轮对话记忆。
  381 +5. **格式约束**:System prompt 严格限制产品输出格式,便于前端解析和展示。
602 382
603 --- 383 ---
604 384
@@ -611,8 +391,6 @@ OmniShopAgent/ @@ -611,8 +391,6 @@ OmniShopAgent/
611 │ │ └── shopping_agent.py 391 │ │ └── shopping_agent.py
612 │ ├── config.py 392 │ ├── config.py
613 │ ├── services/ 393 │ ├── services/
614 -│ │ ├── embedding_service.py  
615 -│ │ └── milvus_service.py  
616 │ └── tools/ 394 │ └── tools/
617 │ └── search_tools.py 395 │ └── search_tools.py
618 ├── scripts/ 396 ├── scripts/
docs/搜索API对接指南.md 0 → 100644
@@ -0,0 +1,1651 @@ @@ -0,0 +1,1651 @@
  1 +# 搜索API接口对接指南
  2 +
  3 +本文档为搜索服务的使用方提供完整的API对接指南,包括接口说明、请求参数、响应格式和使用示例。
  4 +
  5 +## 目录
  6 +
  7 +1. [快速开始](#快速开始)
  8 + - 1.1 [基础信息](#11-基础信息)
  9 + - 1.2 [最简单的搜索请求](#12-最简单的搜索请求)
  10 + - 1.3 [带过滤与分页的搜索](#13-带过滤与分页的搜索)
  11 + - 1.4 [开启分面的搜索](#14-开启分面的搜索)
  12 +
  13 +2. [接口概览](#接口概览)
  14 +
  15 +3. [搜索接口](#搜索接口)
  16 + - 3.1 [接口信息](#31-接口信息)
  17 + - 3.2 [请求参数](#32-请求参数)
  18 + - 3.3 [过滤器详解](#33-过滤器详解)
  19 + - 3.4 [分面配置](#34-分面配置)
  20 + - 3.5 [SKU筛选维度](#35-sku筛选维度)
  21 + - 3.6 [布尔表达式语法](#36-布尔表达式语法)
  22 + - 3.7 [搜索建议接口](#37-搜索建议接口)
  23 + - 3.8 [即时搜索接口](#38-即时搜索接口)
  24 + - 3.9 [获取单个文档](#39-获取单个文档)
  25 +
  26 +4. [响应格式说明](#响应格式说明)
  27 + - 4.1 [标准响应结构](#41-标准响应结构)
  28 + - 4.2 [响应字段说明](#42-响应字段说明)
  29 + - 4.2.1 [query_info 说明](#421-query_info-说明)
  30 + - 4.3 [SpuResult字段说明](#43-spuresult字段说明)
  31 + - 4.4 [SkuResult字段说明](#44-skuresult字段说明)
  32 + - 4.5 [多语言字段说明](#45-多语言字段说明)
  33 +
  34 +5. [索引接口](#索引接口)
  35 + - 5.0 [为租户创建索引](#50-为租户创建索引)
  36 + - 5.1 [全量索引接口](#51-全量索引接口)
  37 + - 5.2 [增量索引接口](#52-增量索引接口)
  38 + - 5.3 [查询文档接口](#53-查询文档接口)
  39 + - 5.4 [索引健康检查接口](#54-索引健康检查接口)
  40 +
  41 +6. [管理接口](#管理接口)
  42 + - 6.1 [健康检查](#61-健康检查)
  43 + - 6.2 [获取配置](#62-获取配置)
  44 + - 6.3 [索引统计](#63-索引统计)
  45 +
  46 +7. [常见场景示例](#常见场景示例)
  47 + - 7.1 [基础搜索与排序](#71-基础搜索与排序)
  48 + - 7.2 [过滤搜索](#72-过滤搜索)
  49 + - 7.3 [分面搜索](#73-分面搜索)
  50 + - 7.4 [规格过滤与分面](#74-规格过滤与分面)
  51 + - 7.5 [SKU筛选](#75-sku筛选)
  52 + - 7.6 [布尔表达式搜索](#76-布尔表达式搜索)
  53 + - 7.7 [分页查询](#77-分页查询)
  54 +
  55 +8. [数据模型](#数据模型)
  56 + - 8.1 [商品字段定义](#81-商品字段定义)
  57 + - 8.2 [字段类型速查](#82-字段类型速查)
  58 + - 8.3 [常用字段列表](#83-常用字段列表)
  59 + - 8.4 [支持的分析器](#84-支持的分析器)
  60 +
  61 +---
  62 +
  63 +## 快速开始
  64 +
  65 +### 1.1 基础信息
  66 +
  67 +- **Base URL**: `http://120.76.41.98:6002`
  68 +- **协议**: HTTP/HTTPS
  69 +- **数据格式**: JSON
  70 +- **字符编码**: UTF-8
  71 +- **请求方法**: POST(搜索接口)
  72 +
  73 +**重要提示**: `tenant_id` 通过 HTTP Header `X-Tenant-ID` 传递,不在请求体中。
  74 +
  75 +### 1.2 最简单的搜索请求
  76 +
  77 +```bash
  78 +curl -X POST "http://120.76.41.98:6002/search/" \
  79 + -H "Content-Type: application/json" \
  80 + -H "X-Tenant-ID: 162" \
  81 + -d '{"query": "芭比娃娃"}'
  82 +```
  83 +
  84 +### 1.3 带过滤与分页的搜索
  85 +
  86 +```bash
  87 +curl -X POST "http://120.76.41.98:6002/search/" \
  88 + -H "Content-Type: application/json" \
  89 + -H "X-Tenant-ID: 162" \
  90 + -d '{
  91 + "query": "芭比娃娃",
  92 + "size": 5,
  93 + "from": 10,
  94 + "range_filters": {
  95 + "min_price": {
  96 + "gte": 50,
  97 + "lte": 200
  98 + },
  99 + "create_time": {
  100 + "gte": "2020-01-01T00:00:00Z"
  101 + }
  102 + },
  103 + "sort_by": "price",
  104 + "sort_order": "asc"
  105 + }'
  106 +```
  107 +
  108 +### 1.4 开启分面的搜索
  109 +
  110 +```bash
  111 +curl -X POST "http://120.76.41.98:6002/search/" \
  112 + -H "Content-Type: application/json" \
  113 + -H "X-Tenant-ID: 162" \
  114 + -d '{
  115 + "query": "芭比娃娃",
  116 + "facets": [
  117 + {"field": "category1_name", "size": 10, "type": "terms"},
  118 + {"field": "specifications.color", "size": 10, "type": "terms"},
  119 + {"field": "specifications.size", "size": 10, "type": "terms"}
  120 + ],
  121 + "min_score": 0.2
  122 + }'
  123 +```
  124 +
  125 +---
  126 +
  127 +## 接口概览
  128 +
  129 +| 接口 | HTTP Method | Endpoint | 说明 |
  130 +|------|------|------|------|
  131 +| 搜索 | POST | `/search/` | 执行搜索查询 |
  132 +| 搜索建议 | GET | `/search/suggestions` | 搜索建议(框架,暂未实现) ⚠️ TODO |
  133 +| 即时搜索 | GET | `/search/instant` | 边输入边搜索(框架) ⚠️ TODO |
  134 +| 获取文档 | GET | `/search/{doc_id}` | 获取单个文档 |
  135 +| 全量索引 | POST | `/indexer/reindex` | 全量索引接口(导入数据,不删除索引) |
  136 +| 增量索引 | POST | `/indexer/index` | 增量索引接口(指定SPU ID列表进行索引,支持自动检测删除和显式删除) |
  137 +| 查询文档 | POST | `/indexer/documents` | 查询SPU文档数据(不写入ES) |
  138 +| 索引健康检查 | GET | `/indexer/health` | 检查索引服务状态 |
  139 +| 健康检查 | GET | `/admin/health` | 服务健康检查 |
  140 +| 获取配置 | GET | `/admin/config` | 获取租户配置 |
  141 +| 索引统计 | GET | `/admin/stats` | 获取索引统计信息 |
  142 +
  143 +---
  144 +
  145 +## 搜索接口
  146 +
  147 +### 3.1 接口信息
  148 +
  149 +- **端点**: `POST /search/`
  150 +- **描述**: 执行文本搜索查询,支持多语言、布尔表达式、过滤器和分面搜索
  151 +
  152 +### 3.2 请求参数
  153 +
  154 +#### 完整请求体结构
  155 +
  156 +```json
  157 +{
  158 + "query": "string (required)",
  159 + "size": 10,
  160 + "from": 0,
  161 + "language": "zh",
  162 + "filters": {},
  163 + "range_filters": {},
  164 + "facets": [],
  165 + "sort_by": "string",
  166 + "sort_order": "desc",
  167 + "min_score": 0.0,
  168 + "sku_filter_dimension": ["string"],
  169 + "debug": false,
  170 + "enable_rerank": false,
  171 + "rerank_query_template": "{query}",
  172 + "rerank_doc_template": "{title}",
  173 + "user_id": "string",
  174 + "session_id": "string"
  175 +}
  176 +```
  177 +
  178 +#### 参数详细说明
  179 +
  180 +| 参数 | 类型 | 必填 | 默认值 | 说明 |
  181 +|------|------|------|--------|------|
  182 +| `query` | string | Y | - | 搜索查询字符串,支持布尔表达式(AND, OR, RANK, ANDNOT) |
  183 +| `size` | integer | N | 10 | 返回结果数量(1-100) |
  184 +| `from` | integer | N | 0 | 分页偏移量(用于分页) |
  185 +| `language` | string | N | "zh" | 返回语言:`zh`(中文)或 `en`(英文)。后端会根据此参数选择对应的中英文字段返回 |
  186 +| `filters` | object | N | null | 精确匹配过滤器(见[过滤器详解](#33-过滤器详解)) |
  187 +| `range_filters` | object | N | null | 数值范围过滤器(见[过滤器详解](#33-过滤器详解)) |
  188 +| `facets` | array | N | null | 分面配置(见[分面配置](#34-分面配置)) |
  189 +| `sort_by` | string | N | null | 排序字段名。支持:`price`(价格)、`sales`(销量)、`create_time`(创建时间)、`update_time`(更新时间)。默认按相关性排序 |
  190 +| `sort_order` | string | N | "desc" | 排序方向:`asc`(升序)或 `desc`(降序)。注意:`price`+`asc`=价格从低到高,`price`+`desc`=价格从高到低(后端自动映射为min_price或max_price) |
  191 +| `min_score` | float | N | null | 最小相关性分数阈值 |
  192 +| `sku_filter_dimension` | array[string] | N | null | 子SKU筛选维度列表(见[SKU筛选维度](#35-sku筛选维度)) |
  193 +| `debug` | boolean | N | false | 是否返回调试信息 |
  194 +| `enable_rerank` | boolean | N | false | 是否开启重排(调用外部重排服务对 ES 结果进行二次排序)。开启后若 `from+size<=rerank_window` 才会触发重排 |
  195 +| `rerank_query_template` | string | N | null | 重排 query 模板(可选)。支持 `{query}` 占位符;不传则使用服务端配置 |
  196 +| `rerank_doc_template` | string | N | null | 重排 doc 模板(可选)。支持 `{title} {brief} {vendor} {description} {category_path}`;不传则使用服务端配置 |
  197 +| `user_id` | string | N | null | 用户ID(用于个性化,预留) |
  198 +| `session_id` | string | N | null | 会话ID(用于分析,预留) |
  199 +
  200 +### 3.3 过滤器详解
  201 +
  202 +#### 3.3.1 精确匹配过滤器 (filters)
  203 +
  204 +用于精确匹配或多值匹配。对于普通字段,数组表示 OR 逻辑(匹配任意一个值);对于 specifications 字段,按维度分组处理。**任意字段名加 `_all` 后缀**表示多值 AND 逻辑(必须同时匹配所有值)。
  205 +
  206 +**格式**:
  207 +```json
  208 +{
  209 + "filters": {
  210 + "category_name": "手机", // 可以为单值 或者 数组 匹配数组中任意一个(OR)
  211 + "category1_name": "服装", // 可以为单值 或者 数组 匹配数组中任意一个(OR)
  212 + "category2_name": "男装", // 可以为单值 或者 数组 匹配数组中任意一个(OR)
  213 + "category3_name": "衬衫", // 可以为单值 或者 数组 匹配数组中任意一个(OR)
  214 + "vendor.zh.keyword": ["奇乐", "品牌A"], // 可以为单值 或者 数组 匹配数组中任意一个(OR)
  215 + "tags": "手机", // 可以为单值 或者 数组 匹配数组中任意一个(OR)
  216 + "tags_all": ["手机", "促销", "新品"], // *_all:多值为 AND,必须同时包含所有标签
  217 + "category1_name_all": ["服装", "男装"], // 同上,适用于任意可过滤字段
  218 + // specifications 嵌套过滤(特殊格式)
  219 + "specifications": {
  220 + "name": "color",
  221 + "value": "white"
  222 + }
  223 + }
  224 +}
  225 +```
  226 +
  227 +**支持的值类型**:
  228 +- 字符串:精确匹配
  229 +- 整数:精确匹配
  230 +- 布尔值:精确匹配
  231 +- 数组:匹配任意值(OR 逻辑);若字段名以 `_all` 结尾,则数组表示 AND 逻辑(必须同时匹配所有值)
  232 +- 对象:specifications 嵌套过滤(见下文)
  233 +
  234 +**`*_all` 语义(多值 AND)**:
  235 +- 任意过滤字段均可使用 `_all` 后缀,对应 ES 字段名为去掉 `_all` 后的名称。
  236 +- 例如:`tags_all: ["A", "B"]` 表示文档的 `tags` 必须**同时包含** A 和 B;`vendor.zh.keyword_all: ["奇乐", "品牌A"]` 表示同时匹配两个品牌(通常用于 keyword 多值场景)。
  237 +- `specifications_all`:传列表 `[{"name":"color","value":"white"},{"name":"size","value":"256GB"}]` 时,表示所有列出的规格条件都要满足(与 `specifications` 多维度时的 AND 一致;若同维度多值则要求文档同时满足多个值,一般用于嵌套多值场景)。
  238 +
  239 +**Specifications 嵌套过滤**:
  240 +
  241 +`specifications` 是嵌套字段,支持按规格名称和值进行过滤。
  242 +
  243 +**单个规格过滤**:
  244 +```json
  245 +{
  246 + "filters": {
  247 + "specifications": {
  248 + "name": "color",
  249 + "value": "white"
  250 + }
  251 + }
  252 +}
  253 +```
  254 +查询规格名称为"color"且值为"white"的商品。
  255 +
  256 +**多个规格过滤(按维度分组)**:
  257 +```json
  258 +{
  259 + "filters": {
  260 + "specifications": [
  261 + {"name": "color", "value": "white"},
  262 + {"name": "size", "value": "256GB"}
  263 + ]
  264 + }
  265 +}
  266 +```
  267 +查询同时满足所有规格的商品(color=white **且** size=256GB)。
  268 +
  269 +**相同维度的多个值(OR 逻辑)**:
  270 +```json
  271 +{
  272 + "filters": {
  273 + "specifications": [
  274 + {"name": "size", "value": "3"},
  275 + {"name": "size", "value": "4"},
  276 + {"name": "size", "value": "5"},
  277 + {"name": "color", "value": "green"}
  278 + ]
  279 + }
  280 +}
  281 +```
  282 +查询满足 (size=3 **或** size=4 **或** size=5) **且** color=green 的商品。
  283 +
  284 +**过滤逻辑说明**:
  285 +- **不同维度**(不同的 `name`)之间是 **AND** 关系(求交集)
  286 +- **相同维度**(相同的 `name`)的多个值之间是 **OR** 关系(求并集)
  287 +
  288 +**常用过滤字段**(详见[常用字段列表](#83-常用字段列表)):
  289 +- `category_name`: 类目名称
  290 +- `category1_name`, `category2_name`, `category3_name`: 多级类目
  291 +- `category_id`: 类目ID
  292 +- `vendor.zh.keyword`, `vendor.en.keyword`: 供应商/品牌(使用keyword子字段)
  293 +- `tags`: 标签(keyword类型,支持数组)
  294 +- `option1_name`, `option2_name`, `option3_name`: 选项名称
  295 +- `specifications`: 规格过滤(嵌套字段,格式见上文)
  296 +- 以上任意字段均可加 `_all` 后缀表示多值 AND,如 `tags_all`、`category1_name_all`。
  297 +
  298 +#### 3.3.2 范围过滤器 (range_filters)
  299 +
  300 +用于数值字段的范围过滤。
  301 +
  302 +**格式**:
  303 +```json
  304 +{
  305 + "range_filters": {
  306 + "min_price": {
  307 + "gte": 50, // 大于等于
  308 + "lte": 200 // 小于等于
  309 + },
  310 + "max_price": {
  311 + "gt": 100 // 大于
  312 + },
  313 + "create_time": {
  314 + "gte": "2024-01-01T00:00:00Z" // 日期时间字符串
  315 + }
  316 + }
  317 +}
  318 +```
  319 +
  320 +**支持的操作符**:
  321 +- `gte`: 大于等于 (>=)
  322 +- `gt`: 大于 (>)
  323 +- `lte`: 小于等于 (<=)
  324 +- `lt`: 小于 (<)
  325 +
  326 +**注意**: 至少需要指定一个操作符。
  327 +
  328 +**常用范围字段**(详见[常用字段列表](#83-常用字段列表)):
  329 +- `min_price`: 最低价格
  330 +- `max_price`: 最高价格
  331 +- `compare_at_price`: 原价
  332 +- `create_time`: 创建时间
  333 +- `update_time`: 更新时间
  334 +
  335 +### 3.4 分面配置
  336 +
  337 +用于生成分面统计(分组聚合),常用于构建筛选器UI。
  338 +
  339 +#### 3.4.1 配置格式
  340 +
  341 +```json
  342 +{
  343 + "facets": [
  344 + {
  345 + "field": "category1_name",
  346 + "size": 15,
  347 + "type": "terms",
  348 + "disjunctive": false
  349 + },
  350 + {
  351 + "field": "brand_name",
  352 + "size": 10,
  353 + "type": "terms",
  354 + "disjunctive": true
  355 + },
  356 + {
  357 + "field": "specifications.color",
  358 + "size": 20,
  359 + "type": "terms",
  360 + "disjunctive": true
  361 + },
  362 + {
  363 + "field": "min_price",
  364 + "type": "range",
  365 + "ranges": [
  366 + {"key": "0-50", "to": 50},
  367 + {"key": "50-100", "from": 50, "to": 100},
  368 + {"key": "100-200", "from": 100, "to": 200},
  369 + {"key": "200+", "from": 200}
  370 + ]
  371 + }
  372 + ]
  373 +}
  374 +```
  375 +
  376 +#### 3.4.2 Facet 字段说明
  377 +
  378 +| 字段 | 类型 | 必填 | 默认值 | 说明 |
  379 +|------|------|------|--------|------|
  380 +| `field` | string | 是 | - | 分面字段名 |
  381 +| `size` | int | 否 | 10 | 返回的分面值数量(1-100) |
  382 +| `type` | string | 否 | "terms" | 分面类型:`terms`(词条聚合)或 `range`(范围聚合) |
  383 +| `disjunctive` | bool | 否 | false | 是否支持多选(disjunctive faceting)。启用后,选中该分面的过滤器时,仍会显示其他可选项 |
  384 +| `ranges` | array | 否 | null | 范围配置(仅 `type="range"` 时需要) |
  385 +
  386 +#### 3.4.3 disjunctive字段说明
  387 +
  388 +**重要特性**: `disjunctive` 字段控制分面的行为模式。启用后,选中该分面的过滤器时,仍会显示其他可选项
  389 +
  390 +**标准模式 (disjunctive: false)**:
  391 +- **行为**: 选中某个分面值后,该分面只显示选中的值
  392 +- **适用场景**: 层级类目、互斥选择
  393 +- **示例**: 类目下钻(玩具 > 娃娃 > 芭比)
  394 +
  395 +**Multi-Select 模式 (disjunctive: true)** ⭐:
  396 +- **行为**: 选中某个分面值后,该分面仍显示所有可选项
  397 +- **适用场景**: 颜色、品牌、尺码等可切换属性
  398 +- **示例**: 选择了"红色"后,仍能看到"蓝色"、"绿色"等选项
  399 +
  400 +**推荐配置**:
  401 +
  402 +| 分面类型 | disjunctive | 原因 |
  403 +|---------|-------------|------|
  404 +| 颜色 | `true` | 用户需要切换颜色 |
  405 +| 品牌 | `true` | 用户需要比较品牌 |
  406 +| 尺码 | `true` | 用户需要查看其他尺码 |
  407 +| 类目 | `false` | 层级下钻 |
  408 +| 价格区间 | `false` | 互斥选择 |
  409 +
  410 +#### 3.4.4 规格分面说明
  411 +
  412 +`specifications` 是嵌套字段,支持两种分面模式:
  413 +
  414 +**模式1:所有规格名称的分面**:
  415 +```json
  416 +{
  417 + "facets": [
  418 + {
  419 + "field": "specifications",
  420 + "size": 10,
  421 + "type": "terms"
  422 + }
  423 + ]
  424 +}
  425 +```
  426 +返回所有规格名称(name)及其对应的值(value)列表。每个 name 会生成一个独立的分面结果。
  427 +
  428 +**模式2:指定规格名称的分面**:
  429 +```json
  430 +{
  431 + "facets": [
  432 + {
  433 + "field": "specifications.color",
  434 + "size": 20,
  435 + "type": "terms",
  436 + "disjunctive": true
  437 + },
  438 + {
  439 + "field": "specifications.size",
  440 + "size": 15,
  441 + "type": "terms",
  442 + "disjunctive": true
  443 + }
  444 + ]
  445 +}
  446 +```
  447 +只返回指定规格名称的值列表。格式:`specifications.{name}`,其中 `{name}` 是规格名称(如"color"、"size"、"material")。
  448 +
  449 +**返回格式示例**:
  450 +```json
  451 +{
  452 + "facets": [
  453 + {
  454 + "field": "specifications.color",
  455 + "label": "color",
  456 + "type": "terms",
  457 + "values": [
  458 + {"value": "white", "count": 50, "selected": true}, // ✓ selected 字段由后端标记
  459 + {"value": "black", "count": 30, "selected": false},
  460 + {"value": "red", "count": 20, "selected": false}
  461 + ]
  462 + },
  463 + {
  464 + "field": "specifications.size",
  465 + "label": "size",
  466 + "type": "terms",
  467 + "values": [
  468 + {"value": "256GB", "count": 40, "selected": false},
  469 + {"value": "512GB", "count": 20, "selected": false}
  470 + ]
  471 + }
  472 + ]
  473 +}
  474 +```
  475 +
  476 +### 3.5 SKU筛选维度
  477 +
  478 +**功能说明**:
  479 +`sku_filter_dimension` 用于控制搜索列表页中 **每个 SPU 下方可切换的子款式(子 SKU)维度**,为字符串列表。
  480 +在店铺的 **主题装修配置** 中,商家可以为店铺设置一个或多个子款式筛选维度(例如 `color`、`size`),前端列表页会在每个 SPU 下展示这些维度对应的子 SKU 列表,用户可以通过点击不同维度值(如不同颜色)来切换展示的子款式。
  481 +当指定 `sku_filter_dimension` 后,后端会根据店铺的这项配置,从所有 SKU 中筛选出这些维度组合对应的子 SKU 数据:系统会按指定维度**组合**对 SKU 进行分组,每个维度组合只返回第一个 SKU(从简实现,选择该组合下的第一款),其余不在这些维度组合中的子 SKU 将不返回。
  482 +
  483 +**支持的维度值**:
  484 +1. **直接选项字段**: `option1`、`option2`、`option3`
  485 + - 直接使用对应的 `option1_value`、`option2_value`、`option3_value` 字段进行分组
  486 +
  487 +2. **规格/选项名称**: 通过 `option1_name`、`option2_name`、`option3_name` 匹配
  488 + - 例如:如果 `option1_name` 为 `"color"`,则可以使用 `sku_filter_dimension: ["color"]` 来按颜色分组
  489 +
  490 +**示例**:
  491 +
  492 +**按颜色筛选(假设 option1_name = "color")**:
  493 +```json
  494 +{
  495 + "query": "芭比娃娃",
  496 + "sku_filter_dimension": ["color"]
  497 +}
  498 +```
  499 +
  500 +**按选项1筛选**:
  501 +```json
  502 +{
  503 + "query": "芭比娃娃",
  504 + "sku_filter_dimension": ["option1"]
  505 +}
  506 +```
  507 +
  508 +**按颜色 + 尺寸组合筛选(假设 option1_name = "color", option2_name = "size")**:
  509 +```json
  510 +{
  511 + "query": "芭比娃娃",
  512 + "sku_filter_dimension": ["color", "size"]
  513 +}
  514 +```
  515 +
  516 +### 3.6 布尔表达式语法
  517 +
  518 +搜索查询支持布尔表达式,提供更灵活的搜索能力。
  519 +
  520 +**支持的操作符**:
  521 +
  522 +| 操作符 | 描述 | 示例 |
  523 +|--------|------|------|
  524 +| `AND` | 所有词必须匹配 | `玩具 AND 乐高` |
  525 +| `OR` | 任意词匹配 | `芭比 OR 娃娃` |
  526 +| `ANDNOT` | 排除特定词 | `玩具 ANDNOT 电动` |
  527 +| `RANK` | 排序加权(不强制匹配) | `玩具 RANK 乐高` |
  528 +| `()` | 分组 | `玩具 AND (乐高 OR 芭比)` |
  529 +
  530 +**操作符优先级**(从高到低):
  531 +1. `()` - 括号
  532 +2. `ANDNOT` - 排除
  533 +3. `AND` - 与
  534 +4. `OR` - 或
  535 +5. `RANK` - 排序
  536 +
  537 +**示例**:
  538 +```
  539 +"芭比娃娃" // 简单查询
  540 +"玩具 AND 乐高" // AND 查询
  541 +"芭比 OR 娃娃" // OR 查询
  542 +"玩具 ANDNOT 电动" // 排除查询
  543 +"玩具 AND (乐高 OR 芭比)" // 复杂查询
  544 +```
  545 +
  546 +### 3.7 搜索建议接口
  547 +
  548 +> ⚠️ **TODO**: 此接口当前为框架实现,功能暂未实现,仅返回空结果。接口和响应格式已经固定,可平滑扩展。
  549 +
  550 +- **端点**: `GET /search/suggestions`
  551 +- **描述**: 返回搜索建议(自动补全/热词)。当前为框架实现,接口和响应格式已经固定,可平滑扩展。
  552 +
  553 +#### 查询参数
  554 +
  555 +| 参数 | 类型 | 必填 | 默认值 | 描述 |
  556 +|------|------|------|--------|------|
  557 +| `q` | string | Y | - | 查询字符串(至少 1 个字符) |
  558 +| `size` | integer | N | 5 | 返回建议数量(1-20) |
  559 +| `types` | string | N | `query` | 建议类型(逗号分隔):`query`, `product`, `category`, `brand` |
  560 +
  561 +#### 响应示例
  562 +
  563 +```json
  564 +{
  565 + "query": "芭",
  566 + "suggestions": [
  567 + {
  568 + "text": "芭比娃娃",
  569 + "type": "query",
  570 + "highlight": "<em>芭</em>比娃娃",
  571 + "popularity": 850
  572 + }
  573 + ],
  574 + "took_ms": 5
  575 +}
  576 +```
  577 +
  578 +#### 请求示例
  579 +
  580 +```bash
  581 +curl "http://localhost:6002/search/suggestions?q=芭&size=5&types=query,product"
  582 +```
  583 +
  584 +### 3.8 即时搜索接口
  585 +
  586 +> ⚠️ **TODO**: 此接口当前为框架实现,暂未做专门优化,底层直接调用标准搜索接口。后续需要优化即时搜索性能(添加防抖/节流、实现结果缓存、简化返回字段)。
  587 +
  588 +- **端点**: `GET /search/instant`
  589 +- **描述**: 边输入边搜索,采用轻量参数响应当前输入。底层复用标准搜索能力。
  590 +
  591 +#### 查询参数
  592 +
  593 +| 参数 | 类型 | 必填 | 默认值 | 描述 |
  594 +|------|------|------|--------|------|
  595 +| `q` | string | Y | - | 搜索查询(至少 2 个字符) |
  596 +| `size` | integer | N | 5 | 返回结果数量(1-20) |
  597 +
  598 +#### 请求示例
  599 +
  600 +```bash
  601 +curl "http://localhost:6002/search/instant?q=玩具&size=5"
  602 +```
  603 +
  604 +### 3.9 获取单个文档
  605 +
  606 +- **端点**: `GET /search/{doc_id}`
  607 +- **描述**: 根据文档 ID 获取单个商品详情,用于点击结果后的详情页或排查问题。
  608 +
  609 +#### 路径参数
  610 +
  611 +| 参数 | 类型 | 描述 |
  612 +|------|------|------|
  613 +| `doc_id` | string | 商品或文档 ID |
  614 +
  615 +#### 响应示例
  616 +
  617 +```json
  618 +{
  619 + "id": "12345",
  620 + "source": {
  621 + "title": {
  622 + "zh": "芭比时尚娃娃"
  623 + },
  624 + "min_price": 89.99,
  625 + "category1_name": "玩具"
  626 + }
  627 +}
  628 +```
  629 +
  630 +#### 请求示例
  631 +
  632 +```bash
  633 +curl "http://localhost:6002/search/12345"
  634 +```
  635 +
  636 +---
  637 +
  638 +## 响应格式说明
  639 +
  640 +### 4.1 标准响应结构
  641 +
  642 +```json
  643 +{
  644 + "results": [
  645 + {
  646 + "spu_id": "12345",
  647 + "title": "芭比时尚娃娃",
  648 + "brief": "高品质芭比娃娃",
  649 + "description": "详细描述...",
  650 + "vendor": "美泰",
  651 + "category": "玩具",
  652 + "category_path": "玩具/娃娃/时尚",
  653 + "category_name": "时尚",
  654 + "category_id": "cat_001",
  655 + "category_level": 3,
  656 + "category1_name": "玩具",
  657 + "category2_name": "娃娃",
  658 + "category3_name": "时尚",
  659 + "tags": ["娃娃", "玩具", "女孩"],
  660 + "price": 89.99,
  661 + "compare_at_price": 129.99,
  662 + "currency": "USD",
  663 + "image_url": "https://example.com/image.jpg",
  664 + "in_stock": true,
  665 + "sku_prices": [89.99, 99.99, 109.99],
  666 + "sku_weights": [100, 150, 200],
  667 + "sku_weight_units": ["g", "g", "g"],
  668 + "total_inventory": 500,
  669 + "option1_name": "color",
  670 + "option2_name": "size",
  671 + "option3_name": null,
  672 + "specifications": [
  673 + {"sku_id": "sku_001", "name": "color", "value": "pink"},
  674 + {"sku_id": "sku_001", "name": "size", "value": "standard"}
  675 + ],
  676 + "skus": [
  677 + {
  678 + "sku_id": "67890",
  679 + "price": 89.99,
  680 + "compare_at_price": 129.99,
  681 + "sku": "BARBIE-001",
  682 + "stock": 100,
  683 + "weight": 0.1,
  684 + "weight_unit": "kg",
  685 + "option1_value": "pink",
  686 + "option2_value": "standard",
  687 + "option3_value": null,
  688 + "image_src": "https://example.com/sku1.jpg"
  689 + }
  690 + ],
  691 + "relevance_score": 8.5
  692 + }
  693 + ],
  694 + "total": 118,
  695 + "max_score": 8.5,
  696 + "facets": [
  697 + {
  698 + "field": "category1_name",
  699 + "label": "category1_name",
  700 + "type": "terms",
  701 + "values": [
  702 + {
  703 + "value": "玩具",
  704 + "label": "玩具",
  705 + "count": 85,
  706 + "selected": false
  707 + }
  708 + ]
  709 + },
  710 + {
  711 + "field": "specifications.color",
  712 + "label": "color",
  713 + "type": "terms",
  714 + "values": [
  715 + {
  716 + "value": "pink",
  717 + "label": "pink",
  718 + "count": 30,
  719 + "selected": false
  720 + }
  721 + ]
  722 + }
  723 + ],
  724 + "query_info": {
  725 + "original_query": "芭比娃娃",
  726 + "query_normalized": "芭比娃娃",
  727 + "rewritten_query": "芭比娃娃",
  728 + "detected_language": "zh",
  729 + "translations": {
  730 + "en": "barbie doll"
  731 + },
  732 + "domain": "default"
  733 + },
  734 + "suggestions": [],
  735 + "related_searches": [],
  736 + "took_ms": 45,
  737 + "performance_info": null,
  738 + "debug_info": null
  739 +}
  740 +```
  741 +
  742 +### 4.2 响应字段说明
  743 +
  744 +| 字段 | 类型 | 说明 |
  745 +|------|------|------|
  746 +| `results` | array | 搜索结果列表(SpuResult对象数组) |
  747 +| `results[].spu_id` | string | SPU ID |
  748 +| `results[].title` | string | 商品标题 |
  749 +| `results[].price` | float | 价格(min_price) |
  750 +| `results[].skus` | array | SKU列表(如果指定了`sku_filter_dimension`,则按维度过滤后的SKU) |
  751 +| `results[].relevance_score` | float | 相关性分数 |
  752 +| `total` | integer | 匹配的总文档数 |
  753 +| `max_score` | float | 最高相关性分数 |
  754 +| `facets` | array | 分面统计结果 |
  755 +| `query_info` | object | query处理信息 |
  756 +| `took_ms` | integer | 搜索耗时(毫秒) |
  757 +
  758 +#### 4.2.1 query_info 说明
  759 +
  760 +`query_info` 包含本次搜索的查询解析与处理结果:
  761 +
  762 +| 子字段 | 类型 | 说明 |
  763 +|--------|------|------|
  764 +| `original_query` | string | 用户原始查询 |
  765 +| `query_normalized` | string | 归一化后的查询(去空白、大小写等预处理,用于后续解析与改写) |
  766 +| `rewritten_query` | string | 重写后的查询(同义词/词典扩展等) |
  767 +| `detected_language` | string | 检测到的查询语言(如 `zh`、`en`) |
  768 +| `translations` | object | 翻译结果,键为语言代码,值为翻译文本 |
  769 +| `domain` | string | 查询域(如 `default`、`title`、`brand` 等) |
  770 +
  771 +### 4.3 SpuResult字段说明
  772 +
  773 +| 字段 | 类型 | 说明 |
  774 +|------|------|------|
  775 +| `spu_id` | string | SPU ID |
  776 +| `title` | string | 商品标题(根据language参数自动选择 `title.zh` 或 `title.en`) |
  777 +| `brief` | string | 商品短描述(根据language参数自动选择) |
  778 +| `description` | string | 商品详细描述(根据language参数自动选择) |
  779 +| `vendor` | string | 供应商/品牌(根据language参数自动选择) |
  780 +| `category` | string | 类目(兼容字段,等同于category_name) |
  781 +| `category_path` | string | 类目路径(多级,用于面包屑,根据language参数自动选择) |
  782 +| `category_name` | string | 类目名称(展示用,根据language参数自动选择) |
  783 +| `category_id` | string | 类目ID |
  784 +| `category_level` | integer | 类目层级(1/2/3) |
  785 +| `category1_name` | string | 一级类目名称 |
  786 +| `category2_name` | string | 二级类目名称 |
  787 +| `category3_name` | string | 三级类目名称 |
  788 +| `tags` | array[string] | 标签列表 |
  789 +| `price` | float | 价格(min_price) |
  790 +| `compare_at_price` | float | 原价 |
  791 +| `currency` | string | 货币单位(默认USD) |
  792 +| `image_url` | string | 主图URL |
  793 +| `in_stock` | boolean | 是否有库存(任意SKU有库存即为true) |
  794 +| `sku_prices` | array[float] | 所有SKU价格列表 |
  795 +| `sku_weights` | array[integer] | 所有SKU重量列表 |
  796 +| `sku_weight_units` | array[string] | 所有SKU重量单位列表 |
  797 +| `total_inventory` | integer | 总库存 |
  798 +| `sales` | integer | 销量(展示销量) |
  799 +| `option1_name` | string | 选项1名称(如"color") |
  800 +| `option2_name` | string | 选项2名称(如"size") |
  801 +| `option3_name` | string | 选项3名称 |
  802 +| `specifications` | array[object] | 规格列表(与ES specifications字段对应) |
  803 +| `skus` | array | SKU 列表 |
  804 +| `relevance_score` | float | 相关性分数(默认为 ES 原始分数;当开启 AI 搜索时为融合后的最终分数) |
  805 +
  806 +### 4.4 SkuResult字段说明
  807 +
  808 +| 字段 | 类型 | 说明 |
  809 +|------|------|------|
  810 +| `sku_id` | string | SKU ID |
  811 +| `price` | float | 价格 |
  812 +| `compare_at_price` | float | 原价 |
  813 +| `sku` | string | SKU编码(sku_code) |
  814 +| `stock` | integer | 库存数量 |
  815 +| `weight` | float | 重量 |
  816 +| `weight_unit` | string | 重量单位 |
  817 +| `option1_value` | string | 选项1取值(如color值) |
  818 +| `option2_value` | string | 选项2取值(如size值) |
  819 +| `option3_value` | string | 选项3取值 |
  820 +| `image_src` | string | SKU图片地址 |
  821 +
  822 +### 4.5 多语言字段说明
  823 +
  824 +- `title`, `brief`, `description`, `vendor`, `category_path`, `category_name` 会根据请求的 `language` 参数自动选择对应的中英文字段
  825 +- `language="zh"`: 优先返回 `*_zh` 字段,如果为空则回退到 `*_en` 字段
  826 +- `language="en"`: 优先返回 `*_en` 字段,如果为空则回退到 `*_zh` 字段
  827 +
  828 +---
  829 +
  830 +## 索引接口
  831 +
  832 +### 5.0 为租户创建索引
  833 +
  834 +为租户创建索引需要两个步骤:
  835 +
  836 +1. **创建索引结构**(可选,仅在需要更新 mapping 时执行)
  837 + - 使用脚本创建 ES 索引结构(基于 `mappings/search_products.json`)
  838 + - 如果索引已存在,会提示用户确认(会删除现有数据)
  839 +
  840 +2. **导入数据**(必需)
  841 + - 使用全量索引接口 `/indexer/reindex` 导入数据
  842 +
  843 +**创建索引结构**:
  844 +
  845 +```bash
  846 +./scripts/create_tenant_index.sh 170
  847 +```
  848 +
  849 +脚本会自动从项目根目录的 `.env` 文件加载 ES 配置。
  850 +
  851 +**注意事项**:
  852 +- ⚠️ 如果索引已存在,脚本会提示确认,确认后会删除现有数据
  853 +- 创建索引后,**必须**调用 `/indexer/reindex` 导入数据
  854 +- 如果只是更新数据而不需要修改索引结构,直接使用 `/indexer/reindex` 即可
  855 +
  856 +---
  857 +
  858 +### 5.1 全量索引接口
  859 +
  860 +- **端点**: `POST /indexer/reindex`
  861 +- **描述**: 全量索引,将指定租户的所有SPU数据导入到ES索引(不会删除现有索引)
  862 +
  863 +#### 请求参数
  864 +
  865 +```json
  866 +{
  867 + "tenant_id": "162",
  868 + "batch_size": 500
  869 +}
  870 +```
  871 +
  872 +| 参数 | 类型 | 必填 | 默认值 | 说明 |
  873 +|------|------|------|--------|------|
  874 +| `tenant_id` | string | Y | - | 租户ID |
  875 +| `batch_size` | integer | N | 500 | 批量导入大小 |
  876 +
  877 +#### 响应格式
  878 +
  879 +**成功响应(200 OK)**:
  880 +```json
  881 +{
  882 + "success": true,
  883 + "total": 1000,
  884 + "indexed": 1000,
  885 + "failed": 0,
  886 + "elapsed_time": 12.34,
  887 + "index_name": "search_products",
  888 + "tenant_id": "162"
  889 +}
  890 +```
  891 +
  892 +**错误响应**:
  893 +- `400 Bad Request`: 参数错误
  894 +- `503 Service Unavailable`: 服务未初始化
  895 +
  896 +#### 请求示例
  897 +
  898 +**全量索引(不会删除现有索引)**:
  899 +```bash
  900 +curl -X POST "http://localhost:6004/indexer/reindex" \
  901 + -H "Content-Type: application/json" \
  902 + -d '{
  903 + "tenant_id": "162",
  904 + "batch_size": 500
  905 + }'
  906 +```
  907 +
  908 +**查看日志**:
  909 +```bash
  910 +# 查看API日志(包含索引操作日志)
  911 +tail -f logs/api.log
  912 +
  913 +# 或者查看所有日志文件
  914 +tail -f logs/*.log
  915 +```
  916 +
  917 +> ⚠️ **重要提示**:如需 **创建索引结构**,请参考 [5.0 为租户创建索引](#50-为租户创建索引) 章节,使用 `scripts/create_tenant_index.sh` 脚本。创建后需要调用 `/indexer/reindex` 导入数据。
  918 +
  919 +**查看索引日志**:
  920 +
  921 +索引操作的所有关键信息都会记录到 `logs/indexer.log` 文件中(JSON 格式),包括:
  922 +- 请求开始和结束时间
  923 +- 租户ID、SPU ID、操作类型
  924 +- 每个SPU的处理状态
  925 +- ES批量写入结果
  926 +- 成功/失败统计和详细错误信息
  927 +
  928 +```bash
  929 +# 实时查看索引日志(包含全量和增量索引的所有操作)
  930 +tail -f logs/indexer.log
  931 +
  932 +# 使用 grep 查询(简单方式)
  933 +# 查看全量索引日志
  934 +grep "\"index_type\":\"bulk\"" logs/indexer.log | tail -100
  935 +
  936 +# 查看增量索引日志
  937 +grep "\"index_type\":\"incremental\"" logs/indexer.log | tail -100
  938 +
  939 +# 查看特定租户的索引日志
  940 +grep "\"tenant_id\":\"162\"" logs/indexer.log | tail -100
  941 +
  942 +# 使用 jq 查询(推荐,更精确的 JSON 查询)
  943 +# 安装 jq: sudo apt-get install jq 或 brew install jq
  944 +
  945 +# 查看全量索引日志
  946 +cat logs/indexer.log | jq 'select(.index_type == "bulk")' | tail -100
  947 +
  948 +# 查看增量索引日志
  949 +cat logs/indexer.log | jq 'select(.index_type == "incremental")' | tail -100
  950 +
  951 +# 查看特定租户的索引日志
  952 +cat logs/indexer.log | jq 'select(.tenant_id == "162")' | tail -100
  953 +
  954 +# 查看失败的索引操作
  955 +cat logs/indexer.log | jq 'select(.operation == "request_complete" and .failed_count > 0)'
  956 +
  957 +# 查看特定SPU的处理日志
  958 +cat logs/indexer.log | jq 'select(.spu_id == "123")'
  959 +
  960 +# 查看最近的索引请求统计
  961 +cat logs/indexer.log | jq 'select(.operation == "request_complete") | {timestamp, index_type, tenant_id, total_count, success_count, failed_count, elapsed_time}'
  962 +```
  963 +
  964 +### 5.2 增量索引接口
  965 +
  966 +- **端点**: `POST /indexer/index`
  967 +- **描述**: 增量索引接口,根据指定的SPU ID列表进行索引,直接将数据写入ES。用于增量更新指定商品。
  968 +
  969 +**删除说明**:
  970 +- `spu_ids`中的SPU:如果数据库`deleted=1`,自动从ES删除,响应状态为`deleted`
  971 +- `delete_spu_ids`中的SPU:直接删除,响应状态为`deleted`、`not_found`或`failed`
  972 +
  973 +#### 请求参数
  974 +
  975 +```json
  976 +{
  977 + "tenant_id": "162",
  978 + "spu_ids": ["123", "456", "789"],
  979 + "delete_spu_ids": ["100", "101"]
  980 +}
  981 +```
  982 +
  983 +| 参数 | 类型 | 必填 | 说明 |
  984 +|------|------|------|------|
  985 +| `tenant_id` | string | Y | 租户ID |
  986 +| `spu_ids` | array[string] | N | SPU ID列表(1-100个),要索引的SPU。如果为空,则只执行删除操作 |
  987 +| `delete_spu_ids` | array[string] | N | 显式指定要删除的SPU ID列表(1-100个),可选。无论数据库状态如何,都会从ES中删除这些SPU |
  988 +
  989 +**注意**:
  990 +- `spu_ids` 和 `delete_spu_ids` 不能同时为空
  991 +- 每个列表最多支持100个SPU ID
  992 +- 如果SPU在`spu_ids`中且数据库`deleted=1`,会自动从ES删除(自动检测删除)
  993 +
  994 +#### 响应格式
  995 +
  996 +```json
  997 +{
  998 + "spu_ids": [
  999 + {
  1000 + "spu_id": "123",
  1001 + "status": "indexed"
  1002 + },
  1003 + {
  1004 + "spu_id": "456",
  1005 + "status": "deleted"
  1006 + },
  1007 + {
  1008 + "spu_id": "789",
  1009 + "status": "failed",
  1010 + "msg": "SPU not found (unexpected)"
  1011 + }
  1012 + ],
  1013 + "delete_spu_ids": [
  1014 + {
  1015 + "spu_id": "100",
  1016 + "status": "deleted"
  1017 + },
  1018 + {
  1019 + "spu_id": "101",
  1020 + "status": "not_found"
  1021 + },
  1022 + {
  1023 + "spu_id": "102",
  1024 + "status": "failed",
  1025 + "msg": "Failed to delete from ES: Connection timeout"
  1026 + }
  1027 + ],
  1028 + "total": 6,
  1029 + "success_count": 4,
  1030 + "failed_count": 2,
  1031 + "elapsed_time": 1.23,
  1032 + "index_name": "search_products",
  1033 + "tenant_id": "162"
  1034 +}
  1035 +```
  1036 +
  1037 +| 字段 | 类型 | 说明 |
  1038 +|------|------|------|
  1039 +| `spu_ids` | array | spu_ids对应的响应列表,每个元素包含 `spu_id` 和 `status` |
  1040 +| `spu_ids[].status` | string | 状态:`indexed`(已索引)、`deleted`(已删除,自动检测)、`failed`(失败) |
  1041 +| `spu_ids[].msg` | string | 当status为`failed`时,包含失败原因(可选) |
  1042 +| `delete_spu_ids` | array | delete_spu_ids对应的响应列表,每个元素包含 `spu_id` 和 `status` |
  1043 +| `delete_spu_ids[].status` | string | 状态:`deleted`(已删除)、`not_found`(ES中不存在)、`failed`(失败) |
  1044 +| `delete_spu_ids[].msg` | string | 当status为`failed`时,包含失败原因(可选) |
  1045 +| `total` | integer | 总处理数量(spu_ids数量 + delete_spu_ids数量) |
  1046 +| `success_count` | integer | 成功数量(indexed + deleted + not_found) |
  1047 +| `failed_count` | integer | 失败数量 |
  1048 +| `elapsed_time` | float | 耗时(秒) |
  1049 +| `index_name` | string | 索引名称 |
  1050 +| `tenant_id` | string | 租户ID |
  1051 +
  1052 +**状态说明**:
  1053 +- `spu_ids` 的状态:
  1054 + - `indexed`: SPU已成功索引到ES
  1055 + - `deleted`: SPU在数据库中被标记为deleted=1,已从ES删除(自动检测)
  1056 + - `failed`: 处理失败,会包含`msg`字段说明失败原因
  1057 +- `delete_spu_ids` 的状态:
  1058 + - `deleted`: SPU已从ES成功删除
  1059 + - `not_found`: SPU在ES中不存在(也算成功,可能已经被删除过)
  1060 + - `failed`: 删除失败,会包含`msg`字段说明失败原因
  1061 +
  1062 +#### 请求示例
  1063 +
  1064 +**示例1:普通增量索引(自动检测删除)**:
  1065 +```bash
  1066 +curl -X POST "http://localhost:6004/indexer/index" \
  1067 + -H "Content-Type: application/json" \
  1068 + -d '{
  1069 + "tenant_id": "162",
  1070 + "spu_ids": ["123", "456", "789"]
  1071 + }'
  1072 +```
  1073 +说明:如果SPU 456在数据库中`deleted=1`,会自动从ES删除,在响应中`spu_ids`列表里456的状态为`deleted`。
  1074 +
  1075 +**示例2:显式删除(批量删除)**:
  1076 +```bash
  1077 +curl -X POST "http://localhost:6004/indexer/index" \
  1078 + -H "Content-Type: application/json" \
  1079 + -d '{
  1080 + "tenant_id": "162",
  1081 + "spu_ids": ["123", "456"],
  1082 + "delete_spu_ids": ["100", "101", "102"]
  1083 + }'
  1084 +```
  1085 +说明:SPU 100、101、102会被显式删除,无论数据库状态如何。
  1086 +
  1087 +**示例3:仅删除(不索引)**:
  1088 +```bash
  1089 +curl -X POST "http://localhost:6004/indexer/index" \
  1090 + -H "Content-Type: application/json" \
  1091 + -d '{
  1092 + "tenant_id": "162",
  1093 + "spu_ids": [],
  1094 + "delete_spu_ids": ["100", "101"]
  1095 + }'
  1096 +```
  1097 +说明:只执行删除操作,不进行索引。
  1098 +
  1099 +**示例4:混合操作(索引+删除)**:
  1100 +```bash
  1101 +curl -X POST "http://localhost:6004/indexer/index" \
  1102 + -H "Content-Type: application/json" \
  1103 + -d '{
  1104 + "tenant_id": "162",
  1105 + "spu_ids": ["123", "456", "789"],
  1106 + "delete_spu_ids": ["100", "101"]
  1107 + }'
  1108 +```
  1109 +说明:同时执行索引和删除操作。
  1110 +
  1111 +#### 日志说明
  1112 +
  1113 +增量索引操作的所有关键信息都会记录到 `logs/indexer.log` 文件中(JSON格式),包括:
  1114 +- 请求开始和结束时间
  1115 +- 每个SPU的处理状态(获取、转换、索引、删除)
  1116 +- ES批量写入结果
  1117 +- 成功/失败统计
  1118 +- 详细的错误信息
  1119 +
  1120 +日志查询方式请参考[5.1节查看索引日志](#51-全量索引接口)部分。
  1121 +
  1122 +### 5.3 查询文档接口
  1123 +
  1124 +- **端点**: `POST /indexer/documents`
  1125 +- **描述**: 查询文档接口,根据SPU ID列表获取ES文档数据(**不写入ES**)。用于查看、调试或验证SPU数据。
  1126 +
  1127 +#### 请求参数
  1128 +
  1129 +```json
  1130 +{
  1131 + "tenant_id": "162",
  1132 + "spu_ids": ["123", "456", "789"]
  1133 +}
  1134 +```
  1135 +
  1136 +| 参数 | 类型 | 必填 | 说明 |
  1137 +|------|------|------|------|
  1138 +| `tenant_id` | string | Y | 租户ID |
  1139 +| `spu_ids` | array[string] | Y | SPU ID列表(1-100个) |
  1140 +
  1141 +#### 响应格式
  1142 +
  1143 +```json
  1144 +{
  1145 + "success": [
  1146 + {
  1147 + "spu_id": "123",
  1148 + "document": {
  1149 + "tenant_id": "162",
  1150 + "spu_id": "123",
  1151 + "title": {
  1152 + "zh": "商品标题"
  1153 + },
  1154 + ...
  1155 + }
  1156 + },
  1157 + {
  1158 + "spu_id": "456",
  1159 + "document": {...}
  1160 + }
  1161 + ],
  1162 + "failed": [
  1163 + {
  1164 + "spu_id": "789",
  1165 + "error": "SPU not found or deleted"
  1166 + }
  1167 + ],
  1168 + "total": 3,
  1169 + "success_count": 2,
  1170 + "failed_count": 1
  1171 +}
  1172 +```
  1173 +
  1174 +| 字段 | 类型 | 说明 |
  1175 +|------|------|------|
  1176 +| `success` | array | 成功获取的SPU列表,每个元素包含 `spu_id` 和 `document`(完整的ES文档数据) |
  1177 +| `failed` | array | 失败的SPU列表,每个元素包含 `spu_id` 和 `error`(失败原因) |
  1178 +| `total` | integer | 总SPU数量 |
  1179 +| `success_count` | integer | 成功数量 |
  1180 +| `failed_count` | integer | 失败数量 |
  1181 +
  1182 +#### 请求示例
  1183 +
  1184 +**单个SPU查询**:
  1185 +```bash
  1186 +curl -X POST "http://localhost:6004/indexer/documents" \
  1187 + -H "Content-Type: application/json" \
  1188 + -d '{
  1189 + "tenant_id": "162",
  1190 + "spu_ids": ["123"]
  1191 + }'
  1192 +```
  1193 +
  1194 +**批量SPU查询**:
  1195 +```bash
  1196 +curl -X POST "http://localhost:6004/indexer/documents" \
  1197 + -H "Content-Type: application/json" \
  1198 + -d '{
  1199 + "tenant_id": "162",
  1200 + "spu_ids": ["123", "456", "789"]
  1201 + }'
  1202 +```
  1203 +
  1204 +#### 与 `/indexer/index` 的区别
  1205 +
  1206 +| 接口 | 功能 | 是否写入ES | 返回内容 |
  1207 +|------|------|-----------|----------|
  1208 +| `/indexer/documents` | 查询SPU文档数据 | 否 | 返回完整的ES文档数据 |
  1209 +| `/indexer/index` | 增量索引 | 是 | 返回成功/失败列表和统计信息 |
  1210 +
  1211 +**使用场景**:
  1212 +- `/indexer/documents`:用于查看、调试或验证SPU数据,不修改ES索引
  1213 +- `/indexer/index`:用于实际的增量索引操作,将更新的SPU数据同步到ES
  1214 +
  1215 +### 5.4 索引健康检查接口
  1216 +
  1217 +- **端点**: `GET /indexer/health`
  1218 +- **描述**: 检查索引服务的健康状态
  1219 +
  1220 +#### 响应格式
  1221 +
  1222 +```json
  1223 +{
  1224 + "status": "available",
  1225 + "database": "connected",
  1226 + "preloaded_data": {
  1227 + "category_mappings": 150
  1228 + }
  1229 +}
  1230 +```
  1231 +
  1232 +#### 请求示例
  1233 +
  1234 +```bash
  1235 +curl -X GET "http://localhost:6004/indexer/health"
  1236 +```
  1237 +
  1238 +---
  1239 +
  1240 +## 管理接口
  1241 +
  1242 +### 6.1 健康检查
  1243 +
  1244 +- **端点**: `GET /admin/health`
  1245 +- **描述**: 检查服务与依赖(如 Elasticsearch)状态。
  1246 +
  1247 +```json
  1248 +{
  1249 + "status": "healthy",
  1250 + "elasticsearch": "connected",
  1251 + "tenant_id": "tenant1"
  1252 +}
  1253 +```
  1254 +
  1255 +### 6.2 获取配置
  1256 +
  1257 +- **端点**: `GET /admin/config`
  1258 +- **描述**: 返回当前租户的脱敏配置,便于核对索引及排序表达式。
  1259 +
  1260 +```json
  1261 +{
  1262 + "tenant_id": "tenant1",
  1263 + "tenant_name": "Tenant1 Test Instance",
  1264 + "es_index_name": "search_tenant1",
  1265 + "num_fields": 20,
  1266 + "num_indexes": 4,
  1267 + "supported_languages": ["zh", "en", "ru"],
  1268 + "ranking_expression": "bm25() + 0.2*text_embedding_relevance()",
  1269 + "spu_enabled": false
  1270 +}
  1271 +```
  1272 +
  1273 +### 6.3 索引统计
  1274 +
  1275 +- **端点**: `GET /admin/stats`
  1276 +- **描述**: 获取索引文档数量与磁盘大小,方便监控。
  1277 +
  1278 +```json
  1279 +{
  1280 + "index_name": "search_tenant1",
  1281 + "document_count": 10000,
  1282 + "size_mb": 523.45
  1283 +}
  1284 +```
  1285 +
  1286 +---
  1287 +
  1288 +## 常见场景示例
  1289 +
  1290 +### 7.1 基础搜索与排序
  1291 +
  1292 +**按价格从低到高排序**:
  1293 +```json
  1294 +{
  1295 + "query": "玩具",
  1296 + "size": 20,
  1297 + "from": 0,
  1298 + "sort_by": "price",
  1299 + "sort_order": "asc"
  1300 +}
  1301 +```
  1302 +
  1303 +**按价格从高到低排序**:
  1304 +```json
  1305 +{
  1306 + "query": "玩具",
  1307 + "size": 20,
  1308 + "from": 0,
  1309 + "sort_by": "price",
  1310 + "sort_order": "desc"
  1311 +}
  1312 +```
  1313 +
  1314 +**按销量从高到低排序**:
  1315 +```json
  1316 +{
  1317 + "query": "玩具",
  1318 + "size": 20,
  1319 + "from": 0,
  1320 + "sort_by": "sales",
  1321 + "sort_order": "desc"
  1322 +}
  1323 +```
  1324 +
  1325 +**按默认(相关性)排序**:
  1326 +```json
  1327 +{
  1328 + "query": "玩具",
  1329 + "size": 20,
  1330 + "from": 0
  1331 +}
  1332 +```
  1333 +
  1334 +### 7.2 过滤搜索
  1335 +
  1336 +**需求**: 搜索"玩具",筛选类目为"益智玩具",价格在50-200之间
  1337 +
  1338 +```json
  1339 +{
  1340 + "query": "玩具",
  1341 + "size": 20,
  1342 + "language": "zh",
  1343 + "filters": {
  1344 + "category_name": "益智玩具"
  1345 + },
  1346 + "range_filters": {
  1347 + "min_price": {
  1348 + "gte": 50,
  1349 + "lte": 200
  1350 + }
  1351 + }
  1352 +}
  1353 +```
  1354 +
  1355 +**需求**: 搜索"手机",筛选多个品牌,价格范围
  1356 +
  1357 +```json
  1358 +{
  1359 + "query": "手机",
  1360 + "size": 20,
  1361 + "language": "zh",
  1362 + "filters": {
  1363 + "vendor.zh.keyword": ["品牌A", "品牌B"]
  1364 + },
  1365 + "range_filters": {
  1366 + "min_price": {
  1367 + "gte": 50,
  1368 + "lte": 200
  1369 + }
  1370 + }
  1371 +}
  1372 +```
  1373 +
  1374 +### 7.3 分面搜索
  1375 +
  1376 +**需求**: 搜索"玩具",获取类目和规格的分面统计,用于构建筛选器
  1377 +
  1378 +```json
  1379 +{
  1380 + "query": "玩具",
  1381 + "size": 20,
  1382 + "language": "zh",
  1383 + "facets": [
  1384 + {"field": "category1_name", "size": 15, "type": "terms"},
  1385 + {"field": "category2_name", "size": 10, "type": "terms"},
  1386 + {"field": "specifications", "size": 10, "type": "terms"}
  1387 + ]
  1388 +}
  1389 +```
  1390 +
  1391 +**需求**: 搜索"手机",获取价格区间和规格的分面统计
  1392 +
  1393 +```json
  1394 +{
  1395 + "query": "手机",
  1396 + "size": 20,
  1397 + "language": "zh",
  1398 + "facets": [
  1399 + {
  1400 + "field": "min_price",
  1401 + "type": "range",
  1402 + "ranges": [
  1403 + {"key": "0-50", "to": 50},
  1404 + {"key": "50-100", "from": 50, "to": 100},
  1405 + {"key": "100-200", "from": 100, "to": 200},
  1406 + {"key": "200+", "from": 200}
  1407 + ]
  1408 + },
  1409 + {
  1410 + "field": "specifications",
  1411 + "size": 10,
  1412 + "type": "terms"
  1413 + }
  1414 + ]
  1415 +}
  1416 +```
  1417 +
  1418 +### 7.4 规格过滤与分面
  1419 +
  1420 +**需求**: 搜索"手机",筛选color为"white"的商品
  1421 +
  1422 +```json
  1423 +{
  1424 + "query": "手机",
  1425 + "size": 20,
  1426 + "language": "zh",
  1427 + "filters": {
  1428 + "specifications": {
  1429 + "name": "color",
  1430 + "value": "white"
  1431 + }
  1432 + }
  1433 +}
  1434 +```
  1435 +
  1436 +**需求**: 搜索"手机",筛选color为"white"且size为"256GB"的商品
  1437 +
  1438 +```json
  1439 +{
  1440 + "query": "手机",
  1441 + "size": 20,
  1442 + "language": "zh",
  1443 + "filters": {
  1444 + "specifications": [
  1445 + {"name": "color", "value": "white"},
  1446 + {"name": "size", "value": "256GB"}
  1447 + ]
  1448 + }
  1449 +}
  1450 +```
  1451 +
  1452 +**需求**: 搜索"手机",筛选size为"3"、"4"或"5",且color为"green"的商品
  1453 +
  1454 +```json
  1455 +{
  1456 + "query": "手机",
  1457 + "size": 20,
  1458 + "language": "zh",
  1459 + "filters": {
  1460 + "specifications": [
  1461 + {"name": "size", "value": "3"},
  1462 + {"name": "size", "value": "4"},
  1463 + {"name": "size", "value": "5"},
  1464 + {"name": "color", "value": "green"}
  1465 + ]
  1466 + }
  1467 +}
  1468 +```
  1469 +
  1470 +**需求**: 搜索"手机",获取所有规格的分面统计
  1471 +
  1472 +```json
  1473 +{
  1474 + "query": "手机",
  1475 + "size": 20,
  1476 + "language": "zh",
  1477 + "facets": [
  1478 + {"field": "specifications", "size": 10, "type": "terms"}
  1479 + ]
  1480 +}
  1481 +```
  1482 +
  1483 +**需求**: 只获取"color"和"size"规格的分面统计
  1484 +
  1485 +```json
  1486 +{
  1487 + "query": "手机",
  1488 + "size": 20,
  1489 + "language": "zh",
  1490 + "facets": [
  1491 + {"field": "specifications.color", "size": 20, "type": "terms"},
  1492 + {"field": "specifications.size", "size": 15, "type": "terms"}
  1493 + ]
  1494 +}
  1495 +```
  1496 +
  1497 +**需求**: 搜索"手机",筛选类目和规格,并获取对应的分面统计
  1498 +
  1499 +```json
  1500 +{
  1501 + "query": "手机",
  1502 + "size": 20,
  1503 + "language": "zh",
  1504 + "filters": {
  1505 + "category_name": "手机",
  1506 + "specifications": {
  1507 + "name": "color",
  1508 + "value": "white"
  1509 + }
  1510 + },
  1511 + "facets": [
  1512 + {"field": "category1_name", "size": 15, "type": "terms"},
  1513 + {"field": "category2_name", "size": 10, "type": "terms"},
  1514 + {"field": "specifications.color", "size": 20, "type": "terms"},
  1515 + {"field": "specifications.size", "size": 15, "type": "terms"}
  1516 + ]
  1517 +}
  1518 +```
  1519 +
  1520 +### 7.5 SKU筛选
  1521 +
  1522 +**需求**: 搜索"芭比娃娃",每个SPU下按颜色筛选,每种颜色只显示一个SKU
  1523 +
  1524 +```json
  1525 +{
  1526 + "query": "芭比娃娃",
  1527 + "size": 20,
  1528 + "sku_filter_dimension": ["color"]
  1529 +}
  1530 +```
  1531 +
  1532 +**说明**:
  1533 +- 如果 `option1_name` 为 `"color"`,则使用 `sku_filter_dimension: ["color"]` 可以按颜色分组
  1534 +- 每个SPU下,每种颜色只会返回第一个SKU
  1535 +- 如果维度不匹配,返回所有SKU(不进行过滤)
  1536 +
  1537 +### 7.6 布尔表达式搜索
  1538 +
  1539 +**需求**: 搜索包含"手机"和"智能"的商品,排除"二手"
  1540 +
  1541 +```json
  1542 +{
  1543 + "query": "手机 AND 智能 ANDNOT 二手",
  1544 + "size": 20
  1545 +}
  1546 +```
  1547 +
  1548 +### 7.7 分页查询
  1549 +
  1550 +**需求**: 获取第2页结果(每页20条)
  1551 +
  1552 +```json
  1553 +{
  1554 + "query": "手机",
  1555 + "size": 20,
  1556 + "from": 20
  1557 +}
  1558 +```
  1559 +
  1560 +---
  1561 +
  1562 +## 数据模型
  1563 +
  1564 +### 8.1 商品字段定义
  1565 +
  1566 +| 字段名 | 类型 | 描述 |
  1567 +|--------|------|------|
  1568 +| `tenant_id` | keyword | 租户ID(多租户隔离) |
  1569 +| `spu_id` | keyword | SPU ID |
  1570 +| `title.<lang>` | object/text | 商品标题(多语言对象,如 `title.zh`, `title.en`) |
  1571 +| `brief.<lang>` | object/text | 商品短描述(多语言对象,如 `brief.zh`, `brief.en`) |
  1572 +| `description.<lang>` | object/text | 商品详细描述(多语言对象,如 `description.zh`, `description.en`) |
  1573 +| `vendor.<lang>` | object/text | 供应商/品牌(多语言对象,且带 keyword 子字段,如 `vendor.zh.keyword`) |
  1574 +| `category_path.<lang>` | object/text | 类目路径(多语言对象,用于搜索,如 `category_path.zh`) |
  1575 +| `category_name_text.<lang>` | object/text | 类目名称(多语言对象,用于搜索,如 `category_name_text.zh`) |
  1576 +| `category_id` | keyword | 类目ID |
  1577 +| `category_name` | keyword | 类目名称(用于过滤) |
  1578 +| `category_level` | integer | 类目层级 |
  1579 +| `category1_name`, `category2_name`, `category3_name` | keyword | 多级类目名称(用于过滤和分面) |
  1580 +| `tags` | keyword | 标签(数组) |
  1581 +| `specifications` | nested | 规格(嵌套对象数组) |
  1582 +| `option1_name`, `option2_name`, `option3_name` | keyword | 选项名称 |
  1583 +| `min_price`, `max_price` | float | 最低/最高价格 |
  1584 +| `compare_at_price` | float | 原价 |
  1585 +| `sku_prices` | float | SKU价格列表(数组) |
  1586 +| `sku_weights` | long | SKU重量列表(数组) |
  1587 +| `sku_weight_units` | keyword | SKU重量单位列表(数组) |
  1588 +| `total_inventory` | long | 总库存 |
  1589 +| `sales` | long | 销量(展示销量) |
  1590 +| `skus` | nested | SKU详细信息(嵌套对象数组) |
  1591 +| `create_time`, `update_time` | date | 创建/更新时间 |
  1592 +| `title_embedding` | dense_vector | 标题向量(1024维,仅用于搜索) |
  1593 +| `image_embedding` | nested | 图片向量(嵌套,仅用于搜索) |
  1594 +
  1595 +> 所有租户共享统一的索引结构。文本字段支持中英文双语,后端根据 `language` 参数自动选择对应字段返回。
  1596 +
  1597 +### 8.2 字段类型速查
  1598 +
  1599 +| 类型 | ES Mapping | 用途 |
  1600 +|------|------------|------|
  1601 +| `text` | `text` | 全文检索(支持中英文分析器) |
  1602 +| `keyword` | `keyword` | 精确匹配、聚合、排序 |
  1603 +| `integer` | `integer` | 整数 |
  1604 +| `long` | `long` | 长整数 |
  1605 +| `float` | `float` | 浮点数 |
  1606 +| `date` | `date` | 日期时间 |
  1607 +| `nested` | `nested` | 嵌套对象(specifications, skus, image_embedding) |
  1608 +| `dense_vector` | `dense_vector` | 向量字段(title_embedding,仅用于搜索) |
  1609 +
  1610 +### 8.3 常用字段列表
  1611 +
  1612 +#### 过滤字段
  1613 +
  1614 +- `category_name`: 类目名称
  1615 +- `category1_name`, `category2_name`, `category3_name`: 多级类目
  1616 +- `category_id`: 类目ID
  1617 +- `vendor.zh.keyword`, `vendor.en.keyword`: 供应商/品牌(使用keyword子字段)
  1618 +- `tags`: 标签(keyword类型)
  1619 +- `option1_name`, `option2_name`, `option3_name`: 选项名称
  1620 +- `specifications`: 规格过滤(嵌套字段,格式见[过滤器详解](#33-过滤器详解))
  1621 +
  1622 +#### 范围字段
  1623 +
  1624 +- `min_price`: 最低价格
  1625 +- `max_price`: 最高价格
  1626 +- `compare_at_price`: 原价
  1627 +- `create_time`: 创建时间
  1628 +- `update_time`: 更新时间
  1629 +
  1630 +#### 排序字段
  1631 +
  1632 +- `price`: 价格(后端自动根据sort_order映射:asc→min_price,desc→max_price)
  1633 +- `sales`: 销量
  1634 +- `create_time`: 创建时间
  1635 +- `update_time`: 更新时间
  1636 +- `relevance_score`: 相关性分数(默认,不指定sort_by时使用)
  1637 +
  1638 +**注意**: 前端只需传 `price`,后端会自动处理:
  1639 +- `sort_by: "price"` + `sort_order: "asc"` → 按 `min_price` 升序(价格从低到高)
  1640 +- `sort_by: "price"` + `sort_order: "desc"` → 按 `max_price` 降序(价格从高到低)
  1641 +
  1642 +### 8.4 支持的分析器
  1643 +
  1644 +| 分析器 | 语言 | 描述 |
  1645 +|--------|------|------|
  1646 +| `index_ansj` | 中文 | 中文索引分析器(用于中文字段) |
  1647 +| `query_ansj` | 中文 | 中文查询分析器(用于中文字段) |
  1648 +| `hanlp_index` ⚠️ TODO(暂不支持) | 中文 | 中文索引分析器(用于中文字段) |
  1649 +| `hanlp_standard` ⚠️ TODO(暂不支持) | 中文 | 中文查询分析器(用于中文字段) |
  1650 +| `english` | 英文 | 标准英文分析器(用于英文字段) |
  1651 +| `lowercase` | - | 小写标准化器(用于keyword子字段) |
@@ -12,13 +12,9 @@ langchain-openai>=0.2.0 @@ -12,13 +12,9 @@ langchain-openai>=0.2.0
12 langgraph>=1.0.0 12 langgraph>=1.0.0
13 openai>=1.12.0 13 openai>=1.12.0
14 14
15 -# Embeddings & Vision  
16 -clip-client>=3.5.0 # CLIP-as-Service client 15 +# Vision (VLM image analysis)
17 Pillow>=10.2.0 # Image processing 16 Pillow>=10.2.0 # Image processing
18 17
19 -# Vector Database  
20 -pymilvus>=2.3.6  
21 -  
22 # Databases 18 # Databases
23 pymongo>=4.6.1 19 pymongo>=4.6.1
24 20
scripts/check_services.sh
1 #!/usr/bin/env bash 1 #!/usr/bin/env bash
2 # ============================================================================= 2 # =============================================================================
3 # OmniShopAgent - 服务健康检查脚本 3 # OmniShopAgent - 服务健康检查脚本
4 -# 检查 Milvus、CLIP、Streamlit 等依赖服务状态 4 +# 检查 Streamlit、Search API 等依赖
5 # ============================================================================= 5 # =============================================================================
6 set -euo pipefail 6 set -euo pipefail
7 7
@@ -49,40 +49,16 @@ else @@ -49,40 +49,16 @@ else
49 echo -e "${RED}FAIL${NC} 未找到" 49 echo -e "${RED}FAIL${NC} 未找到"
50 fi 50 fi
51 51
52 -# 4. Milvus  
53 -echo -n "[Milvus] "  
54 -if command -v docker &>/dev/null; then  
55 - if docker ps --format '{{.Names}}' 2>/dev/null | grep -q milvus-standalone; then  
56 - if curl -s -o /dev/null -w "%{http_code}" http://localhost:9091/healthz 2>/dev/null | grep -q 200; then  
57 - echo -e "${GREEN}OK${NC} localhost:19530"  
58 - else  
59 - echo -e "${YELLOW}WARN${NC} 容器运行中,健康检查未响应"  
60 - fi  
61 - else  
62 - echo -e "${YELLOW}WARN${NC} 未运行 (docker compose up -d)"  
63 - fi  
64 -else  
65 - echo -e "${YELLOW}SKIP${NC} Docker 未安装"  
66 -fi  
67 -  
68 -# 5. CLIP 服务(可选)  
69 -echo -n "[CLIP] "  
70 -if timeout 2 bash -c 'echo >/dev/tcp/localhost/51000' 2>/dev/null; then  
71 - echo -e "${GREEN}OK${NC} localhost:51000"  
72 -else  
73 - echo -e "${YELLOW}WARN${NC} 未运行 (图像搜索需启动: python -m clip_server launch)"  
74 -fi  
75 -  
76 -# 6. 数据目录 52 +# 4. 数据目录(可选,用于图片上传)
77 echo -n "[数据] " 53 echo -n "[数据] "
78 if [ -d "$PROJECT_ROOT/data/images" ] && [ -f "$PROJECT_ROOT/data/styles.csv" ]; then 54 if [ -d "$PROJECT_ROOT/data/images" ] && [ -f "$PROJECT_ROOT/data/styles.csv" ]; then
79 IMG_COUNT=$(find "$PROJECT_ROOT/data/images" -name "*.jpg" 2>/dev/null | wc -l) 55 IMG_COUNT=$(find "$PROJECT_ROOT/data/images" -name "*.jpg" 2>/dev/null | wc -l)
80 echo -e "${GREEN}OK${NC} $IMG_COUNT 张图片" 56 echo -e "${GREEN}OK${NC} $IMG_COUNT 张图片"
81 else 57 else
82 - echo -e "${YELLOW}WARN${NC} 未找到 data/images 或 data/styles.csv (运行 download_dataset.py)" 58 + echo -e "${YELLOW}WARN${NC} 未找到 data/images 或 data/styles.csv (可选,用于图片风格分析)"
83 fi 59 fi
84 60
85 -# 7. Streamlit 61 +# 5. Streamlit
86 echo -n "[Streamlit] " 62 echo -n "[Streamlit] "
87 if pgrep -f "streamlit run app.py" >/dev/null 2>&1; then 63 if pgrep -f "streamlit run app.py" >/dev/null 2>&1; then
88 echo -e "${GREEN}OK${NC} 运行中" 64 echo -e "${GREEN}OK${NC} 运行中"
scripts/index_data.py deleted
@@ -1,467 +0,0 @@ @@ -1,467 +0,0 @@
1 -"""  
2 -Data Indexing Script  
3 -Generates embeddings for products and stores them in Milvus  
4 -"""  
5 -  
6 -import csv  
7 -import logging  
8 -import os  
9 -import sys  
10 -from pathlib import Path  
11 -from typing import Any, Dict, Optional  
12 -  
13 -from tqdm import tqdm  
14 -  
15 -# Add parent directory to path  
16 -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))  
17 -  
18 -# Import config and settings first  
19 -# Direct imports from files to avoid __init__.py circular issues  
20 -import importlib.util  
21 -  
22 -from app.config import get_absolute_path, settings  
23 -  
24 -  
25 -def load_service_module(module_name, file_name):  
26 - """Load a service module directly from file"""  
27 - spec = importlib.util.spec_from_file_location(  
28 - module_name,  
29 - os.path.join(  
30 - os.path.dirname(os.path.dirname(os.path.abspath(__file__))),  
31 - f"app/services/{file_name}",  
32 - ),  
33 - )  
34 - module = importlib.util.module_from_spec(spec)  
35 - spec.loader.exec_module(module)  
36 - return module  
37 -  
38 -  
39 -embedding_module = load_service_module("embedding_service", "embedding_service.py")  
40 -milvus_module = load_service_module("milvus_service", "milvus_service.py")  
41 -  
42 -EmbeddingService = embedding_module.EmbeddingService  
43 -MilvusService = milvus_module.MilvusService  
44 -  
45 -# Configure logging  
46 -logging.basicConfig(  
47 - level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"  
48 -)  
49 -logger = logging.getLogger(__name__)  
50 -  
51 -  
52 -class DataIndexer:  
53 - """Index product data by generating and storing embeddings"""  
54 -  
55 - def __init__(self):  
56 - """Initialize services"""  
57 - self.embedding_service = EmbeddingService()  
58 - self.milvus_service = MilvusService()  
59 -  
60 - self.image_dir = Path(get_absolute_path(settings.image_data_path))  
61 - self.styles_csv = get_absolute_path("./data/styles.csv")  
62 - self.images_csv = get_absolute_path("./data/images.csv")  
63 -  
64 - # Load product data from CSV  
65 - self.products = self._load_products_from_csv()  
66 -  
67 - def _load_products_from_csv(self) -> Dict[int, Dict[str, Any]]:  
68 - """Load products from CSV files"""  
69 - products = {}  
70 -  
71 - # Load images mapping  
72 - images_dict = {}  
73 - with open(self.images_csv, "r", encoding="utf-8") as f:  
74 - reader = csv.DictReader(f)  
75 - for row in reader:  
76 - product_id = int(row["filename"].split(".")[0])  
77 - images_dict[product_id] = row["link"]  
78 -  
79 - # Load styles/products  
80 - with open(self.styles_csv, "r", encoding="utf-8") as f:  
81 - reader = csv.DictReader(f)  
82 - for row in reader:  
83 - try:  
84 - product_id = int(row["id"])  
85 - products[product_id] = {  
86 - "id": product_id,  
87 - "gender": row.get("gender", ""),  
88 - "masterCategory": row.get("masterCategory", ""),  
89 - "subCategory": row.get("subCategory", ""),  
90 - "articleType": row.get("articleType", ""),  
91 - "baseColour": row.get("baseColour", ""),  
92 - "season": row.get("season", ""),  
93 - "year": int(row["year"]) if row.get("year") else 0,  
94 - "usage": row.get("usage", ""),  
95 - "productDisplayName": row.get("productDisplayName", ""),  
96 - "imageUrl": images_dict.get(product_id, ""),  
97 - "imagePath": f"{product_id}.jpg",  
98 - }  
99 - except (ValueError, KeyError) as e:  
100 - logger.warning(f"Error loading product {row.get('id')}: {e}")  
101 - continue  
102 -  
103 - logger.info(f"Loaded {len(products)} products from CSV")  
104 - return products  
105 -  
106 - def setup(self) -> None:  
107 - """Setup connections and collections"""  
108 - logger.info("Setting up services...")  
109 -  
110 - # Connect to CLIP server  
111 - self.embedding_service.connect_clip()  
112 - logger.info("✓ CLIP server connected")  
113 -  
114 - # Connect to Milvus  
115 - self.milvus_service.connect()  
116 - logger.info("✓ Milvus connected")  
117 -  
118 - # Create Milvus collections  
119 - self.milvus_service.create_text_collection(recreate=False)  
120 - self.milvus_service.create_image_collection(recreate=False)  
121 - logger.info("✓ Milvus collections ready")  
122 -  
123 - def teardown(self) -> None:  
124 - """Close all connections"""  
125 - logger.info("Closing connections...")  
126 - self.embedding_service.disconnect_clip()  
127 - self.milvus_service.disconnect()  
128 - logger.info("✓ All connections closed")  
129 -  
130 - def index_text_embeddings(  
131 - self, batch_size: int = 100, skip: int = 0, limit: Optional[int] = None  
132 - ) -> Dict[str, int]:  
133 - """Generate and store text embeddings for products  
134 -  
135 - Args:  
136 - batch_size: Number of products to process at once  
137 - skip: Number of products to skip  
138 - limit: Maximum number of products to process (None for all)  
139 -  
140 - Returns:  
141 - Dictionary with indexing statistics  
142 - """  
143 - logger.info("Starting text embedding indexing...")  
144 -  
145 - # Get products list  
146 - product_ids = list(self.products.keys())[skip:]  
147 - if limit:  
148 - product_ids = product_ids[:limit]  
149 -  
150 - total_products = len(product_ids)  
151 - processed = 0  
152 - inserted = 0  
153 - errors = 0  
154 -  
155 - with tqdm(total=total_products, desc="Indexing text embeddings") as pbar:  
156 - while processed < total_products:  
157 - # Get batch of products  
158 - current_batch_size = min(batch_size, total_products - processed)  
159 - batch_ids = product_ids[processed : processed + current_batch_size]  
160 - products = [self.products[pid] for pid in batch_ids]  
161 -  
162 - if not products:  
163 - break  
164 -  
165 - try:  
166 - # Prepare texts for embedding  
167 - texts = []  
168 - text_mappings = []  
169 -  
170 - for product in products:  
171 - # Create text representation of product  
172 - text = self._create_product_text(product)  
173 - texts.append(text)  
174 - text_mappings.append(  
175 - {"product_id": product["id"], "text": text}  
176 - )  
177 -  
178 - # Generate embeddings  
179 - embeddings = self.embedding_service.get_text_embeddings_batch(  
180 - texts, batch_size=50 # OpenAI batch size  
181 - )  
182 -  
183 - # Prepare data for Milvus (with metadata)  
184 - milvus_data = []  
185 - for idx, (mapping, embedding) in enumerate(  
186 - zip(text_mappings, embeddings)  
187 - ):  
188 - product_id = mapping["product_id"]  
189 - product = self.products[product_id]  
190 -  
191 - milvus_data.append(  
192 - {  
193 - "id": product_id,  
194 - "text": mapping["text"][  
195 - :2000  
196 - ], # Truncate to max length  
197 - "embedding": embedding,  
198 - # Product metadata  
199 - "productDisplayName": product["productDisplayName"][  
200 - :500  
201 - ],  
202 - "gender": product["gender"][:50],  
203 - "masterCategory": product["masterCategory"][:100],  
204 - "subCategory": product["subCategory"][:100],  
205 - "articleType": product["articleType"][:100],  
206 - "baseColour": product["baseColour"][:50],  
207 - "season": product["season"][:50],  
208 - "usage": product["usage"][:50],  
209 - "year": product["year"],  
210 - "imageUrl": product["imageUrl"],  
211 - "imagePath": product["imagePath"],  
212 - }  
213 - )  
214 -  
215 - # Insert into Milvus  
216 - count = self.milvus_service.insert_text_embeddings(milvus_data)  
217 - inserted += count  
218 -  
219 - except Exception as e:  
220 - logger.error(  
221 - f"Error processing text batch at offset {processed}: {e}"  
222 - )  
223 - errors += len(products)  
224 -  
225 - processed += len(products)  
226 - pbar.update(len(products))  
227 -  
228 - stats = {"total_processed": processed, "inserted": inserted, "errors": errors}  
229 -  
230 - logger.info(f"Text embedding indexing completed: {stats}")  
231 - return stats  
232 -  
233 - def index_image_embeddings(  
234 - self, batch_size: int = 32, skip: int = 0, limit: Optional[int] = None  
235 - ) -> Dict[str, int]:  
236 - """Generate and store image embeddings for products  
237 -  
238 - Args:  
239 - batch_size: Number of images to process at once  
240 - skip: Number of products to skip  
241 - limit: Maximum number of products to process (None for all)  
242 -  
243 - Returns:  
244 - Dictionary with indexing statistics  
245 - """  
246 - logger.info("Starting image embedding indexing...")  
247 -  
248 - # Get products list  
249 - product_ids = list(self.products.keys())[skip:]  
250 - if limit:  
251 - product_ids = product_ids[:limit]  
252 -  
253 - total_products = len(product_ids)  
254 - processed = 0  
255 - inserted = 0  
256 - errors = 0  
257 -  
258 - with tqdm(total=total_products, desc="Indexing image embeddings") as pbar:  
259 - while processed < total_products:  
260 - # Get batch of products  
261 - current_batch_size = min(batch_size, total_products - processed)  
262 - batch_ids = product_ids[processed : processed + current_batch_size]  
263 - products = [self.products[pid] for pid in batch_ids]  
264 -  
265 - if not products:  
266 - break  
267 -  
268 - try:  
269 - # Prepare image paths  
270 - image_paths = []  
271 - image_mappings = []  
272 -  
273 - for product in products:  
274 - image_path = self.image_dir / product["imagePath"]  
275 - image_paths.append(image_path)  
276 - image_mappings.append(  
277 - {  
278 - "product_id": product["id"],  
279 - "image_path": product["imagePath"],  
280 - }  
281 - )  
282 -  
283 - # Generate embeddings  
284 - embeddings = self.embedding_service.get_image_embeddings_batch(  
285 - image_paths, batch_size=batch_size  
286 - )  
287 -  
288 - # Prepare data for Milvus (with metadata)  
289 - milvus_data = []  
290 - for idx, (mapping, embedding) in enumerate(  
291 - zip(image_mappings, embeddings)  
292 - ):  
293 - if embedding is not None:  
294 - product_id = mapping["product_id"]  
295 - product = self.products[product_id]  
296 -  
297 - milvus_data.append(  
298 - {  
299 - "id": product_id,  
300 - "image_path": mapping["image_path"],  
301 - "embedding": embedding,  
302 - # Product metadata  
303 - "productDisplayName": product["productDisplayName"][  
304 - :500  
305 - ],  
306 - "gender": product["gender"][:50],  
307 - "masterCategory": product["masterCategory"][:100],  
308 - "subCategory": product["subCategory"][:100],  
309 - "articleType": product["articleType"][:100],  
310 - "baseColour": product["baseColour"][:50],  
311 - "season": product["season"][:50],  
312 - "usage": product["usage"][:50],  
313 - "year": product["year"],  
314 - "imageUrl": product["imageUrl"],  
315 - }  
316 - )  
317 - else:  
318 - errors += 1  
319 -  
320 - # Insert into Milvus  
321 - if milvus_data:  
322 - count = self.milvus_service.insert_image_embeddings(milvus_data)  
323 - inserted += count  
324 -  
325 - except Exception as e:  
326 - logger.error(  
327 - f"Error processing image batch at offset {processed}: {e}"  
328 - )  
329 - errors += len(products)  
330 -  
331 - processed += len(products)  
332 - pbar.update(len(products))  
333 -  
334 - stats = {"total_processed": processed, "inserted": inserted, "errors": errors}  
335 -  
336 - logger.info(f"Image embedding indexing completed: {stats}")  
337 - return stats  
338 -  
339 - def _create_product_text(self, product: Dict[str, Any]) -> str:  
340 - """Create text representation of product for embedding  
341 -  
342 - Args:  
343 - product: Product document  
344 -  
345 - Returns:  
346 - Text representation  
347 - """  
348 - # Create a natural language description  
349 - parts = [  
350 - product.get("productDisplayName", ""),  
351 - f"Gender: {product.get('gender', '')}",  
352 - f"Category: {product.get('masterCategory', '')} > {product.get('subCategory', '')}",  
353 - f"Type: {product.get('articleType', '')}",  
354 - f"Color: {product.get('baseColour', '')}",  
355 - f"Season: {product.get('season', '')}",  
356 - f"Usage: {product.get('usage', '')}",  
357 - ]  
358 -  
359 - text = " | ".join(  
360 - [p for p in parts if p and p != "Gender: " and p != "Color: "]  
361 - )  
362 - return text  
363 -  
364 - def get_stats(self) -> Dict[str, Any]:  
365 - """Get indexing statistics  
366 -  
367 - Returns:  
368 - Dictionary with statistics  
369 - """  
370 - text_stats = self.milvus_service.get_collection_stats(  
371 - self.milvus_service.text_collection_name  
372 - )  
373 - image_stats = self.milvus_service.get_collection_stats(  
374 - self.milvus_service.image_collection_name  
375 - )  
376 -  
377 - return {  
378 - "total_products": len(self.products),  
379 - "milvus_text": text_stats,  
380 - "milvus_image": image_stats,  
381 - }  
382 -  
383 -  
384 -def main():  
385 - """Main function"""  
386 - import argparse  
387 -  
388 - parser = argparse.ArgumentParser(description="Index product data for search")  
389 - parser.add_argument(  
390 - "--mode",  
391 - choices=["text", "image", "both"],  
392 - default="both",  
393 - help="Which embeddings to index",  
394 - )  
395 - parser.add_argument(  
396 - "--batch-size", type=int, default=100, help="Batch size for processing"  
397 - )  
398 - parser.add_argument(  
399 - "--skip", type=int, default=0, help="Number of products to skip"  
400 - )  
401 - parser.add_argument(  
402 - "--limit", type=int, default=None, help="Maximum number of products to process"  
403 - )  
404 - parser.add_argument("--stats", action="store_true", help="Show statistics only")  
405 -  
406 - args = parser.parse_args()  
407 -  
408 - # Create indexer  
409 - indexer = DataIndexer()  
410 -  
411 - try:  
412 - # Setup services  
413 - indexer.setup()  
414 -  
415 - if args.stats:  
416 - # Show statistics  
417 - stats = indexer.get_stats()  
418 - print("\n=== Indexing Statistics ===")  
419 - print(f"\nTotal Products in CSV: {stats['total_products']}")  
420 -  
421 - print("\nMilvus Text Embeddings:")  
422 - print(f" Collection: {stats['milvus_text']['collection_name']}")  
423 - print(f" Total embeddings: {stats['milvus_text']['row_count']}")  
424 -  
425 - print("\nMilvus Image Embeddings:")  
426 - print(f" Collection: {stats['milvus_image']['collection_name']}")  
427 - print(f" Total embeddings: {stats['milvus_image']['row_count']}")  
428 -  
429 - print(  
430 - f"\nCoverage: {stats['milvus_image']['row_count'] / stats['total_products'] * 100:.1f}%"  
431 - )  
432 - else:  
433 - # Index data  
434 - if args.mode in ["text", "both"]:  
435 - logger.info("=== Indexing Text Embeddings ===")  
436 - text_stats = indexer.index_text_embeddings(  
437 - batch_size=args.batch_size, skip=args.skip, limit=args.limit  
438 - )  
439 - print(f"\nText Indexing Results: {text_stats}")  
440 -  
441 - if args.mode in ["image", "both"]:  
442 - logger.info("=== Indexing Image Embeddings ===")  
443 - image_stats = indexer.index_image_embeddings(  
444 - batch_size=min(args.batch_size, 32), # Smaller batch for images  
445 - skip=args.skip,  
446 - limit=args.limit,  
447 - )  
448 - print(f"\nImage Indexing Results: {image_stats}")  
449 -  
450 - # Show final statistics  
451 - logger.info("\n=== Final Statistics ===")  
452 - stats = indexer.get_stats()  
453 - print(f"Total products: {stats['total_products']}")  
454 - print(f"Text embeddings: {stats['milvus_text']['row_count']}")  
455 - print(f"Image embeddings: {stats['milvus_image']['row_count']}")  
456 -  
457 - except KeyboardInterrupt:  
458 - logger.info("\nIndexing interrupted by user")  
459 - except Exception as e:  
460 - logger.error(f"Error during indexing: {e}", exc_info=True)  
461 - sys.exit(1)  
462 - finally:  
463 - indexer.teardown()  
464 -  
465 -  
466 -if __name__ == "__main__":  
467 - main()  
scripts/run_clip.sh deleted
@@ -1,22 +0,0 @@ @@ -1,22 +0,0 @@
1 -#!/usr/bin/env bash  
2 -# =============================================================================  
3 -# OmniShopAgent - 启动 CLIP 图像向量服务  
4 -# 图像搜索、以图搜图功能依赖此服务  
5 -# =============================================================================  
6 -set -euo pipefail  
7 -  
8 -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"  
9 -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"  
10 -VENV_DIR="${VENV_DIR:-$PROJECT_ROOT/venv}"  
11 -  
12 -cd "$PROJECT_ROOT"  
13 -  
14 -if [ -d "$VENV_DIR" ]; then  
15 - set +u  
16 - source "$VENV_DIR/bin/activate"  
17 - set -u  
18 -fi  
19 -  
20 -echo "启动 CLIP 服务 (端口 51000)..."  
21 -echo "按 Ctrl+C 停止"  
22 -exec python -m clip_server launch  
scripts/run_milvus.sh deleted
@@ -1,31 +0,0 @@ @@ -1,31 +0,0 @@
1 -#!/usr/bin/env bash  
2 -# =============================================================================  
3 -# OmniShopAgent - 启动 Milvus 向量数据库  
4 -# 使用 Docker Compose 启动 Milvus 及相关依赖  
5 -# =============================================================================  
6 -set -euo pipefail  
7 -  
8 -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"  
9 -PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"  
10 -  
11 -cd "$PROJECT_ROOT"  
12 -  
13 -if ! command -v docker &>/dev/null; then  
14 - echo "错误: 未安装 Docker。请先运行 setup_env_centos8.sh"  
15 - exit 1  
16 -fi  
17 -  
18 -echo "启动 Milvus..."  
19 -docker compose up -d 2>/dev/null || docker-compose up -d 2>/dev/null || {  
20 - echo "错误: 无法执行 docker compose。请确保已安装 Docker Compose"  
21 - exit 1  
22 -}  
23 -  
24 -echo "等待 Milvus 就绪 (约 60 秒)..."  
25 -sleep 60  
26 -  
27 -if curl -s -o /dev/null -w "%{http_code}" http://localhost:9091/healthz 2>/dev/null | grep -q 200; then  
28 - echo "Milvus 已就绪: localhost:19530"  
29 -else  
30 - echo "提示: Milvus 可能仍在启动,请稍后执行 check_services.sh 检查"  
31 -fi  
scripts/setup_env_centos8.sh
@@ -41,9 +41,9 @@ sudo dnf install -y \ @@ -41,9 +41,9 @@ sudo dnf install -y \
41 tar 41 tar
42 42
43 # ----------------------------------------------------------------------------- 43 # -----------------------------------------------------------------------------
44 -# 2. 安装 Docker(用于 Milvus 44 +# 2. 检查 Docker(可选
45 # ----------------------------------------------------------------------------- 45 # -----------------------------------------------------------------------------
46 -echo "[2/4] 检查/安装 Docker..." 46 +echo "[2/4] 检查 Docker..."
47 if ! command -v docker &>/dev/null; then 47 if ! command -v docker &>/dev/null; then
48 echo " 安装 Docker..." 48 echo " 安装 Docker..."
49 sudo dnf config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo 2>/dev/null || { 49 sudo dnf config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo 2>/dev/null || {
@@ -142,11 +142,9 @@ echo "==========================================" @@ -142,11 +142,9 @@ echo "=========================================="
142 echo "环境准备完成!" 142 echo "环境准备完成!"
143 echo "==========================================" 143 echo "=========================================="
144 echo "下一步:" 144 echo "下一步:"
145 -echo " 1. 编辑 .env 配置 OPENAI_API_KEY"  
146 -echo " 2. 下载数据: python scripts/download_dataset.py"  
147 -echo " 3. 启动 Milvus: ./scripts/run_milvus.sh"  
148 -echo " 4. 索引数据: python scripts/index_data.py"  
149 -echo " 5. 启动应用: ./scripts/start.sh" 145 +echo " 1. 编辑 .env 配置 OPENAI_API_KEY、SEARCH_API_BASE_URL 等"
  146 +echo " 2. (可选)下载数据: python scripts/download_dataset.py"
  147 +echo " 3. 启动应用: ./scripts/start.sh"
150 echo "" 148 echo ""
151 echo "激活虚拟环境: source $VENV_DIR/bin/activate" 149 echo "激活虚拟环境: source $VENV_DIR/bin/activate"
152 echo "==========================================" 150 echo "=========================================="
1 #!/usr/bin/env bash 1 #!/usr/bin/env bash
2 # ============================================================================= 2 # =============================================================================
3 # OmniShopAgent - 启动脚本 3 # OmniShopAgent - 启动脚本
4 -# 启动 Milvus、CLIP(可选)、Streamlit 应用 4 +# 启动 Streamlit 应用
5 # ============================================================================= 5 # =============================================================================
6 set -euo pipefail 6 set -euo pipefail
7 7
8 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 8 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
9 PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" 9 PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
10 VENV_DIR="${VENV_DIR:-$PROJECT_ROOT/venv}" 10 VENV_DIR="${VENV_DIR:-$PROJECT_ROOT/venv}"
11 -STREAMLIT_PORT="${STREAMLIT_PORT:-8501}" 11 +STREAMLIT_PORT="${STREAMLIT_PORT:-6008}"
12 STREAMLIT_HOST="${STREAMLIT_HOST:-0.0.0.0}" 12 STREAMLIT_HOST="${STREAMLIT_HOST:-0.0.0.0}"
13 13
14 cd "$PROJECT_ROOT" 14 cd "$PROJECT_ROOT"
@@ -27,30 +27,7 @@ echo "==========================================" @@ -27,30 +27,7 @@ echo "=========================================="
27 echo "OmniShopAgent 启动" 27 echo "OmniShopAgent 启动"
28 echo "==========================================" 28 echo "=========================================="
29 29
30 -# 1. 启动 Milvus(Docker)  
31 -if command -v docker &>/dev/null; then  
32 - echo "[1/3] 检查 Milvus..."  
33 - if ! docker ps --format '{{.Names}}' 2>/dev/null | grep -q milvus-standalone; then  
34 - echo " 启动 Milvus (docker compose)..."  
35 - docker compose up -d 2>/dev/null || docker-compose up -d 2>/dev/null || {  
36 - echo " 警告: 无法启动 Milvus,请手动执行: docker compose up -d"  
37 - }  
38 - echo " 等待 Milvus 就绪 (30s)..."  
39 - sleep 30  
40 - else  
41 - echo " Milvus 已运行"  
42 - fi  
43 -else  
44 - echo "[1/3] 跳过 Milvus: 未安装 Docker"  
45 -fi  
46 -  
47 -# 2. 检查 CLIP(可选,图像搜索需要)  
48 -echo "[2/3] 检查 CLIP 服务..."  
49 -echo " 提示: 图像搜索需 CLIP。若未启动,请另开终端执行: python -m clip_server launch"  
50 -echo " 文本搜索可无需 CLIP。"  
51 -  
52 -# 3. 启动 Streamlit  
53 -echo "[3/3] 启动 Streamlit (端口 $STREAMLIT_PORT)..." 30 +echo "[1/1] 启动 Streamlit (端口 $STREAMLIT_PORT)..."
54 echo "" 31 echo ""
55 echo " 访问: http://$STREAMLIT_HOST:$STREAMLIT_PORT" 32 echo " 访问: http://$STREAMLIT_HOST:$STREAMLIT_PORT"
56 echo " 按 Ctrl+C 停止" 33 echo " 按 Ctrl+C 停止"
1 #!/usr/bin/env bash 1 #!/usr/bin/env bash
2 # ============================================================================= 2 # =============================================================================
3 # OmniShopAgent - 停止脚本 3 # OmniShopAgent - 停止脚本
4 -# 停止 Streamlit 进程及 Milvus 容器 4 +# 停止 Streamlit 进程
5 # ============================================================================= 5 # =============================================================================
6 set -euo pipefail 6 set -euo pipefail
7 7
8 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 8 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
9 PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" 9 PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
10 -STREAMLIT_PORT="${STREAMLIT_PORT:-8501}" 10 +STREAMLIT_PORT="${STREAMLIT_PORT:-6008}"
11 11
12 echo "==========================================" 12 echo "=========================================="
13 echo "OmniShopAgent 停止" 13 echo "OmniShopAgent 停止"
14 echo "==========================================" 14 echo "=========================================="
15 15
16 # 1. 停止 Streamlit 进程 16 # 1. 停止 Streamlit 进程
17 -echo "[1/2] 停止 Streamlit..." 17 +echo "[1/1] 停止 Streamlit..."
18 if pgrep -f "streamlit run app.py" >/dev/null 2>&1; then 18 if pgrep -f "streamlit run app.py" >/dev/null 2>&1; then
19 pkill -f "streamlit run app.py" 2>/dev/null || true 19 pkill -f "streamlit run app.py" 2>/dev/null || true
20 echo " Streamlit 已停止" 20 echo " Streamlit 已停止"
@@ -31,16 +31,6 @@ if command -v lsof &>/dev/null; then @@ -31,16 +31,6 @@ if command -v lsof &>/dev/null; then
31 fi 31 fi
32 fi 32 fi
33 33
34 -# 2. 可选:停止 Milvus 容器  
35 -echo "[2/2] 停止 Milvus..."  
36 -if command -v docker &>/dev/null; then  
37 - cd "$PROJECT_ROOT"  
38 - docker compose down 2>/dev/null || docker-compose down 2>/dev/null || true  
39 - echo " Milvus 已停止"  
40 -else  
41 - echo " Docker 未安装,跳过"  
42 -fi  
43 -  
44 echo "==========================================" 34 echo "=========================================="
45 echo "OmniShopAgent 已停止" 35 echo "OmniShopAgent 已停止"
46 echo "==========================================" 36 echo "=========================================="