Commit e4a39cc844139bd7cd7adfa16c74f9cc09d6c672
1 parent: 775db2b0
索引隔离。 不同的tenant_id用不同的索引
Showing 6 changed files with 92 additions and 49 deletions
Show diff stats
api/routes/search.py
| @@ -326,18 +326,36 @@ async def instant_search( | @@ -326,18 +326,36 @@ async def instant_search( | ||
| 326 | 326 | ||
| 327 | 327 | ||
| 328 | @router.get("/{doc_id}", response_model=DocumentResponse) | 328 | @router.get("/{doc_id}", response_model=DocumentResponse) |
| 329 | -async def get_document(doc_id: str): | 329 | +async def get_document(doc_id: str, http_request: Request): |
| 330 | """ | 330 | """ |
| 331 | Get a single document by ID. | 331 | Get a single document by ID. |
| 332 | + | ||
| 333 | + Requires tenant_id in header (X-Tenant-ID) or query parameter (tenant_id). | ||
| 332 | """ | 334 | """ |
| 333 | try: | 335 | try: |
| 336 | + # Extract tenant_id (required) | ||
| 337 | + tenant_id = http_request.headers.get('X-Tenant-ID') | ||
| 338 | + if not tenant_id: | ||
| 339 | + # Try to get from query string | ||
| 340 | + from urllib.parse import parse_qs | ||
| 341 | + query_string = http_request.url.query | ||
| 342 | + if query_string: | ||
| 343 | + params = parse_qs(query_string) | ||
| 344 | + tenant_id = params.get('tenant_id', [None])[0] | ||
| 345 | + | ||
| 346 | + if not tenant_id: | ||
| 347 | + raise HTTPException( | ||
| 348 | + status_code=400, | ||
| 349 | + detail="tenant_id is required. Provide it via header 'X-Tenant-ID' or query parameter 'tenant_id'" | ||
| 350 | + ) | ||
| 351 | + | ||
| 334 | from api.app import get_searcher | 352 | from api.app import get_searcher |
| 335 | searcher = get_searcher() | 353 | searcher = get_searcher() |
| 336 | 354 | ||
| 337 | - doc = searcher.get_document(doc_id) | 355 | + doc = searcher.get_document(tenant_id=tenant_id, doc_id=doc_id) |
| 338 | 356 | ||
| 339 | if doc is None: | 357 | if doc is None: |
| 340 | - raise HTTPException(status_code=404, detail=f"Document {doc_id} not found") | 358 | + raise HTTPException(status_code=404, detail=f"Document {doc_id} not found for tenant {tenant_id}") |
| 341 | 359 | ||
| 342 | return DocumentResponse(id=doc_id, source=doc) | 360 | return DocumentResponse(id=doc_id, source=doc) |
| 343 | 361 |
docs/亚马逊到店匠格式转换分析.md
indexer/bulk_indexing_service.py
| @@ -10,7 +10,7 @@ from sqlalchemy import Engine | @@ -10,7 +10,7 @@ from sqlalchemy import Engine | ||
| 10 | from utils.es_client import ESClient | 10 | from utils.es_client import ESClient |
| 11 | from indexer.spu_transformer import SPUTransformer | 11 | from indexer.spu_transformer import SPUTransformer |
| 12 | from indexer.bulk_indexer import BulkIndexer | 12 | from indexer.bulk_indexer import BulkIndexer |
| 13 | -from indexer.mapping_generator import load_mapping, delete_index_if_exists, DEFAULT_INDEX_NAME | 13 | +from indexer.mapping_generator import load_mapping, delete_index_if_exists, get_tenant_index_name |
| 14 | from indexer.indexer_logger import ( | 14 | from indexer.indexer_logger import ( |
| 15 | get_indexer_logger, log_index_request, log_index_result, log_bulk_index_batch | 15 | get_indexer_logger, log_index_request, log_index_result, log_bulk_index_batch |
| 16 | ) | 16 | ) |
| @@ -33,13 +33,16 @@ class BulkIndexingService: | @@ -33,13 +33,16 @@ class BulkIndexingService: | ||
| 33 | """ | 33 | """ |
| 34 | self.db_engine = db_engine | 34 | self.db_engine = db_engine |
| 35 | self.es_client = es_client | 35 | self.es_client = es_client |
| 36 | - self.index_name = DEFAULT_INDEX_NAME | 36 | + # Index name is now generated dynamically per tenant, no longer stored here |
| 37 | 37 | ||
| 38 | def bulk_index(self, tenant_id: str, recreate_index: bool = False, batch_size: int = 500) -> Dict[str, Any]: | 38 | def bulk_index(self, tenant_id: str, recreate_index: bool = False, batch_size: int = 500) -> Dict[str, Any]: |
| 39 | """执行全量索引""" | 39 | """执行全量索引""" |
| 40 | import time | 40 | import time |
| 41 | start_time = time.time() | 41 | start_time = time.time() |
| 42 | 42 | ||
| 43 | + # Generate tenant-specific index name | ||
| 44 | + index_name = get_tenant_index_name(tenant_id) | ||
| 45 | + | ||
| 43 | # 记录请求开始 | 46 | # 记录请求开始 |
| 44 | log_index_request( | 47 | log_index_request( |
| 45 | indexer_logger, | 48 | indexer_logger, |
| @@ -48,7 +51,7 @@ class BulkIndexingService: | @@ -48,7 +51,7 @@ class BulkIndexingService: | ||
| 48 | request_params={ | 51 | request_params={ |
| 49 | 'recreate_index': recreate_index, | 52 | 'recreate_index': recreate_index, |
| 50 | 'batch_size': batch_size, | 53 | 'batch_size': batch_size, |
| 51 | - 'index_name': self.index_name | 54 | + 'index_name': index_name |
| 52 | } | 55 | } |
| 53 | ) | 56 | ) |
| 54 | 57 | ||
| @@ -61,45 +64,45 @@ class BulkIndexingService: | @@ -61,45 +64,45 @@ class BulkIndexingService: | ||
| 61 | 'index_type': 'bulk', | 64 | 'index_type': 'bulk', |
| 62 | 'tenant_id': tenant_id, | 65 | 'tenant_id': tenant_id, |
| 63 | 'operation': 'load_mapping', | 66 | 'operation': 'load_mapping', |
| 64 | - 'index_name': self.index_name | 67 | + 'index_name': index_name |
| 65 | } | 68 | } |
| 66 | ) | 69 | ) |
| 67 | mapping = load_mapping() | 70 | mapping = load_mapping() |
| 68 | 71 | ||
| 69 | # 2. 处理索引(删除并重建或创建) | 72 | # 2. 处理索引(删除并重建或创建) |
| 70 | if recreate_index: | 73 | if recreate_index: |
| 71 | - logger.info(f"[BulkIndexing] Recreating index: {self.index_name}") | 74 | + logger.info(f"[BulkIndexing] Recreating index: {index_name}") |
| 72 | indexer_logger.info( | 75 | indexer_logger.info( |
| 73 | - f"Recreating index: {self.index_name}", | 76 | + f"Recreating index: {index_name}", |
| 74 | extra={ | 77 | extra={ |
| 75 | 'index_type': 'bulk', | 78 | 'index_type': 'bulk', |
| 76 | 'tenant_id': tenant_id, | 79 | 'tenant_id': tenant_id, |
| 77 | 'operation': 'recreate_index', | 80 | 'operation': 'recreate_index', |
| 78 | - 'index_name': self.index_name | 81 | + 'index_name': index_name |
| 79 | } | 82 | } |
| 80 | ) | 83 | ) |
| 81 | - if self.es_client.index_exists(self.index_name): | ||
| 82 | - if delete_index_if_exists(self.es_client, self.index_name): | ||
| 83 | - logger.info(f"[BulkIndexing] Deleted existing index: {self.index_name}") | 84 | + if self.es_client.index_exists(index_name): |
| 85 | + if delete_index_if_exists(self.es_client, index_name): | ||
| 86 | + logger.info(f"[BulkIndexing] Deleted existing index: {index_name}") | ||
| 84 | else: | 87 | else: |
| 85 | - raise Exception(f"Failed to delete index: {self.index_name}") | 88 | + raise Exception(f"Failed to delete index: {index_name}") |
| 86 | 89 | ||
| 87 | - if not self.es_client.index_exists(self.index_name): | ||
| 88 | - logger.info(f"[BulkIndexing] Creating index: {self.index_name}") | 90 | + if not self.es_client.index_exists(index_name): |
| 91 | + logger.info(f"[BulkIndexing] Creating index: {index_name}") | ||
| 89 | indexer_logger.info( | 92 | indexer_logger.info( |
| 90 | - f"Creating index: {self.index_name}", | 93 | + f"Creating index: {index_name}", |
| 91 | extra={ | 94 | extra={ |
| 92 | 'index_type': 'bulk', | 95 | 'index_type': 'bulk', |
| 93 | 'tenant_id': tenant_id, | 96 | 'tenant_id': tenant_id, |
| 94 | 'operation': 'create_index', | 97 | 'operation': 'create_index', |
| 95 | - 'index_name': self.index_name | 98 | + 'index_name': index_name |
| 96 | } | 99 | } |
| 97 | ) | 100 | ) |
| 98 | - if not self.es_client.create_index(self.index_name, mapping): | ||
| 99 | - raise Exception(f"Failed to create index: {self.index_name}") | ||
| 100 | - logger.info(f"[BulkIndexing] Created index: {self.index_name}") | 101 | + if not self.es_client.create_index(index_name, mapping): |
| 102 | + raise Exception(f"Failed to create index: {index_name}") | ||
| 103 | + logger.info(f"[BulkIndexing] Created index: {index_name}") | ||
| 101 | else: | 104 | else: |
| 102 | - logger.info(f"[BulkIndexing] Index already exists: {self.index_name}") | 105 | + logger.info(f"[BulkIndexing] Index already exists: {index_name}") |
| 103 | 106 | ||
| 104 | # 3. 转换数据 | 107 | # 3. 转换数据 |
| 105 | logger.info(f"[BulkIndexing] Transforming data for tenant_id={tenant_id}") | 108 | logger.info(f"[BulkIndexing] Transforming data for tenant_id={tenant_id}") |
| @@ -109,7 +112,7 @@ class BulkIndexingService: | @@ -109,7 +112,7 @@ class BulkIndexingService: | ||
| 109 | 'index_type': 'bulk', | 112 | 'index_type': 'bulk', |
| 110 | 'tenant_id': tenant_id, | 113 | 'tenant_id': tenant_id, |
| 111 | 'operation': 'transform_data', | 114 | 'operation': 'transform_data', |
| 112 | - 'index_name': self.index_name | 115 | + 'index_name': index_name |
| 113 | } | 116 | } |
| 114 | ) | 117 | ) |
| 115 | transformer = SPUTransformer(self.db_engine, tenant_id) | 118 | transformer = SPUTransformer(self.db_engine, tenant_id) |
| @@ -126,7 +129,7 @@ class BulkIndexingService: | @@ -126,7 +129,7 @@ class BulkIndexingService: | ||
| 126 | success_count=0, | 129 | success_count=0, |
| 127 | failed_count=0, | 130 | failed_count=0, |
| 128 | elapsed_time=elapsed_time, | 131 | elapsed_time=elapsed_time, |
| 129 | - index_name=self.index_name | 132 | + index_name=index_name |
| 130 | ) | 133 | ) |
| 131 | return { | 134 | return { |
| 132 | "success": True, | 135 | "success": True, |
| @@ -135,7 +138,7 @@ class BulkIndexingService: | @@ -135,7 +138,7 @@ class BulkIndexingService: | ||
| 135 | "failed": 0, | 138 | "failed": 0, |
| 136 | "elapsed_time": elapsed_time, | 139 | "elapsed_time": elapsed_time, |
| 137 | "message": "No documents to index", | 140 | "message": "No documents to index", |
| 138 | - "index_name": self.index_name, | 141 | + "index_name": index_name, |
| 139 | "tenant_id": tenant_id | 142 | "tenant_id": tenant_id |
| 140 | } | 143 | } |
| 141 | 144 | ||
| @@ -147,13 +150,13 @@ class BulkIndexingService: | @@ -147,13 +150,13 @@ class BulkIndexingService: | ||
| 147 | 'tenant_id': tenant_id, | 150 | 'tenant_id': tenant_id, |
| 148 | 'operation': 'transform_complete', | 151 | 'operation': 'transform_complete', |
| 149 | 'total_count': len(documents), | 152 | 'total_count': len(documents), |
| 150 | - 'index_name': self.index_name | 153 | + 'index_name': index_name |
| 151 | } | 154 | } |
| 152 | ) | 155 | ) |
| 153 | 156 | ||
| 154 | # 4. 批量导入 | 157 | # 4. 批量导入 |
| 155 | logger.info(f"[BulkIndexing] Indexing {len(documents)} documents (batch_size={batch_size})") | 158 | logger.info(f"[BulkIndexing] Indexing {len(documents)} documents (batch_size={batch_size})") |
| 156 | - indexer = BulkIndexer(self.es_client, self.index_name, batch_size=batch_size, max_retries=3) | 159 | + indexer = BulkIndexer(self.es_client, index_name, batch_size=batch_size, max_retries=3) |
| 157 | results = indexer.index_documents( | 160 | results = indexer.index_documents( |
| 158 | documents, | 161 | documents, |
| 159 | id_field="spu_id", | 162 | id_field="spu_id", |
| @@ -171,7 +174,7 @@ class BulkIndexingService: | @@ -171,7 +174,7 @@ class BulkIndexingService: | ||
| 171 | success_count=results['success'], | 174 | success_count=results['success'], |
| 172 | failed_count=results['failed'], | 175 | failed_count=results['failed'], |
| 173 | elapsed_time=elapsed_time, | 176 | elapsed_time=elapsed_time, |
| 174 | - index_name=self.index_name, | 177 | + index_name=index_name, |
| 175 | errors=results.get('errors', []) | 178 | errors=results.get('errors', []) |
| 176 | ) | 179 | ) |
| 177 | 180 | ||
| @@ -187,7 +190,7 @@ class BulkIndexingService: | @@ -187,7 +190,7 @@ class BulkIndexingService: | ||
| 187 | "indexed": results['success'], | 190 | "indexed": results['success'], |
| 188 | "failed": results['failed'], | 191 | "failed": results['failed'], |
| 189 | "elapsed_time": elapsed_time, | 192 | "elapsed_time": elapsed_time, |
| 190 | - "index_name": self.index_name, | 193 | + "index_name": index_name, |
| 191 | "tenant_id": tenant_id | 194 | "tenant_id": tenant_id |
| 192 | } | 195 | } |
| 193 | 196 | ||
| @@ -203,7 +206,7 @@ class BulkIndexingService: | @@ -203,7 +206,7 @@ class BulkIndexingService: | ||
| 203 | 'operation': 'request_failed', | 206 | 'operation': 'request_failed', |
| 204 | 'error': error_msg, | 207 | 'error': error_msg, |
| 205 | 'elapsed_time': elapsed_time, | 208 | 'elapsed_time': elapsed_time, |
| 206 | - 'index_name': self.index_name | 209 | + 'index_name': index_name |
| 207 | }, | 210 | }, |
| 208 | exc_info=True | 211 | exc_info=True |
| 209 | ) | 212 | ) |
indexer/incremental_service.py
| @@ -9,7 +9,7 @@ import numpy as np | @@ -9,7 +9,7 @@ import numpy as np | ||
| 9 | from sqlalchemy import text, bindparam | 9 | from sqlalchemy import text, bindparam |
| 10 | from indexer.indexing_utils import load_category_mapping, create_document_transformer | 10 | from indexer.indexing_utils import load_category_mapping, create_document_transformer |
| 11 | from indexer.bulk_indexer import BulkIndexer | 11 | from indexer.bulk_indexer import BulkIndexer |
| 12 | -from indexer.mapping_generator import DEFAULT_INDEX_NAME | 12 | +from indexer.mapping_generator import get_tenant_index_name |
| 13 | from indexer.indexer_logger import ( | 13 | from indexer.indexer_logger import ( |
| 14 | get_indexer_logger, log_index_request, log_index_result, log_spu_processing | 14 | get_indexer_logger, log_index_request, log_index_result, log_spu_processing |
| 15 | ) | 15 | ) |
| @@ -393,7 +393,7 @@ class IncrementalIndexerService: | @@ -393,7 +393,7 @@ class IncrementalIndexerService: | ||
| 393 | es_client, | 393 | es_client, |
| 394 | tenant_id: str, | 394 | tenant_id: str, |
| 395 | spu_ids: List[str], | 395 | spu_ids: List[str], |
| 396 | - index_name: str = DEFAULT_INDEX_NAME, | 396 | + index_name: str = None, |
| 397 | batch_size: int = 100, | 397 | batch_size: int = 100, |
| 398 | delete_spu_ids: List[str] = None | 398 | delete_spu_ids: List[str] = None |
| 399 | ) -> Dict[str, Any]: | 399 | ) -> Dict[str, Any]: |
| @@ -408,13 +408,16 @@ class IncrementalIndexerService: | @@ -408,13 +408,16 @@ class IncrementalIndexerService: | ||
| 408 | es_client: Elasticsearch客户端 | 408 | es_client: Elasticsearch客户端 |
| 409 | tenant_id: 租户ID | 409 | tenant_id: 租户ID |
| 410 | spu_ids: SPU ID列表(要索引的) | 410 | spu_ids: SPU ID列表(要索引的) |
| 411 | - index_name: 索引名称 | 411 | + index_name: 索引名称(可选,如果不提供则根据tenant_id自动生成) |
| 412 | batch_size: 批量写入ES的批次大小 | 412 | batch_size: 批量写入ES的批次大小 |
| 413 | delete_spu_ids: 显式指定要删除的SPU ID列表(可选) | 413 | delete_spu_ids: 显式指定要删除的SPU ID列表(可选) |
| 414 | 414 | ||
| 415 | Returns: | 415 | Returns: |
| 416 | 包含成功/失败列表的字典,以及删除结果 | 416 | 包含成功/失败列表的字典,以及删除结果 |
| 417 | """ | 417 | """ |
| 418 | + # Generate tenant-specific index name if not provided | ||
| 419 | + if index_name is None: | ||
| 420 | + index_name = get_tenant_index_name(tenant_id) | ||
| 418 | # 去重但保持顺序(避免重复DB/翻译/embedding/写ES) | 421 | # 去重但保持顺序(避免重复DB/翻译/embedding/写ES) |
| 419 | if spu_ids: | 422 | if spu_ids: |
| 420 | spu_ids = list(dict.fromkeys(spu_ids)) | 423 | spu_ids = list(dict.fromkeys(spu_ids)) |
indexer/mapping_generator.py
| @@ -11,13 +11,26 @@ from pathlib import Path | @@ -11,13 +11,26 @@ from pathlib import Path | ||
| 11 | 11 | ||
| 12 | logger = logging.getLogger(__name__) | 12 | logger = logging.getLogger(__name__) |
| 13 | 13 | ||
| 14 | -# Default index name | 14 | +# Default index name (deprecated, use get_tenant_index_name instead) |
| 15 | DEFAULT_INDEX_NAME = "search_products" | 15 | DEFAULT_INDEX_NAME = "search_products" |
| 16 | 16 | ||
| 17 | # Default mapping file path | 17 | # Default mapping file path |
| 18 | DEFAULT_MAPPING_FILE = Path(__file__).parent.parent / "mappings" / "search_products.json" | 18 | DEFAULT_MAPPING_FILE = Path(__file__).parent.parent / "mappings" / "search_products.json" |
| 19 | 19 | ||
| 20 | 20 | ||
| 21 | +def get_tenant_index_name(tenant_id: str) -> str: | ||
| 22 | + """ | ||
| 23 | + Generate index name for a tenant. | ||
| 24 | + | ||
| 25 | + Args: | ||
| 26 | + tenant_id: Tenant ID | ||
| 27 | + | ||
| 28 | + Returns: | ||
| 29 | + Index name in format: search_products_tenant_{tenant_id} | ||
| 30 | + """ | ||
| 31 | + return f"search_products_tenant_{tenant_id}" | ||
| 32 | + | ||
| 33 | + | ||
| 21 | def load_mapping(mapping_file: str = None) -> Dict[str, Any]: | 34 | def load_mapping(mapping_file: str = None) -> Dict[str, Any]: |
| 22 | """ | 35 | """ |
| 23 | Load Elasticsearch mapping from JSON file. | 36 | Load Elasticsearch mapping from JSON file. |
search/searcher.py
| @@ -20,6 +20,7 @@ from config.utils import get_match_fields_for_index | @@ -20,6 +20,7 @@ from config.utils import get_match_fields_for_index | ||
| 20 | from context.request_context import RequestContext, RequestContextStage, create_request_context | 20 | from context.request_context import RequestContext, RequestContextStage, create_request_context |
| 21 | from api.models import FacetResult, FacetValue, FacetConfig | 21 | from api.models import FacetResult, FacetValue, FacetConfig |
| 22 | from api.result_formatter import ResultFormatter | 22 | from api.result_formatter import ResultFormatter |
| 23 | +from indexer.mapping_generator import get_tenant_index_name | ||
| 23 | 24 | ||
| 24 | logger = logging.getLogger(__name__) | 25 | logger = logging.getLogger(__name__) |
| 25 | 26 | ||
| @@ -93,7 +94,7 @@ class Searcher: | @@ -93,7 +94,7 @@ class Searcher: | ||
| 93 | """ | 94 | """ |
| 94 | self.es_client = es_client | 95 | self.es_client = es_client |
| 95 | self.config = config | 96 | self.config = config |
| 96 | - self.index_name = config.es_index_name | 97 | + # Index name is now generated dynamically per tenant, no longer stored here |
| 97 | self.query_parser = query_parser or QueryParser(config) | 98 | self.query_parser = query_parser or QueryParser(config) |
| 98 | 99 | ||
| 99 | # Initialize components | 100 | # Initialize components |
| @@ -107,8 +108,9 @@ class Searcher: | @@ -107,8 +108,9 @@ class Searcher: | ||
| 107 | self.source_fields = config.query_config.source_fields or [] | 108 | self.source_fields = config.query_config.source_fields or [] |
| 108 | 109 | ||
| 109 | # Query builder - simplified single-layer architecture | 110 | # Query builder - simplified single-layer architecture |
| 111 | + # index_name is no longer needed in query builder since we use tenant-specific indices | ||
| 110 | self.query_builder = ESQueryBuilder( | 112 | self.query_builder = ESQueryBuilder( |
| 111 | - index_name=self.index_name, | 113 | + index_name="", # Not used, kept for backward compatibility |
| 112 | match_fields=self.match_fields, | 114 | match_fields=self.match_fields, |
| 113 | text_embedding_field=self.text_embedding_field, | 115 | text_embedding_field=self.text_embedding_field, |
| 114 | image_embedding_field=self.image_embedding_field, | 116 | image_embedding_field=self.image_embedding_field, |
| @@ -271,10 +273,10 @@ class Searcher: | @@ -271,10 +273,10 @@ class Searcher: | ||
| 271 | # Step 3: Query building | 273 | # Step 3: Query building |
| 272 | context.start_stage(RequestContextStage.QUERY_BUILDING) | 274 | context.start_stage(RequestContextStage.QUERY_BUILDING) |
| 273 | try: | 275 | try: |
| 274 | - # Add tenant_id to filters (required) | ||
| 275 | - if filters is None: | ||
| 276 | - filters = {} | ||
| 277 | - filters['tenant_id'] = tenant_id | 276 | + # Generate tenant-specific index name |
| 277 | + index_name = get_tenant_index_name(tenant_id) | ||
| 278 | + | ||
| 279 | + # No longer need to add tenant_id to filters since each tenant has its own index | ||
| 278 | 280 | ||
| 279 | es_query = self.query_builder.build_query( | 281 | es_query = self.query_builder.build_query( |
| 280 | query_text=parsed_query.rewritten_query or parsed_query.normalized_query, | 282 | query_text=parsed_query.rewritten_query or parsed_query.normalized_query, |
| @@ -332,8 +334,9 @@ class Searcher: | @@ -332,8 +334,9 @@ class Searcher: | ||
| 332 | # Step 4: Elasticsearch search | 334 | # Step 4: Elasticsearch search |
| 333 | context.start_stage(RequestContextStage.ELASTICSEARCH_SEARCH) | 335 | context.start_stage(RequestContextStage.ELASTICSEARCH_SEARCH) |
| 334 | try: | 336 | try: |
| 337 | + # Use tenant-specific index name | ||
| 335 | es_response = self.es_client.search( | 338 | es_response = self.es_client.search( |
| 336 | - index_name=self.index_name, | 339 | + index_name=index_name, |
| 337 | body=body_for_es, | 340 | body=body_for_es, |
| 338 | size=size, | 341 | size=size, |
| 339 | from_=from_ | 342 | from_=from_ |
| @@ -496,10 +499,10 @@ class Searcher: | @@ -496,10 +499,10 @@ class Searcher: | ||
| 496 | if image_vector is None: | 499 | if image_vector is None: |
| 497 | raise ValueError(f"Failed to encode image: {image_url}") | 500 | raise ValueError(f"Failed to encode image: {image_url}") |
| 498 | 501 | ||
| 499 | - # Add tenant_id to filters (required) | ||
| 500 | - if filters is None: | ||
| 501 | - filters = {} | ||
| 502 | - filters['tenant_id'] = tenant_id | 502 | + # Generate tenant-specific index name |
| 503 | + index_name = get_tenant_index_name(tenant_id) | ||
| 504 | + | ||
| 505 | + # No longer need to add tenant_id to filters since each tenant has its own index | ||
| 503 | 506 | ||
| 504 | # Build KNN query | 507 | # Build KNN query |
| 505 | es_query = { | 508 | es_query = { |
| @@ -529,7 +532,7 @@ class Searcher: | @@ -529,7 +532,7 @@ class Searcher: | ||
| 529 | 532 | ||
| 530 | # Execute search | 533 | # Execute search |
| 531 | es_response = self.es_client.search( | 534 | es_response = self.es_client.search( |
| 532 | - index_name=self.index_name, | 535 | + index_name=index_name, |
| 533 | body=es_query, | 536 | body=es_query, |
| 534 | size=size | 537 | size=size |
| 535 | ) | 538 | ) |
| @@ -576,24 +579,26 @@ class Searcher: | @@ -576,24 +579,26 @@ class Searcher: | ||
| 576 | """ | 579 | """ |
| 577 | return self.query_builder.get_domain_summary() | 580 | return self.query_builder.get_domain_summary() |
| 578 | 581 | ||
| 579 | - def get_document(self, doc_id: str) -> Optional[Dict[str, Any]]: | 582 | + def get_document(self, tenant_id: str, doc_id: str) -> Optional[Dict[str, Any]]: |
| 580 | """ | 583 | """ |
| 581 | Get single document by ID. | 584 | Get single document by ID. |
| 582 | 585 | ||
| 583 | Args: | 586 | Args: |
| 587 | + tenant_id: Tenant ID (required to determine which index to query) | ||
| 584 | doc_id: Document ID | 588 | doc_id: Document ID |
| 585 | 589 | ||
| 586 | Returns: | 590 | Returns: |
| 587 | Document or None if not found | 591 | Document or None if not found |
| 588 | """ | 592 | """ |
| 589 | try: | 593 | try: |
| 594 | + index_name = get_tenant_index_name(tenant_id) | ||
| 590 | response = self.es_client.client.get( | 595 | response = self.es_client.client.get( |
| 591 | - index=self.index_name, | 596 | + index=index_name, |
| 592 | id=doc_id | 597 | id=doc_id |
| 593 | ) | 598 | ) |
| 594 | return response.get('_source') | 599 | return response.get('_source') |
| 595 | except Exception as e: | 600 | except Exception as e: |
| 596 | - logger.error(f"Failed to get document {doc_id}: {e}", exc_info=True) | 601 | + logger.error(f"Failed to get document {doc_id} from tenant {tenant_id}: {e}", exc_info=True) |
| 597 | return None | 602 | return None |
| 598 | 603 | ||
| 599 | def _standardize_facets( | 604 | def _standardize_facets( |