Commit 484adbfe9ccfae39f5ce92c0173011d724402d47

Authored by tangwang
1 parent a7920e17

adapt ubuntu; conda -> venv

1 1 # Elasticsearch Configuration
2 2 ES_HOST=http://localhost:9200
3   -ES_USERNAME=essa
  3 +ES_USERNAME=saas
4 4 ES_PASSWORD=4hOaLaf41y2VuI8y
5 5  
6 6 # Redis Configuration (Optional)
... ... @@ -30,7 +30,7 @@ IMAGE_MODEL_DIR=/data/tw/models/cn-clip # 已经改为web请求了,不使用…
30 30 CACHE_DIR=.cache
31 31  
32 32 # Frontend API Base URL
33   -API_BASE_URL=http://120.76.41.98:6002
  33 +API_BASE_URL=http://43.166.252.75:6002
34 34  
35 35  
36 36 DASHSCOPE_API_KEY=sk-c3b8d4db061840aa8effb748df2a997b
... ...
.gitignore
... ... @@ -49,6 +49,7 @@ __pycache__
49 49 .history.txt
50 50 log/
51 51 logs/
  52 +.venv/
52 53 nohup.out
53 54 temp/
54 55 indexer_input*
... ...
README.md
... ... @@ -70,7 +70,7 @@ query anchor
70 70  
71 71 对外:
72 72 embedding服务:
73   - curl -X POST http://120.76.41.98:6005/embed/text \
  73 + curl -X POST http://43.166.252.75:6005/embed/text \
74 74 -H "Content-Type: application/json" \
75 75 -d '["衣服", "Bohemian Maxi Dress"]'
76 76  
... ... @@ -91,7 +91,7 @@ localhost替换为
91 91 服务器内网地址:
92 92 10.0.163.168
93 93 公网地址:
94   -120.76.41.98
  94 +43.166.252.75
95 95  
96 96  
97 97 # 电商搜索引擎 SaaS
... ... @@ -101,15 +101,18 @@ localhost替换为
101 101  
102 102 ## 项目环境
103 103  
104   -以项目根目录的 **`activate.sh`** 为准(会激活 Conda 环境 `searchengine` 并加载 `.env`):
  104 +以项目根目录的 **`activate.sh`** 为准(**优先激活 venv:`./.venv`,并加载 `.env`;兼容 Conda 回退**):
105 105  
106 106 ```bash
107   -# 若在新机器且 Conda 不在默认路径,先设置:
108   -# - 你的 conda 是 ~/anaconda3/bin/conda,则:export CONDA_ROOT=$HOME/anaconda3
  107 +# 推荐:首次创建 venv(默认安装基础依赖)
  108 +./scripts/create_venv.sh
  109 +
  110 +# 如需本地向量/图片编码(会安装 torch/transformers 等重依赖)
  111 +# INSTALL_ML=1 ./scripts/create_venv.sh
109 112 source activate.sh
110 113 ```
111 114  
112   -新机器首次需创建环境,见 `docs/环境配置说明.md`(`conda env create -f environment.yml` 或 `pip install -r requirements.txt`)。
  115 +新机器首次需创建环境,见 `docs/环境配置说明.md`(推荐 venv;Conda 为兼容旧流程)。
113 116  
114 117 ## 测试pipeline
115 118  
... ...
activate.sh
1 1 #!/bin/bash
2   -# 新机器部署:可设置 CONDA_ROOT 指向本机 Conda 路径
3   -# 例如你的 conda 是 ~/anaconda3/bin/conda,则 export CONDA_ROOT=$HOME/anaconda3
4   -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}"
5   -source "$CONDA_ROOT/etc/profile.d/conda.sh"
6   -conda activate searchengine
  2 +#
  3 +# Unified environment activator (venv preferred, conda fallback).
  4 +#
  5 +# Usage:
  6 +# source activate.sh
  7 +#
  8 +# Priority:
  9 +# 1) ./.venv (Python venv)
  10 +# 2) conda env "searchengine" (legacy)
  11 +#
  12 +
  13 +# Must be sourced
  14 +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  15 + echo "ERROR: Please source this script: source activate.sh" >&2
  16 + exit 1
  17 +fi
  18 +
  19 +PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  20 +
  21 +# 1) venv (preferred)
  22 +VENV_ACTIVATE="${PROJECT_ROOT}/.venv/bin/activate"
  23 +if [[ -f "${VENV_ACTIVATE}" ]]; then
  24 + # shellcheck disable=SC1090
  25 + source "${VENV_ACTIVATE}"
  26 + ENV_KIND="venv"
  27 +else
  28 + # 2) conda fallback (legacy)
  29 + # 新机器部署:可设置 CONDA_ROOT 指向本机 Conda 路径
  30 + # 例如你的 conda 是 ~/anaconda3/bin/conda,则 export CONDA_ROOT=$HOME/anaconda3
  31 + CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}"
  32 + if [[ -f "${CONDA_ROOT}/etc/profile.d/conda.sh" ]]; then
  33 + # shellcheck disable=SC1091
  34 + source "${CONDA_ROOT}/etc/profile.d/conda.sh"
  35 + conda activate searchengine
  36 + ENV_KIND="conda"
  37 + else
  38 + echo "ERROR: No .venv found and conda.sh not found at ${CONDA_ROOT}/etc/profile.d/conda.sh" >&2
  39 + echo " - Create venv: ./scripts/create_venv.sh" >&2
  40 + echo " - Or set CONDA_ROOT to your conda install path" >&2
  41 + return 1
  42 + fi
  43 +fi
7 44  
8 45 # 如果需要加载 .env 中的环境变量
9   -if [ -f .env ]; then
  46 +ENV_FILE="${PROJECT_ROOT}/.env"
  47 +if [ -f "${ENV_FILE}" ]; then
10 48 set -a # 自动导出所有变量
11   - source <(grep -v '^#' .env | grep -v '^$' | sed 's/#.*$//' | sed 's/\r$//')
  49 + # NOTE: This loader tolerates comments/blank lines and strips inline comments.
  50 + source <(grep -v '^#' "${ENV_FILE}" | grep -v '^$' | sed 's/#.*$//' | sed 's/\r$//')
12 51 set +a # 关闭自动导出
13 52 fi
14 53  
15   -echo "Environment activated: searchengine"
  54 +echo "Environment activated (${ENV_KIND}): ${VIRTUAL_ENV:-${CONDA_DEFAULT_ENV:-unknown}}"
... ...
docs/CNCLIP_SERVICE说明文档.md
... ... @@ -12,7 +12,7 @@ normalize后的结果:
12 12 https://aisearch.cdn.bcebos.com/fileManager/GtB5doGAr1skTx38P7fb7Q/182.jpg?authorization=bce-auth-v1%2F7e22d8caf5af46cc9310f1e3021709f3%2F2025-12-30T04%3A45%3A38Z%2F86400%2Fhost%2Ffe222039926cb7ff593021af40268c782b8892598114e24773d0c1bfc976a8df
13 13 https://oss.essa.cn/2e353867-7496-4d4e-a7c8-0af50f49f6eb.jpg?x-oss-process=image/resize,m_lfit,w_2048,h_2048
14 14  
15   -curl -X POST "http://120.76.41.98:5000/embedding/generate_image_embeddings" -H "Content-Type: application/json" -d '[
  15 +curl -X POST "http://43.166.252.75:5000/embedding/generate_image_embeddings" -H "Content-Type: application/json" -d '[
16 16 {
17 17 "id": "test_1",
18 18 "pic_url": "https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg"
... ...
docs/ES/ES_8.18/1_ES配置和使用.md
... ... @@ -2,7 +2,7 @@
2 2  
3 3 ## 相关链接
4 4 - 接口文档:http://rap.essa.top:88/workspace/myWorkspace.do?projectId=78#2187
5   -- Kibana 控制台:http://120.76.41.98:5601/app/dev_tools#/console/shell
  5 +- Kibana 控制台:http://43.166.252.75:5601/app/dev_tools#/console/shell
6 6  
7 7 ## 分词方面
8 8  
... ...
docs/ES/ES_8.18/2_kibana安装.md
... ... @@ -47,5 +47,5 @@ systemctl enable kibana
47 47 ```
48 48  
49 49 在阿里云上面配置允许访问5601端口后,可以浏览器打开:
50   -http://120.76.41.98:5601/
  50 +http://43.166.252.75:5601/
51 51  
... ...
docs/Usage-Guide.md
... ... @@ -27,10 +27,19 @@
27 27  
28 28 #### 1. 安装 Python 依赖与激活环境
29 29  
30   -**推荐**:使用项目根目录的 `activate.sh` 激活环境(会加载 `.env`)。新机器部署时若 Conda 不在默认路径,请先设置 `CONDA_ROOT`(例如你的 conda 是 `~/anaconda3/bin/conda`,则 `export CONDA_ROOT=$HOME/anaconda3`)。详见 `docs/环境配置说明.md`。
  30 +**推荐**:使用项目根目录的 `activate.sh` 激活环境(会加载 `.env`)。目前推荐 venv(`.venv`);Conda 仅作为兼容回退(需要 `CONDA_ROOT`)。详见 `docs/环境配置说明.md`。
31 31  
32 32 ```bash
33 33 cd /data/saas-search
  34 +./scripts/create_venv.sh # 首次创建 venv(只需执行一次)
  35 +source activate.sh
  36 +```
  37 +
  38 +如果需要本地 embedding / 图像编码(会安装 torch/transformers 等较重依赖):
  39 +
  40 +```bash
  41 +cd /data/saas-search
  42 +INSTALL_ML=1 ./scripts/create_venv.sh
34 43 source activate.sh
35 44 ```
36 45  
... ...
docs/temporary/sku_image_src问题诊断报告.md
... ... @@ -98,7 +98,7 @@ else:
98 98  
99 99 2. **验证修复**:重新索引后,查询 ES 验证 `image_src` 字段是否已包含:
100 100 ```bash
101   - curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' \
  101 + curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' \
102 102 -H 'Content-Type: application/json' \
103 103 -d '{
104 104 "size": 1,
... ...
docs/常用查询 - ES.md
... ... @@ -8,7 +8,7 @@
8 8 # 一般情况下不需要在查询中再按 tenant_id 过滤(可选保留用于排查)。
9 9  
10 10 ### 1. 根据 tenant_id / spu_id 查询
11   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
  11 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
12 12 "size": 11,
13 13 "_source": ["title"],
14 14 "query": {
... ... @@ -21,7 +21,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_
21 21 }'
22 22  
23 23  
24   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
  24 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
25 25 "size": 100,
26 26 "_source": ["title"],
27 27 "query": {
... ... @@ -30,7 +30,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_
30 30 }'
31 31  
32 32  
33   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
  33 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
34 34 "size": 5,
35 35 "_source": ["title", "keywords", "tags"],
36 36 "query": {
... ... @@ -43,7 +43,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_
43 43 }'
44 44  
45 45  
46   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
  46 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
47 47 "size": 1,
48 48 "_source": ["title", "keywords", "tags"],
49 49 "query": {
... ... @@ -65,7 +65,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_
65 65 }'
66 66  
67 67  
68   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
  68 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
69 69 "size": 1,
70 70 "_source": ["title"],
71 71 "query": {
... ... @@ -86,17 +86,17 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_
86 86 }
87 87 }'
88 88  
89   -Curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{
  89 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{
90 90 "analyzer": "index_ansj",
91 91 "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝"
92 92 }'
93 93  
94   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{
  94 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{
95 95 "analyzer": "query_ansj",
96 96 "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝"
97 97 }'
98 98  
99   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
  99 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
100 100 "size": 100,
101 101 "from": 0,
102 102 "query": {
... ... @@ -131,7 +131,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_
131 131 }
132 132 }'
133 133  
134   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
  134 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{
135 135 "size": 1,
136 136 "from": 0,
137 137 "query": {
... ... @@ -258,7 +258,7 @@ GET /search_products_tenant_2/_search
258 258 }
259 259  
260 260  
261   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
  261 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
262 262 "size": 5,
263 263 "query": {
264 264 "bool": {
... ... @@ -271,7 +271,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/
271 271  
272 272  
273 273 ### 2. 统计租户的总文档数
274   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_count?pretty' -H 'Content-Type: application/json' -d '{
  274 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_count?pretty' -H 'Content-Type: application/json' -d '{
275 275 "query": {
276 276 "match_all": {}
277 277 }
... ... @@ -285,7 +285,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
285 285 ## 1. 检查ES文档的分面字段数据
286 286  
287 287 ### 1.1 查询特定租户的商品,显示分面相关字段
288   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  288 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
289 289 "query": {
290 290 "term": {
291 291 "tenant_id": "162"
... ... @@ -306,7 +306,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
306 306 }'
307 307  
308 308 ### 1.2 验证category1_name字段是否有数据
309   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  309 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
310 310 "query": {
311 311 "bool": {
312 312 "filter": [
... ... @@ -319,7 +319,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
319 319 }'
320 320  
321 321 ### 1.3 验证specifications字段是否有数据
322   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  322 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
323 323 "query": {
324 324 "bool": {
325 325 "filter": [
... ... @@ -334,7 +334,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
334 334 ## 2. 分面聚合查询(Facet Aggregations)
335 335  
336 336 ### 2.1 category1_name 分面聚合
337   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  337 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
338 338 "query": {
339 339 "match_all": {}
340 340 },
... ... @@ -350,7 +350,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
350 350 }'
351 351  
352 352 ### 2.2 specifications.color 分面聚合
353   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  353 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
354 354 "query": {
355 355 "match_all": {}
356 356 },
... ... @@ -382,7 +382,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
382 382 }'
383 383  
384 384 ### 2.3 specifications.size 分面聚合
385   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  385 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
386 386 "query": {
387 387 "match_all": {}
388 388 },
... ... @@ -414,7 +414,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
414 414 }'
415 415  
416 416 ### 2.4 specifications.material 分面聚合
417   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  417 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
418 418 "query": {
419 419 "match_all": {}
420 420 },
... ... @@ -446,7 +446,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
446 446 }'
447 447  
448 448 ### 2.5 综合分面聚合(category + color + size + material)
449   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  449 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
450 450 "query": {
451 451 "match_all": {}
452 452 },
... ... @@ -530,7 +530,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
530 530 ## 3. 检查specifications嵌套字段的详细结构
531 531  
532 532 ### 3.1 查看specifications的name字段有哪些值
533   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
  533 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
534 534 "query": {
535 535 "term": {
536 536 "tenant_id": "162"
... ... @@ -555,7 +555,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s
555 555 }'
556 556  
557 557 ### 3.2 查看某个商品的完整specifications数据
558   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
  558 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{
559 559 "query": {
560 560 "bool": {
561 561 "filter": [
... ... @@ -571,7 +571,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s
571 571 ## 4. 统计查询
572 572  
573 573 ### 4.1 统计有category1_name的文档数量
574   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{
  574 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{
575 575 "query": {
576 576 "bool": {
577 577 "filter": [
... ... @@ -582,7 +582,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
582 582 }'
583 583  
584 584 ### 4.2 统计有specifications的文档数量
585   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{
  585 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{
586 586 "query": {
587 587 "bool": {
588 588 "filter": [
... ... @@ -596,7 +596,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
596 596 ## 5. 诊断问题场景
597 597  
598 598 ### 5.1 查找没有category1_name但有category的文档(MySQL有数据但ES没有)
599   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  599 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
600 600 "query": {
601 601 "bool": {
602 602 "filter": [
... ... @@ -612,7 +612,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te
612 612 }'
613 613  
614 614 ### 5.2 查找有option但没有specifications的文档(数据转换问题)
615   -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
  615 +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{
616 616 "query": {
617 617 "bool": {
618 618 "filter": [
... ...
docs/搜索API对接指南.md
... ... @@ -64,7 +64,7 @@
64 64  
65 65 ### 1.1 基础信息
66 66  
67   -- **Base URL**: `http://120.76.41.98:6002`
  67 +- **Base URL**: `http://43.166.252.75:6002`
68 68 - **协议**: HTTP/HTTPS
69 69 - **数据格式**: JSON
70 70 - **字符编码**: UTF-8
... ... @@ -75,7 +75,7 @@
75 75 ### 1.2 最简单的搜索请求
76 76  
77 77 ```bash
78   -curl -X POST "http://120.76.41.98:6002/search/" \
  78 +curl -X POST "http://43.166.252.75:6002/search/" \
79 79 -H "Content-Type: application/json" \
80 80 -H "X-Tenant-ID: 162" \
81 81 -d '{"query": "芭比娃娃"}'
... ... @@ -84,7 +84,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \
84 84 ### 1.3 带过滤与分页的搜索
85 85  
86 86 ```bash
87   -curl -X POST "http://120.76.41.98:6002/search/" \
  87 +curl -X POST "http://43.166.252.75:6002/search/" \
88 88 -H "Content-Type: application/json" \
89 89 -H "X-Tenant-ID: 162" \
90 90 -d '{
... ... @@ -108,7 +108,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \
108 108 ### 1.4 开启分面的搜索
109 109  
110 110 ```bash
111   -curl -X POST "http://120.76.41.98:6002/search/" \
  111 +curl -X POST "http://43.166.252.75:6002/search/" \
112 112 -H "Content-Type: application/json" \
113 113 -H "X-Tenant-ID: 162" \
114 114 -d '{
... ...
docs/环境配置说明.md
... ... @@ -11,14 +11,36 @@
11 11  
12 12 ## 2. Python 运行环境
13 13  
14   -**推荐方式(与项目脚本一致)**:使用项目根目录下的 `activate.sh` 激活环境,会自动加载当前目录下的 `.env`(忽略注释与空行):
  14 +本项目历史上使用 Conda 管理环境;目前推荐使用 **venv**(更轻量、对 CI/容器更友好)。项目根目录下的 `activate.sh` 已升级为 **优先激活 `.venv`,并兼容 Conda 回退**,且会自动加载当前目录下的 `.env`(忽略注释与空行)。
  15 +
  16 +### 2.1 venv(推荐)
  17 +
  18 +首次创建 venv:
  19 +
  20 +```bash
  21 +cd /data/saas-search
  22 +./scripts/create_venv.sh
  23 +source activate.sh
  24 +```
  25 +
  26 +如需运行本地 embedding / 图像编码服务(torch/transformers 等依赖较重):
15 27  
16 28 ```bash
17 29 cd /data/saas-search
  30 +INSTALL_ML=1 ./scripts/create_venv.sh
18 31 source activate.sh
19 32 ```
20 33  
21   -`activate.sh` 会激活 Conda 环境 `searchengine`。若在新机器上部署,请先设置本机 Conda 路径再执行:
  34 +日常使用:
  35 +
  36 +```bash
  37 +cd /data/saas-search
  38 +source activate.sh
  39 +```
  40 +
  41 +### 2.2 Conda(兼容旧流程)
  42 +
  43 +`activate.sh` 会在未发现 `.venv` 时回退激活 Conda 环境 `searchengine`。若在新机器上部署,请先设置本机 Conda 路径再执行:
22 44  
23 45 ```bash
24 46 # 你的 conda 在 ~/anaconda3/bin/conda,则 CONDA_ROOT=~/anaconda3
... ... @@ -26,7 +48,7 @@ export CONDA_ROOT=$HOME/anaconda3 # 或你的 Conda 安装路径(如 /home/u
26 48 source activate.sh
27 49 ```
28 50  
29   -**新机器首次部署(创建环境)**:若本机尚未创建 `searchengine` 环境,任选其一:
  51 +**新机器首次部署(创建 Conda 环境)**:若本机尚未创建 `searchengine` 环境,任选其一:
30 52  
31 53 - **方式 A(推荐,与 environment.yml 一致)**:
32 54 ```bash
... ... @@ -82,7 +104,7 @@ DB_PASSWORD=P89cZHS5d7dFyc9R
82 104  
83 105 # Elasticsearch
84 106 ES_HOST=http://localhost:9200
85   -ES_USERNAME=essa
  107 +ES_USERNAME=saas
86 108 ES_PASSWORD=4hOaLaf41y2VuI8y
87 109  
88 110 # Redis(可选)
... ... @@ -105,7 +127,7 @@ API_PORT=6002
105 127 | 项目 | 值 |
106 128 |------|----|
107 129 | **MySQL** | host `120.79.247.228`, port `3316`, user `saas`, password `P89cZHS5d7dFyc9R` |
108   -| **Elasticsearch** | host `http://localhost:9200`, user `essa`, password `4hOaLaf41y2VuI8y` |
  130 +| **Elasticsearch** | host `http://localhost:9200`, user `saas`, password `4hOaLaf41y2VuI8y` |
109 131 | **Redis(可选)** | host `localhost`, port `6479`, password `BMfv5aI31kgHWtlx` |
110 132 | **DeepL** | `c9293ab4-ad25-479b-919f-ab4e63b429ed` |
111 133  
... ...
frontend/README.md
... ... @@ -82,7 +82,7 @@ bash scripts/start_backend.sh
82 82 ```
83 83  
84 84 2. **访问前端**:
85   -打开浏览器访问:`http://120.76.41.98:6002/`
  85 +打开浏览器访问:`http://43.166.252.75:6002/`
86 86  
87 87 ### 搜索示例
88 88  
... ... @@ -97,7 +97,7 @@ bash scripts/start_backend.sh
97 97 前端通过以下接口与后端通信:
98 98  
99 99 ```javascript
100   -POST http://120.76.41.98:6002/search/
  100 +POST http://43.166.252.75:6002/search/
101 101  
102 102 请求体:
103 103 {
... ...
indexer/README.md
... ... @@ -198,7 +198,7 @@ categoryPath.set(categoryLang, translationCategoryPath)
198 198 你当前要使用的翻译接口(Python 侧):
199 199  
200 200 ```bash
201   -curl -X POST http://120.76.41.98:6006/translate \
  201 +curl -X POST http://43.166.252.75:6006/translate \
202 202 -H "Content-Type: application/json" \
203 203 -d '{"text":"儿童小男孩女孩开学 100 天衬衫短袖 搞笑图案字母印花庆祝上衣",
204 204 "target_lang":"en",
... ...
indexer/prompts.txt
... ... @@ -22,7 +22,7 @@ java索引程序职责:
22 22 本模块:
23 23 负责 mysql 基础数据 → 索引结构的doc (包括缓存)
24 24  
25   -翻译接口: curl -X POST http://120.76.41.98:6006/translate -H "Content-Type: application/json" -d '{"text":"儿童小男孩女孩开学 100 天衬衫短袖 搞笑图案字母印花庆祝上衣","target_lang":"en","source_lang":"auto"}'
  25 +翻译接口: curl -X POST http://43.166.252.75:6006/translate -H "Content-Type: application/json" -d '{"text":"儿童小男孩女孩开学 100 天衬衫短袖 搞笑图案字母印花庆祝上衣","target_lang":"en","source_lang":"auto"}'
26 26  
27 27 java的组织doc的逻辑都需要迁移过来。
28 28  
... ...
query/query_parser.py
... ... @@ -8,7 +8,6 @@ from typing import Dict, List, Optional, Any, Union
8 8 import numpy as np
9 9 import logging
10 10 import re
11   -import hanlp
12 11 from concurrent.futures import Future, ThreadPoolExecutor, as_completed
13 12  
14 13 from embeddings import BgeEncoder
... ... @@ -19,6 +18,10 @@ from .query_rewriter import QueryRewriter, QueryNormalizer
19 18  
20 19 logger = logging.getLogger(__name__)
21 20  
  21 +try:
  22 + import hanlp # type: ignore
  23 +except Exception: # pragma: no cover
  24 + hanlp = None
22 25  
23 26 class ParsedQuery:
24 27 """Container for parsed query results."""
... ... @@ -94,12 +97,22 @@ class QueryParser:
94 97 self.language_detector = LanguageDetector()
95 98 self.rewriter = QueryRewriter(config.query_config.rewrite_dictionary)
96 99  
97   - # Initialize HanLP components at startup
98   - logger.info("Initializing HanLP components...")
99   - self._tok = hanlp.load(hanlp.pretrained.tok.CTB9_TOK_ELECTRA_BASE_CRF)
100   - self._tok.config.output_spans = True
101   - self._pos_tag = hanlp.load(hanlp.pretrained.pos.CTB9_POS_ELECTRA_SMALL)
102   - logger.info("HanLP components initialized")
  100 + # Optional HanLP components (heavy). If unavailable, fall back to a lightweight tokenizer.
  101 + self._tok = None
  102 + self._pos_tag = None
  103 + if hanlp is not None:
  104 + try:
  105 + logger.info("Initializing HanLP components...")
  106 + self._tok = hanlp.load(hanlp.pretrained.tok.CTB9_TOK_ELECTRA_BASE_CRF)
  107 + self._tok.config.output_spans = True
  108 + self._pos_tag = hanlp.load(hanlp.pretrained.pos.CTB9_POS_ELECTRA_SMALL)
  109 + logger.info("HanLP components initialized")
  110 + except Exception as e:
  111 + logger.warning(f"HanLP init failed, falling back to simple tokenizer: {e}")
  112 + self._tok = None
  113 + self._pos_tag = None
  114 + else:
  115 + logger.info("HanLP not installed; using simple tokenizer")
103 116  
104 117 @property
105 118 def text_encoder(self) -> BgeEncoder:
... ... @@ -121,32 +134,51 @@ class QueryParser:
121 134 translation_context=self.config.query_config.translation_context
122 135 )
123 136 return self._translator
  137 +
  138 + def _simple_tokenize(self, text: str) -> List[str]:
  139 + """
  140 + Lightweight tokenizer fallback.
  141 +
  142 + - Groups consecutive CJK chars as a token
  143 + - Groups consecutive latin/digits/underscore/dash as a token
  144 + """
  145 + if not text:
  146 + return []
  147 + pattern = re.compile(r"[\u4e00-\u9fff]+|[A-Za-z0-9_]+(?:-[A-Za-z0-9_]+)*")
  148 + return pattern.findall(text)
124 149  
125 150 def _extract_keywords(self, query: str) -> str:
126 151 """Extract keywords (nouns with length > 1) from query."""
127   - tok_result = self._tok(query)
128   - if not tok_result:
129   - return ""
130   -
131   - words = [x[0] for x in tok_result]
132   - pos_tags = self._pos_tag(words)
133   -
134   - keywords = []
135   - for word, pos in zip(words, pos_tags):
136   - if len(word) > 1 and pos.startswith('N'):
137   - keywords.append(word)
138   -
  152 + if self._tok is not None and self._pos_tag is not None:
  153 + tok_result = self._tok(query)
  154 + if not tok_result:
  155 + return ""
  156 + words = [x[0] for x in tok_result]
  157 + pos_tags = self._pos_tag(words)
  158 + keywords = []
  159 + for word, pos in zip(words, pos_tags):
  160 + if len(word) > 1 and isinstance(pos, str) and pos.startswith("N"):
  161 + keywords.append(word)
  162 + return " ".join(keywords)
  163 +
  164 + # Fallback: treat tokens with length > 1 as "keywords"
  165 + tokens = self._simple_tokenize(query)
  166 + keywords = [t for t in tokens if len(t) > 1]
139 167 return " ".join(keywords)
140 168  
141 169 def _get_token_count(self, query: str) -> int:
142   - """Get token count using HanLP."""
143   - tok_result = self._tok(query)
144   - return len(tok_result) if tok_result else 0
  170 + """Get token count (HanLP if available, otherwise simple)."""
  171 + if self._tok is not None:
  172 + tok_result = self._tok(query)
  173 + return len(tok_result) if tok_result else 0
  174 + return len(self._simple_tokenize(query))
145 175  
146 176 def _get_query_tokens(self, query: str) -> List[str]:
147   - """Get token list using HanLP."""
148   - tok_result = self._tok(query)
149   - return [x[0] for x in tok_result] if tok_result else []
  177 + """Get token list (HanLP if available, otherwise simple)."""
  178 + if self._tok is not None:
  179 + tok_result = self._tok(query)
  180 + return [x[0] for x in tok_result] if tok_result else []
  181 + return self._simple_tokenize(query)
150 182  
151 183 def parse(
152 184 self,
... ...
requirements.txt
... ... @@ -12,14 +12,13 @@ pandas>=2.0.0
12 12 # Elasticsearch
13 13 elasticsearch>=8.0.0,<9.0.0
14 14  
15   -# ML/Embeddings
16   -torch>=2.0.0
17   -sentence-transformers>=2.2.0
18   -transformers>=4.30.0
19   -modelscope>=1.9.0
20   -cn-clip>=1.5.0
  15 +# Redis (cache; used by translator/embeddings)
  16 +redis>=5.0.0
  17 +
  18 +# Math / vector utilities (used across modules)
21 19 numpy>=1.24.0
22   -pillow>=10.0.0
  20 +
  21 +# LLM/Translation clients
23 22 openai>=1.0.0
24 23  
25 24 # API
... ...
requirements_ml.txt 0 → 100644
... ... @@ -0,0 +1,16 @@
  1 +# Optional heavy dependencies for local embedding/image encoding.
  2 +#
  3 +# Install when you need:
  4 +# - `./scripts/start_embedding_service.sh` (local embeddings server)
  5 +# - local BGE-M3 / CN-CLIP inference
  6 +#
  7 +# Notes:
  8 +# - `torch` wheels can be very large; if you want CPU-only wheels,
  9 +# consider installing torch separately with the official CPU index.
  10 +#
  11 +torch>=2.0.0
  12 +sentence-transformers>=2.2.0
  13 +transformers>=4.30.0
  14 +modelscope>=1.9.0
  15 +cn-clip>=1.5.0
  16 +pillow>=10.0.0
... ...
scripts/create_venv.sh 0 → 100644
... ... @@ -0,0 +1,59 @@
  1 +#!/bin/bash
  2 +#
  3 +# Create and initialize Python venv for saas-search.
  4 +#
  5 +# Usage:
  6 +# ./scripts/create_venv.sh
  7 +#
  8 +set -euo pipefail
  9 +
  10 +PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
  11 +cd "${PROJECT_ROOT}"
  12 +
  13 +VENV_DIR="${PROJECT_ROOT}/.venv"
  14 +
  15 +PYTHON_BIN="${PYTHON_BIN:-python3.10}"
  16 +if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then
  17 + PYTHON_BIN="python3"
  18 +fi
  19 +
  20 +echo "Using python: $(${PYTHON_BIN} --version)"
  21 +
  22 +if ! "${PYTHON_BIN}" -c "import ensurepip" >/dev/null 2>&1; then
  23 + echo "ERROR: ensurepip is not available for ${PYTHON_BIN}." >&2
  24 + echo "On Ubuntu/Debian, install the venv package first, e.g.:" >&2
  25 + echo " sudo apt-get update -y && sudo apt-get install -y python3-venv" >&2
  26 + echo "If you are using Python 3.12 specifically, you may need:" >&2
  27 + echo " sudo apt-get install -y python3.12-venv" >&2
  28 + exit 1
  29 +fi
  30 +
  31 +if [[ -d "${VENV_DIR}" ]]; then
  32 + if [[ -f "${VENV_DIR}/bin/activate" ]]; then
  33 + echo "venv already exists at ${VENV_DIR}"
  34 + else
  35 + echo "Found incomplete venv at ${VENV_DIR}, recreating..."
  36 + rm -rf "${VENV_DIR}"
  37 + "${PYTHON_BIN}" -m venv "${VENV_DIR}"
  38 + fi
  39 +else
  40 + echo "Creating venv at ${VENV_DIR} ..."
  41 + "${PYTHON_BIN}" -m venv "${VENV_DIR}"
  42 +fi
  43 +
  44 +# shellcheck disable=SC1091
  45 +source "${VENV_DIR}/bin/activate"
  46 +
  47 +python -m pip install --upgrade pip setuptools wheel
  48 +python -m pip install -r requirements.txt
  49 +
  50 +if [[ "${INSTALL_ML:-0}" == "1" ]]; then
  51 + echo
  52 + echo "INSTALL_ML=1 detected. Installing optional ML dependencies..."
  53 + python -m pip install -r requirements_ml.txt
  54 +fi
  55 +
  56 +echo
  57 +echo "Done."
  58 +echo "Next:"
  59 +echo " source activate.sh"
... ...
scripts/mock_data.sh
... ... @@ -20,9 +20,7 @@
20 20 # ============================================================================
21 21  
22 22 cd "$(dirname "$0")/.."
23   -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}"
24   -source "$CONDA_ROOT/etc/profile.d/conda.sh"
25   -conda activate searchengine
  23 +source ./activate.sh
26 24  
27 25 GREEN='\033[0;32m'
28 26 YELLOW='\033[1;33m'
... ... @@ -33,13 +31,6 @@ echo -e "${GREEN}========================================${NC}"
33 31 echo -e "${GREEN}Mock Data Script${NC}"
34 32 echo -e "${GREEN}========================================${NC}"
35 33  
36   -# Load config from .env file if it exists
37   -if [ -f .env ]; then
38   - set -a
39   - source .env
40   - set +a
41   -fi
42   -
43 34 # ============================================================================
44 35 # 写死的配置参数(不需要配置化,这是测试数据构造脚本)
45 36 # ============================================================================
... ...
scripts/start_backend.sh
... ... @@ -5,9 +5,7 @@
5 5 set -e
6 6  
7 7 cd "$(dirname "$0")/.."
8   -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}"
9   -source "$CONDA_ROOT/etc/profile.d/conda.sh"
10   -conda activate searchengine
  8 +source ./activate.sh
11 9  
12 10 GREEN='\033[0;32m'
13 11 YELLOW='\033[1;33m'
... ... @@ -17,13 +15,6 @@ echo -e "${GREEN}========================================${NC}"
17 15 echo -e "${GREEN}Starting Backend API Service${NC}"
18 16 echo -e "${GREEN}========================================${NC}"
19 17  
20   -# Load config from .env file if it exists
21   -if [ -f .env ]; then
22   - set -a
23   - source .env
24   - set +a
25   -fi
26   -
27 18 echo -e "\n${YELLOW}Configuration:${NC}"
28 19 echo " API Host: ${API_HOST:-0.0.0.0}"
29 20 echo " API Port: ${API_PORT:-6002}"
... ...
scripts/start_embedding_service.sh
... ... @@ -12,12 +12,7 @@ set -e
12 12  
13 13 cd "$(dirname "$0")/.."
14 14  
15   -# Load conda env if available (keep consistent with other scripts)
16   -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}"
17   -if [ -f "$CONDA_ROOT/etc/profile.d/conda.sh" ]; then
18   - source "$CONDA_ROOT/etc/profile.d/conda.sh"
19   - conda activate searchengine
20   -fi
  15 +source ./activate.sh
21 16  
22 17 EMBEDDING_SERVICE_HOST=$(python -c "from embeddings.config import CONFIG; print(CONFIG.HOST)")
23 18 EMBEDDING_SERVICE_PORT=$(python -c "from embeddings.config import CONFIG; print(CONFIG.PORT)")
... ...
scripts/start_frontend.sh
... ... @@ -5,9 +5,7 @@
5 5 set -e
6 6  
7 7 cd "$(dirname "$0")/.."
8   -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}"
9   -source "$CONDA_ROOT/etc/profile.d/conda.sh"
10   -conda activate searchengine
  8 +source ./activate.sh
11 9  
12 10 GREEN='\033[0;32m'
13 11 YELLOW='\033[1;33m'
... ...
scripts/start_indexer.sh
... ... @@ -5,9 +5,7 @@
5 5 set -e
6 6  
7 7 cd "$(dirname "$0")/.."
8   -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}"
9   -source "$CONDA_ROOT/etc/profile.d/conda.sh"
10   -conda activate searchengine
  8 +source ./activate.sh
11 9  
12 10 GREEN='\033[0;32m'
13 11 YELLOW='\033[1;33m'
... ... @@ -17,13 +15,6 @@ echo -e "${GREEN}========================================${NC}"
17 15 echo -e "${GREEN}Starting Indexer API Service${NC}"
18 16 echo -e "${GREEN}========================================${NC}"
19 17  
20   -# Load config from .env file if it exists
21   -if [ -f .env ]; then
22   - set -a
23   - source .env
24   - set +a
25   -fi
26   -
27 18 echo -e "\n${YELLOW}Configuration:${NC}"
28 19 echo " INDEXER Host: ${INDEXER_HOST:-0.0.0.0}"
29 20 echo " INDEXER Port: ${INDEXER_PORT:-6004}"
... ...
scripts/tenant3__csv_to_shoplazza_xlsx.sh
1   -# 激活环境
2   -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}"
3   -source "$CONDA_ROOT/etc/profile.d/conda.sh"
4   -conda activate searchengine
  1 +#!/bin/bash
  2 +set -e
  3 +
  4 +cd "$(dirname "$0")/.."
  5 +source ./activate.sh
5 6  
6 7 # # 基本使用(生成所有数据)
7 8 # python scripts/csv_to_excel.py
... ...
setup.sh
1 1 #!/bin/bash
2 2  
3   -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}"
4   -source "$CONDA_ROOT/etc/profile.d/conda.sh"
5   -
6 3 # saas-search Setup and Startup Script
7 4 # This script sets up the environment and starts all services
8 5  
... ... @@ -22,24 +19,16 @@ echo -e "${GREEN}========================================${NC}"
22 19 cd "$(dirname "$0")"
23 20 PROJECT_ROOT=$(pwd)
24 21  
25   -echo -e "\n${YELLOW}Step 1: Setting up Conda environment${NC}"
26   -# Check if conda is available
27   -if ! command -v conda &> /dev/null; then
28   - echo -e "${RED}Error: conda not found. Please install Miniconda or Anaconda${NC}"
29   - exit 1
30   -fi
  22 +echo -e "\n${YELLOW}Step 1: Setting up Python environment (venv preferred)${NC}"
31 23  
32   -# Check if environment exists
33   -if conda env list | grep -q "searchengine"; then
34   - echo -e "${GREEN}Environment 'searchengine' already exists${NC}"
35   - conda activate searchengine
36   -else
37   - echo -e "${YELLOW}Creating conda environment 'searchengine'...${NC}"
38   - conda env create -f environment.yml
39   - conda activate searchengine
40   - echo -e "${GREEN}Environment created successfully!${NC}"
  24 +if [ ! -f "${PROJECT_ROOT}/.venv/bin/activate" ]; then
  25 + echo -e "${YELLOW}Creating venv and installing dependencies...${NC}"
  26 + ./scripts/create_venv.sh
41 27 fi
42 28  
  29 +# Activate environment + load .env
  30 +source ./activate.sh
  31 +
43 32 # Verify environment
44 33 echo -e "\n${YELLOW}Current Python version:${NC}"
45 34 python --version
... ... @@ -74,7 +63,7 @@ echo -e "${GREEN}Setup Complete!${NC}"
74 63 echo -e "${GREEN}========================================${NC}"
75 64 echo ""
76 65 echo -e "Next steps:"
77   -echo -e " 1. Ingest data: ${YELLOW}./scripts/ingest.sh${NC}"
78   -echo -e " 2. Start backend: ${YELLOW}./scripts/start_backend.sh${NC}"
  66 +echo -e " 1. Start backend: ${YELLOW}./scripts/start_backend.sh${NC}"
  67 +echo -e " 2. Start indexer: ${YELLOW}./scripts/start_indexer.sh${NC}"
79 68 echo -e " 3. Start frontend: ${YELLOW}./scripts/start_frontend.sh${NC}"
80 69 echo ""
... ...