Commit 484adbfe9ccfae39f5ce92c0173011d724402d47
1 parent
a7920e17
adapt ubuntu; conda -> venv
Showing
26 changed files
with
290 additions
and
154 deletions
Show diff stats
| 1 | # Elasticsearch Configuration | 1 | # Elasticsearch Configuration |
| 2 | ES_HOST=http://localhost:9200 | 2 | ES_HOST=http://localhost:9200 |
| 3 | -ES_USERNAME=essa | 3 | +ES_USERNAME=saas |
| 4 | ES_PASSWORD=4hOaLaf41y2VuI8y | 4 | ES_PASSWORD=4hOaLaf41y2VuI8y |
| 5 | 5 | ||
| 6 | # Redis Configuration (Optional) | 6 | # Redis Configuration (Optional) |
| @@ -30,7 +30,7 @@ IMAGE_MODEL_DIR=/data/tw/models/cn-clip # 已经改为web请求了,不使用… | @@ -30,7 +30,7 @@ IMAGE_MODEL_DIR=/data/tw/models/cn-clip # 已经改为web请求了,不使用… | ||
| 30 | CACHE_DIR=.cache | 30 | CACHE_DIR=.cache |
| 31 | 31 | ||
| 32 | # Frontend API Base URL | 32 | # Frontend API Base URL |
| 33 | -API_BASE_URL=http://120.76.41.98:6002 | 33 | +API_BASE_URL=http://43.166.252.75:6002 |
| 34 | 34 | ||
| 35 | 35 | ||
| 36 | DASHSCOPE_API_KEY=sk-c3b8d4db061840aa8effb748df2a997b | 36 | DASHSCOPE_API_KEY=sk-c3b8d4db061840aa8effb748df2a997b |
.gitignore
README.md
| @@ -70,7 +70,7 @@ query anchor | @@ -70,7 +70,7 @@ query anchor | ||
| 70 | 70 | ||
| 71 | 对外: | 71 | 对外: |
| 72 | embedding服务: | 72 | embedding服务: |
| 73 | - curl -X POST http://120.76.41.98:6005/embed/text \ | 73 | + curl -X POST http://43.166.252.75:6005/embed/text \ |
| 74 | -H "Content-Type: application/json" \ | 74 | -H "Content-Type: application/json" \ |
| 75 | -d '["衣服", "Bohemian Maxi Dress"]' | 75 | -d '["衣服", "Bohemian Maxi Dress"]' |
| 76 | 76 | ||
| @@ -91,7 +91,7 @@ localhost替换为 | @@ -91,7 +91,7 @@ localhost替换为 | ||
| 91 | 服务器内网地址: | 91 | 服务器内网地址: |
| 92 | 10.0.163.168 | 92 | 10.0.163.168 |
| 93 | 公网地址: | 93 | 公网地址: |
| 94 | -120.76.41.98 | 94 | +43.166.252.75 |
| 95 | 95 | ||
| 96 | 96 | ||
| 97 | # 电商搜索引擎 SaaS | 97 | # 电商搜索引擎 SaaS |
| @@ -101,15 +101,18 @@ localhost替换为 | @@ -101,15 +101,18 @@ localhost替换为 | ||
| 101 | 101 | ||
| 102 | ## 项目环境 | 102 | ## 项目环境 |
| 103 | 103 | ||
| 104 | -以项目根目录的 **`activate.sh`** 为准(会激活 Conda 环境 `searchengine` 并加载 `.env`): | 104 | +以项目根目录的 **`activate.sh`** 为准(**优先激活 venv:`./.venv`,并加载 `.env`;兼容 Conda 回退**): |
| 105 | 105 | ||
| 106 | ```bash | 106 | ```bash |
| 107 | -# 若在新机器且 Conda 不在默认路径,先设置: | ||
| 108 | -# - 你的 conda 是 ~/anaconda3/bin/conda,则:export CONDA_ROOT=$HOME/anaconda3 | 107 | +# 推荐:首次创建 venv(默认安装基础依赖) |
| 108 | +./scripts/create_venv.sh | ||
| 109 | + | ||
| 110 | +# 如需本地向量/图片编码(会安装 torch/transformers 等重依赖) | ||
| 111 | +# INSTALL_ML=1 ./scripts/create_venv.sh | ||
| 109 | source activate.sh | 112 | source activate.sh |
| 110 | ``` | 113 | ``` |
| 111 | 114 | ||
| 112 | -新机器首次需创建环境,见 `docs/环境配置说明.md`(`conda env create -f environment.yml` 或 `pip install -r requirements.txt`)。 | 115 | +新机器首次需创建环境,见 `docs/环境配置说明.md`(推荐 venv;Conda 为兼容旧流程)。 |
| 113 | 116 | ||
| 114 | ## 测试pipeline | 117 | ## 测试pipeline |
| 115 | 118 |
activate.sh
| 1 | #!/bin/bash | 1 | #!/bin/bash |
| 2 | -# 新机器部署:可设置 CONDA_ROOT 指向本机 Conda 路径 | ||
| 3 | -# 例如你的 conda 是 ~/anaconda3/bin/conda,则 export CONDA_ROOT=$HOME/anaconda3 | ||
| 4 | -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | ||
| 5 | -source "$CONDA_ROOT/etc/profile.d/conda.sh" | ||
| 6 | -conda activate searchengine | 2 | +# |
| 3 | +# Unified environment activator (venv preferred, conda fallback). | ||
| 4 | +# | ||
| 5 | +# Usage: | ||
| 6 | +# source activate.sh | ||
| 7 | +# | ||
| 8 | +# Priority: | ||
| 9 | +# 1) ./.venv (Python venv) | ||
| 10 | +# 2) conda env "searchengine" (legacy) | ||
| 11 | +# | ||
| 12 | + | ||
| 13 | +# Must be sourced | ||
| 14 | +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||
| 15 | + echo "ERROR: Please source this script: source activate.sh" >&2 | ||
| 16 | + exit 1 | ||
| 17 | +fi | ||
| 18 | + | ||
| 19 | +PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" | ||
| 20 | + | ||
| 21 | +# 1) venv (preferred) | ||
| 22 | +VENV_ACTIVATE="${PROJECT_ROOT}/.venv/bin/activate" | ||
| 23 | +if [[ -f "${VENV_ACTIVATE}" ]]; then | ||
| 24 | + # shellcheck disable=SC1090 | ||
| 25 | + source "${VENV_ACTIVATE}" | ||
| 26 | + ENV_KIND="venv" | ||
| 27 | +else | ||
| 28 | + # 2) conda fallback (legacy) | ||
| 29 | + # 新机器部署:可设置 CONDA_ROOT 指向本机 Conda 路径 | ||
| 30 | + # 例如你的 conda 是 ~/anaconda3/bin/conda,则 export CONDA_ROOT=$HOME/anaconda3 | ||
| 31 | + CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | ||
| 32 | + if [[ -f "${CONDA_ROOT}/etc/profile.d/conda.sh" ]]; then | ||
| 33 | + # shellcheck disable=SC1091 | ||
| 34 | + source "${CONDA_ROOT}/etc/profile.d/conda.sh" | ||
| 35 | + conda activate searchengine | ||
| 36 | + ENV_KIND="conda" | ||
| 37 | + else | ||
| 38 | + echo "ERROR: No .venv found and conda.sh not found at ${CONDA_ROOT}/etc/profile.d/conda.sh" >&2 | ||
| 39 | + echo " - Create venv: ./scripts/create_venv.sh" >&2 | ||
| 40 | + echo " - Or set CONDA_ROOT to your conda install path" >&2 | ||
| 41 | + return 1 | ||
| 42 | + fi | ||
| 43 | +fi | ||
| 7 | 44 | ||
| 8 | # 如果需要加载 .env 中的环境变量 | 45 | # 如果需要加载 .env 中的环境变量 |
| 9 | -if [ -f .env ]; then | 46 | +ENV_FILE="${PROJECT_ROOT}/.env" |
| 47 | +if [ -f "${ENV_FILE}" ]; then | ||
| 10 | set -a # 自动导出所有变量 | 48 | set -a # 自动导出所有变量 |
| 11 | - source <(grep -v '^#' .env | grep -v '^$' | sed 's/#.*$//' | sed 's/\r$//') | 49 | + # NOTE: This loader tolerates comments/blank lines and strips inline comments. |
| 50 | + source <(grep -v '^#' "${ENV_FILE}" | grep -v '^$' | sed 's/#.*$//' | sed 's/\r$//') | ||
| 12 | set +a # 关闭自动导出 | 51 | set +a # 关闭自动导出 |
| 13 | fi | 52 | fi |
| 14 | 53 | ||
| 15 | -echo "Environment activated: searchengine" | 54 | +echo "Environment activated (${ENV_KIND}): ${VIRTUAL_ENV:-${CONDA_DEFAULT_ENV:-unknown}}" |
docs/CNCLIP_SERVICE说明文档.md
| @@ -12,7 +12,7 @@ normlize后的结果: | @@ -12,7 +12,7 @@ normlize后的结果: | ||
| 12 | https://aisearch.cdn.bcebos.com/fileManager/GtB5doGAr1skTx38P7fb7Q/182.jpg?authorization=bce-auth-v1%2F7e22d8caf5af46cc9310f1e3021709f3%2F2025-12-30T04%3A45%3A38Z%2F86400%2Fhost%2Ffe222039926cb7ff593021af40268c782b8892598114e24773d0c1bfc976a8df | 12 | https://aisearch.cdn.bcebos.com/fileManager/GtB5doGAr1skTx38P7fb7Q/182.jpg?authorization=bce-auth-v1%2F7e22d8caf5af46cc9310f1e3021709f3%2F2025-12-30T04%3A45%3A38Z%2F86400%2Fhost%2Ffe222039926cb7ff593021af40268c782b8892598114e24773d0c1bfc976a8df |
| 13 | https://oss.essa.cn/2e353867-7496-4d4e-a7c8-0af50f49f6eb.jpg?x-oss-process=image/resize,m_lfit,w_2048,h_2048 | 13 | https://oss.essa.cn/2e353867-7496-4d4e-a7c8-0af50f49f6eb.jpg?x-oss-process=image/resize,m_lfit,w_2048,h_2048 |
| 14 | 14 | ||
| 15 | -curl -X POST "http://120.76.41.98:5000/embedding/generate_image_embeddings" -H "Content-Type: application/json" -d '[ | 15 | +curl -X POST "http://43.166.252.75:5000/embedding/generate_image_embeddings" -H "Content-Type: application/json" -d '[ |
| 16 | { | 16 | { |
| 17 | "id": "test_1", | 17 | "id": "test_1", |
| 18 | "pic_url": "https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg" | 18 | "pic_url": "https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg" |
docs/ES/ES_8.18/1_ES配置和使用.md
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | ## 相关链接 | 3 | ## 相关链接 |
| 4 | - 接口文档:http://rap.essa.top:88/workspace/myWorkspace.do?projectId=78#2187 | 4 | - 接口文档:http://rap.essa.top:88/workspace/myWorkspace.do?projectId=78#2187 |
| 5 | -- Kibana 控制台:http://120.76.41.98:5601/app/dev_tools#/console/shell | 5 | +- Kibana 控制台:http://43.166.252.75:5601/app/dev_tools#/console/shell |
| 6 | 6 | ||
| 7 | ## 分词方面 | 7 | ## 分词方面 |
| 8 | 8 |
docs/ES/ES_8.18/2_kibana安装.md
docs/Usage-Guide.md
| @@ -27,10 +27,19 @@ | @@ -27,10 +27,19 @@ | ||
| 27 | 27 | ||
| 28 | #### 1. 安装 Python 依赖与激活环境 | 28 | #### 1. 安装 Python 依赖与激活环境 |
| 29 | 29 | ||
| 30 | -**推荐**:使用项目根目录的 `activate.sh` 激活环境(会加载 `.env`)。新机器部署时若 Conda 不在默认路径,请先设置 `CONDA_ROOT`(例如你的 conda 是 `~/anaconda3/bin/conda`,则 `export CONDA_ROOT=$HOME/anaconda3`)。详见 `docs/环境配置说明.md`。 | 30 | +**推荐**:使用项目根目录的 `activate.sh` 激活环境(会加载 `.env`)。目前推荐 venv(`.venv`);Conda 仅作为兼容回退(需要 `CONDA_ROOT`)。详见 `docs/环境配置说明.md`。 |
| 31 | 31 | ||
| 32 | ```bash | 32 | ```bash |
| 33 | cd /data/saas-search | 33 | cd /data/saas-search |
| 34 | +./scripts/create_venv.sh # 首次创建 venv(只需执行一次) | ||
| 35 | +source activate.sh | ||
| 36 | +``` | ||
| 37 | + | ||
| 38 | +如果需要本地 embedding / 图像编码(会安装 torch/transformers 等较重依赖): | ||
| 39 | + | ||
| 40 | +```bash | ||
| 41 | +cd /data/saas-search | ||
| 42 | +INSTALL_ML=1 ./scripts/create_venv.sh | ||
| 34 | source activate.sh | 43 | source activate.sh |
| 35 | ``` | 44 | ``` |
| 36 | 45 |
docs/temporary/sku_image_src问题诊断报告.md
| @@ -98,7 +98,7 @@ else: | @@ -98,7 +98,7 @@ else: | ||
| 98 | 98 | ||
| 99 | 2. **验证修复**:重新索引后,查询 ES 验证 `image_src` 字段是否已包含: | 99 | 2. **验证修复**:重新索引后,查询 ES 验证 `image_src` 字段是否已包含: |
| 100 | ```bash | 100 | ```bash |
| 101 | - curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' \ | 101 | + curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' \ |
| 102 | -H 'Content-Type: application/json' \ | 102 | -H 'Content-Type: application/json' \ |
| 103 | -d '{ | 103 | -d '{ |
| 104 | "size": 1, | 104 | "size": 1, |
docs/常用查询 - ES.md
| @@ -8,7 +8,7 @@ | @@ -8,7 +8,7 @@ | ||
| 8 | # 一般情况下不需要在查询中再按 tenant_id 过滤(可选保留用于排查)。 | 8 | # 一般情况下不需要在查询中再按 tenant_id 过滤(可选保留用于排查)。 |
| 9 | 9 | ||
| 10 | ### 1. 根据 tenant_id / spu_id 查询 | 10 | ### 1. 根据 tenant_id / spu_id 查询 |
| 11 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | 11 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 12 | "size": 11, | 12 | "size": 11, |
| 13 | "_source": ["title"], | 13 | "_source": ["title"], |
| 14 | "query": { | 14 | "query": { |
| @@ -21,7 +21,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ | @@ -21,7 +21,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ | ||
| 21 | }' | 21 | }' |
| 22 | 22 | ||
| 23 | 23 | ||
| 24 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | 24 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 25 | "size": 100, | 25 | "size": 100, |
| 26 | "_source": ["title"], | 26 | "_source": ["title"], |
| 27 | "query": { | 27 | "query": { |
| @@ -30,7 +30,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ | @@ -30,7 +30,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ | ||
| 30 | }' | 30 | }' |
| 31 | 31 | ||
| 32 | 32 | ||
| 33 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | 33 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 34 | "size": 5, | 34 | "size": 5, |
| 35 | "_source": ["title", "keywords", "tags"], | 35 | "_source": ["title", "keywords", "tags"], |
| 36 | "query": { | 36 | "query": { |
| @@ -43,7 +43,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ | @@ -43,7 +43,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ | ||
| 43 | }' | 43 | }' |
| 44 | 44 | ||
| 45 | 45 | ||
| 46 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | 46 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 47 | "size": 1, | 47 | "size": 1, |
| 48 | "_source": ["title", "keywords", "tags"], | 48 | "_source": ["title", "keywords", "tags"], |
| 49 | "query": { | 49 | "query": { |
| @@ -65,7 +65,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ | @@ -65,7 +65,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ | ||
| 65 | }' | 65 | }' |
| 66 | 66 | ||
| 67 | 67 | ||
| 68 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | 68 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 69 | "size": 1, | 69 | "size": 1, |
| 70 | "_source": ["title"], | 70 | "_source": ["title"], |
| 71 | "query": { | 71 | "query": { |
| @@ -86,17 +86,17 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ | @@ -86,17 +86,17 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ | ||
| 86 | } | 86 | } |
| 87 | }' | 87 | }' |
| 88 | 88 | ||
| 89 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{ | 89 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{ |
| 90 | "analyzer": "index_ansj", | 90 | "analyzer": "index_ansj", |
| 91 | "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝" | 91 | "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝" |
| 92 | }' | 92 | }' |
| 93 | 93 | ||
| 94 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{ | 94 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{ |
| 95 | "analyzer": "query_ansj", | 95 | "analyzer": "query_ansj", |
| 96 | "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝" | 96 | "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝" |
| 97 | }' | 97 | }' |
| 98 | 98 | ||
| 99 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | 99 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 100 | "size": 100, | 100 | "size": 100, |
| 101 | "from": 0, | 101 | "from": 0, |
| 102 | "query": { | 102 | "query": { |
| @@ -131,7 +131,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ | @@ -131,7 +131,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ | ||
| 131 | } | 131 | } |
| 132 | }' | 132 | }' |
| 133 | 133 | ||
| 134 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | 134 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 135 | "size": 1, | 135 | "size": 1, |
| 136 | "from": 0, | 136 | "from": 0, |
| 137 | "query": { | 137 | "query": { |
| @@ -258,7 +258,7 @@ GET /search_products_tenant_2/_search | @@ -258,7 +258,7 @@ GET /search_products_tenant_2/_search | ||
| 258 | } | 258 | } |
| 259 | 259 | ||
| 260 | 260 | ||
| 261 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | 261 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 262 | "size": 5, | 262 | "size": 5, |
| 263 | "query": { | 263 | "query": { |
| 264 | "bool": { | 264 | "bool": { |
| @@ -271,7 +271,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ | @@ -271,7 +271,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ | ||
| 271 | 271 | ||
| 272 | 272 | ||
| 273 | ### 2. 统计租户的总文档数 | 273 | ### 2. 统计租户的总文档数 |
| 274 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_count?pretty' -H 'Content-Type: application/json' -d '{ | 274 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_count?pretty' -H 'Content-Type: application/json' -d '{ |
| 275 | "query": { | 275 | "query": { |
| 276 | "match_all": {} | 276 | "match_all": {} |
| 277 | } | 277 | } |
| @@ -285,7 +285,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -285,7 +285,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 285 | ## 1. 检查ES文档的分面字段数据 | 285 | ## 1. 检查ES文档的分面字段数据 |
| 286 | 286 | ||
| 287 | ### 1.1 查询特定租户的商品,显示分面相关字段 | 287 | ### 1.1 查询特定租户的商品,显示分面相关字段 |
| 288 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 288 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 289 | "query": { | 289 | "query": { |
| 290 | "term": { | 290 | "term": { |
| 291 | "tenant_id": "162" | 291 | "tenant_id": "162" |
| @@ -306,7 +306,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -306,7 +306,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 306 | }' | 306 | }' |
| 307 | 307 | ||
| 308 | ### 1.2 验证category1_name字段是否有数据 | 308 | ### 1.2 验证category1_name字段是否有数据 |
| 309 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 309 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 310 | "query": { | 310 | "query": { |
| 311 | "bool": { | 311 | "bool": { |
| 312 | "filter": [ | 312 | "filter": [ |
| @@ -319,7 +319,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -319,7 +319,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 319 | }' | 319 | }' |
| 320 | 320 | ||
| 321 | ### 1.3 验证specifications字段是否有数据 | 321 | ### 1.3 验证specifications字段是否有数据 |
| 322 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 322 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 323 | "query": { | 323 | "query": { |
| 324 | "bool": { | 324 | "bool": { |
| 325 | "filter": [ | 325 | "filter": [ |
| @@ -334,7 +334,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -334,7 +334,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 334 | ## 2. 分面聚合查询(Facet Aggregations) | 334 | ## 2. 分面聚合查询(Facet Aggregations) |
| 335 | 335 | ||
| 336 | ### 2.1 category1_name 分面聚合 | 336 | ### 2.1 category1_name 分面聚合 |
| 337 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 337 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 338 | "query": { | 338 | "query": { |
| 339 | "match_all": {} | 339 | "match_all": {} |
| 340 | }, | 340 | }, |
| @@ -350,7 +350,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -350,7 +350,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 350 | }' | 350 | }' |
| 351 | 351 | ||
| 352 | ### 2.2 specifications.color 分面聚合 | 352 | ### 2.2 specifications.color 分面聚合 |
| 353 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 353 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 354 | "query": { | 354 | "query": { |
| 355 | "match_all": {} | 355 | "match_all": {} |
| 356 | }, | 356 | }, |
| @@ -382,7 +382,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -382,7 +382,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 382 | }' | 382 | }' |
| 383 | 383 | ||
| 384 | ### 2.3 specifications.size 分面聚合 | 384 | ### 2.3 specifications.size 分面聚合 |
| 385 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 385 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 386 | "query": { | 386 | "query": { |
| 387 | "match_all": {} | 387 | "match_all": {} |
| 388 | }, | 388 | }, |
| @@ -414,7 +414,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -414,7 +414,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 414 | }' | 414 | }' |
| 415 | 415 | ||
| 416 | ### 2.4 specifications.material 分面聚合 | 416 | ### 2.4 specifications.material 分面聚合 |
| 417 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 417 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 418 | "query": { | 418 | "query": { |
| 419 | "match_all": {} | 419 | "match_all": {} |
| 420 | }, | 420 | }, |
| @@ -446,7 +446,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -446,7 +446,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 446 | }' | 446 | }' |
| 447 | 447 | ||
| 448 | ### 2.5 综合分面聚合(category + color + size + material) | 448 | ### 2.5 综合分面聚合(category + color + size + material) |
| 449 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 449 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 450 | "query": { | 450 | "query": { |
| 451 | "match_all": {} | 451 | "match_all": {} |
| 452 | }, | 452 | }, |
| @@ -530,7 +530,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -530,7 +530,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 530 | ## 3. 检查specifications嵌套字段的详细结构 | 530 | ## 3. 检查specifications嵌套字段的详细结构 |
| 531 | 531 | ||
| 532 | ### 3.1 查看specifications的name字段有哪些值 | 532 | ### 3.1 查看specifications的name字段有哪些值 |
| 533 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | 533 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 534 | "query": { | 534 | "query": { |
| 535 | "term": { | 535 | "term": { |
| 536 | "tenant_id": "162" | 536 | "tenant_id": "162" |
| @@ -555,7 +555,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s | @@ -555,7 +555,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s | ||
| 555 | }' | 555 | }' |
| 556 | 556 | ||
| 557 | ### 3.2 查看某个商品的完整specifications数据 | 557 | ### 3.2 查看某个商品的完整specifications数据 |
| 558 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | 558 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 559 | "query": { | 559 | "query": { |
| 560 | "bool": { | 560 | "bool": { |
| 561 | "filter": [ | 561 | "filter": [ |
| @@ -571,7 +571,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s | @@ -571,7 +571,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s | ||
| 571 | ## 4. 统计查询 | 571 | ## 4. 统计查询 |
| 572 | 572 | ||
| 573 | ### 4.1 统计有category1_name的文档数量 | 573 | ### 4.1 统计有category1_name的文档数量 |
| 574 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{ | 574 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{ |
| 575 | "query": { | 575 | "query": { |
| 576 | "bool": { | 576 | "bool": { |
| 577 | "filter": [ | 577 | "filter": [ |
| @@ -582,7 +582,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -582,7 +582,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 582 | }' | 582 | }' |
| 583 | 583 | ||
| 584 | ### 4.2 统计有specifications的文档数量 | 584 | ### 4.2 统计有specifications的文档数量 |
| 585 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{ | 585 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{ |
| 586 | "query": { | 586 | "query": { |
| 587 | "bool": { | 587 | "bool": { |
| 588 | "filter": [ | 588 | "filter": [ |
| @@ -596,7 +596,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -596,7 +596,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 596 | ## 5. 诊断问题场景 | 596 | ## 5. 诊断问题场景 |
| 597 | 597 | ||
| 598 | ### 5.1 查找没有category1_name但有category的文档(MySQL有数据但ES没有) | 598 | ### 5.1 查找没有category1_name但有category的文档(MySQL有数据但ES没有) |
| 599 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 599 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 600 | "query": { | 600 | "query": { |
| 601 | "bool": { | 601 | "bool": { |
| 602 | "filter": [ | 602 | "filter": [ |
| @@ -612,7 +612,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | @@ -612,7 +612,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te | ||
| 612 | }' | 612 | }' |
| 613 | 613 | ||
| 614 | ### 5.2 查找有option但没有specifications的文档(数据转换问题) | 614 | ### 5.2 查找有option但没有specifications的文档(数据转换问题) |
| 615 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | 615 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ |
| 616 | "query": { | 616 | "query": { |
| 617 | "bool": { | 617 | "bool": { |
| 618 | "filter": [ | 618 | "filter": [ |
docs/搜索API对接指南.md
| @@ -64,7 +64,7 @@ | @@ -64,7 +64,7 @@ | ||
| 64 | 64 | ||
| 65 | ### 1.1 基础信息 | 65 | ### 1.1 基础信息 |
| 66 | 66 | ||
| 67 | -- **Base URL**: `http://120.76.41.98:6002` | 67 | +- **Base URL**: `http://43.166.252.75:6002` |
| 68 | - **协议**: HTTP/HTTPS | 68 | - **协议**: HTTP/HTTPS |
| 69 | - **数据格式**: JSON | 69 | - **数据格式**: JSON |
| 70 | - **字符编码**: UTF-8 | 70 | - **字符编码**: UTF-8 |
| @@ -75,7 +75,7 @@ | @@ -75,7 +75,7 @@ | ||
| 75 | ### 1.2 最简单的搜索请求 | 75 | ### 1.2 最简单的搜索请求 |
| 76 | 76 | ||
| 77 | ```bash | 77 | ```bash |
| 78 | -curl -X POST "http://120.76.41.98:6002/search/" \ | 78 | +curl -X POST "http://43.166.252.75:6002/search/" \ |
| 79 | -H "Content-Type: application/json" \ | 79 | -H "Content-Type: application/json" \ |
| 80 | -H "X-Tenant-ID: 162" \ | 80 | -H "X-Tenant-ID: 162" \ |
| 81 | -d '{"query": "芭比娃娃"}' | 81 | -d '{"query": "芭比娃娃"}' |
| @@ -84,7 +84,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | @@ -84,7 +84,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | ||
| 84 | ### 1.3 带过滤与分页的搜索 | 84 | ### 1.3 带过滤与分页的搜索 |
| 85 | 85 | ||
| 86 | ```bash | 86 | ```bash |
| 87 | -curl -X POST "http://120.76.41.98:6002/search/" \ | 87 | +curl -X POST "http://43.166.252.75:6002/search/" \ |
| 88 | -H "Content-Type: application/json" \ | 88 | -H "Content-Type: application/json" \ |
| 89 | -H "X-Tenant-ID: 162" \ | 89 | -H "X-Tenant-ID: 162" \ |
| 90 | -d '{ | 90 | -d '{ |
| @@ -108,7 +108,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | @@ -108,7 +108,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ | ||
| 108 | ### 1.4 开启分面的搜索 | 108 | ### 1.4 开启分面的搜索 |
| 109 | 109 | ||
| 110 | ```bash | 110 | ```bash |
| 111 | -curl -X POST "http://120.76.41.98:6002/search/" \ | 111 | +curl -X POST "http://43.166.252.75:6002/search/" \ |
| 112 | -H "Content-Type: application/json" \ | 112 | -H "Content-Type: application/json" \ |
| 113 | -H "X-Tenant-ID: 162" \ | 113 | -H "X-Tenant-ID: 162" \ |
| 114 | -d '{ | 114 | -d '{ |
docs/环境配置说明.md
| @@ -11,14 +11,36 @@ | @@ -11,14 +11,36 @@ | ||
| 11 | 11 | ||
| 12 | ## 2. Python 运行环境 | 12 | ## 2. Python 运行环境 |
| 13 | 13 | ||
| 14 | -**推荐方式(与项目脚本一致)**:使用项目根目录下的 `activate.sh` 激活环境,会自动加载当前目录下的 `.env`(忽略注释与空行): | 14 | +本项目历史上使用 Conda 管理环境;目前推荐使用 **venv**(更轻量、对 CI/容器更友好)。项目根目录下的 `activate.sh` 已升级为 **优先激活 `.venv`,并兼容 Conda 回退**,且会自动加载当前目录下的 `.env`(忽略注释与空行)。 |
| 15 | + | ||
| 16 | +### 2.1 venv(推荐) | ||
| 17 | + | ||
| 18 | +首次创建 venv: | ||
| 19 | + | ||
| 20 | +```bash | ||
| 21 | +cd /data/saas-search | ||
| 22 | +./scripts/create_venv.sh | ||
| 23 | +source activate.sh | ||
| 24 | +``` | ||
| 25 | + | ||
| 26 | +如需运行本地 embedding / 图像编码服务(torch/transformers 等依赖较重): | ||
| 15 | 27 | ||
| 16 | ```bash | 28 | ```bash |
| 17 | cd /data/saas-search | 29 | cd /data/saas-search |
| 30 | +INSTALL_ML=1 ./scripts/create_venv.sh | ||
| 18 | source activate.sh | 31 | source activate.sh |
| 19 | ``` | 32 | ``` |
| 20 | 33 | ||
| 21 | -`activate.sh` 会激活 Conda 环境 `searchengine`。若在新机器上部署,请先设置本机 Conda 路径再执行: | 34 | +日常使用: |
| 35 | + | ||
| 36 | +```bash | ||
| 37 | +cd /data/saas-search | ||
| 38 | +source activate.sh | ||
| 39 | +``` | ||
| 40 | + | ||
| 41 | +### 2.2 Conda(兼容旧流程) | ||
| 42 | + | ||
| 43 | +`activate.sh` 会在未发现 `.venv` 时回退激活 Conda 环境 `searchengine`。若在新机器上部署,请先设置本机 Conda 路径再执行: | ||
| 22 | 44 | ||
| 23 | ```bash | 45 | ```bash |
| 24 | # 你的 conda 在 ~/anaconda3/bin/conda,则 CONDA_ROOT=~/anaconda3 | 46 | # 你的 conda 在 ~/anaconda3/bin/conda,则 CONDA_ROOT=~/anaconda3 |
| @@ -26,7 +48,7 @@ export CONDA_ROOT=$HOME/anaconda3 # 或你的 Conda 安装路径(如 /home/u | @@ -26,7 +48,7 @@ export CONDA_ROOT=$HOME/anaconda3 # 或你的 Conda 安装路径(如 /home/u | ||
| 26 | source activate.sh | 48 | source activate.sh |
| 27 | ``` | 49 | ``` |
| 28 | 50 | ||
| 29 | -**新机器首次部署(创建环境)**:若本机尚未创建 `searchengine` 环境,任选其一: | 51 | +**新机器首次部署(创建 Conda 环境)**:若本机尚未创建 `searchengine` 环境,任选其一: |
| 30 | 52 | ||
| 31 | - **方式 A(推荐,与 environment.yml 一致)**: | 53 | - **方式 A(推荐,与 environment.yml 一致)**: |
| 32 | ```bash | 54 | ```bash |
| @@ -82,7 +104,7 @@ DB_PASSWORD=P89cZHS5d7dFyc9R | @@ -82,7 +104,7 @@ DB_PASSWORD=P89cZHS5d7dFyc9R | ||
| 82 | 104 | ||
| 83 | # Elasticsearch | 105 | # Elasticsearch |
| 84 | ES_HOST=http://localhost:9200 | 106 | ES_HOST=http://localhost:9200 |
| 85 | -ES_USERNAME=essa | 107 | +ES_USERNAME=saas |
| 86 | ES_PASSWORD=4hOaLaf41y2VuI8y | 108 | ES_PASSWORD=4hOaLaf41y2VuI8y |
| 87 | 109 | ||
| 88 | # Redis(可选) | 110 | # Redis(可选) |
| @@ -105,7 +127,7 @@ API_PORT=6002 | @@ -105,7 +127,7 @@ API_PORT=6002 | ||
| 105 | | 项目 | 值 | | 127 | | 项目 | 值 | |
| 106 | |------|----| | 128 | |------|----| |
| 107 | | **MySQL** | host `120.79.247.228`, port `3316`, user `saas`, password `P89cZHS5d7dFyc9R` | | 129 | | **MySQL** | host `120.79.247.228`, port `3316`, user `saas`, password `P89cZHS5d7dFyc9R` | |
| 108 | -| **Elasticsearch** | host `http://localhost:9200`, user `essa`, password `4hOaLaf41y2VuI8y` | | 130 | +| **Elasticsearch** | host `http://localhost:9200`, user `saas`, password `4hOaLaf41y2VuI8y` | |
| 109 | | **Redis(可选)** | host `localhost`, port `6479`, password `BMfv5aI31kgHWtlx` | | 131 | | **Redis(可选)** | host `localhost`, port `6479`, password `BMfv5aI31kgHWtlx` | |
| 110 | | **DeepL** | `c9293ab4-ad25-479b-919f-ab4e63b429ed` | | 132 | | **DeepL** | `c9293ab4-ad25-479b-919f-ab4e63b429ed` | |
| 111 | 133 |
frontend/README.md
| @@ -82,7 +82,7 @@ bash scripts/start_backend.sh | @@ -82,7 +82,7 @@ bash scripts/start_backend.sh | ||
| 82 | ``` | 82 | ``` |
| 83 | 83 | ||
| 84 | 2. **访问前端**: | 84 | 2. **访问前端**: |
| 85 | -打开浏览器访问:`http://120.76.41.98:6002/` | 85 | +打开浏览器访问:`http://43.166.252.75:6002/` |
| 86 | 86 | ||
| 87 | ### 搜索示例 | 87 | ### 搜索示例 |
| 88 | 88 | ||
| @@ -97,7 +97,7 @@ bash scripts/start_backend.sh | @@ -97,7 +97,7 @@ bash scripts/start_backend.sh | ||
| 97 | 前端通过以下接口与后端通信: | 97 | 前端通过以下接口与后端通信: |
| 98 | 98 | ||
| 99 | ```javascript | 99 | ```javascript |
| 100 | -POST http://120.76.41.98:6002/search/ | 100 | +POST http://43.166.252.75:6002/search/ |
| 101 | 101 | ||
| 102 | 请求体: | 102 | 请求体: |
| 103 | { | 103 | { |
indexer/README.md
| @@ -198,7 +198,7 @@ categoryPath.set(categoryLang, translationCategoryPath) | @@ -198,7 +198,7 @@ categoryPath.set(categoryLang, translationCategoryPath) | ||
| 198 | 你当前要使用的翻译接口(Python 侧): | 198 | 你当前要使用的翻译接口(Python 侧): |
| 199 | 199 | ||
| 200 | ```bash | 200 | ```bash |
| 201 | -curl -X POST http://120.76.41.98:6006/translate \ | 201 | +curl -X POST http://43.166.252.75:6006/translate \ |
| 202 | -H "Content-Type: application/json" \ | 202 | -H "Content-Type: application/json" \ |
| 203 | -d '{"text":"儿童小男孩女孩开学 100 天衬衫短袖 搞笑图案字母印花庆祝上衣", | 203 | -d '{"text":"儿童小男孩女孩开学 100 天衬衫短袖 搞笑图案字母印花庆祝上衣", |
| 204 | "target_lang":"en", | 204 | "target_lang":"en", |
indexer/prompts.txt
| @@ -22,7 +22,7 @@ java索引程序职责: | @@ -22,7 +22,7 @@ java索引程序职责: | ||
| 22 | 本模块: | 22 | 本模块: |
| 23 | 负责 MySQL 基础数据 → 索引结构的doc (包括缓存) | 23 | 负责 MySQL 基础数据 → 索引结构的doc (包括缓存) |
| 24 | 24 | ||
| 25 | -翻译接口: curl -X POST http://120.76.41.98:6006/translate -H "Content-Type: application/json" -d '{"text":"儿童小男孩女孩开学 100 天衬衫短袖 搞笑图案字母印花庆祝上衣","target_lang":"en","source_lang":"auto"}' | 25 | +翻译接口: curl -X POST http://43.166.252.75:6006/translate -H "Content-Type: application/json" -d '{"text":"儿童小男孩女孩开学 100 天衬衫短袖 搞笑图案字母印花庆祝上衣","target_lang":"en","source_lang":"auto"}' |
| 26 | 26 | ||
| 27 | java的组织doc的逻辑都需要迁移过来。 | 27 | java的组织doc的逻辑都需要迁移过来。 |
| 28 | 28 |
query/query_parser.py
| @@ -8,7 +8,6 @@ from typing import Dict, List, Optional, Any, Union | @@ -8,7 +8,6 @@ from typing import Dict, List, Optional, Any, Union | ||
| 8 | import numpy as np | 8 | import numpy as np |
| 9 | import logging | 9 | import logging |
| 10 | import re | 10 | import re |
| 11 | -import hanlp | ||
| 12 | from concurrent.futures import Future, ThreadPoolExecutor, as_completed | 11 | from concurrent.futures import Future, ThreadPoolExecutor, as_completed |
| 13 | 12 | ||
| 14 | from embeddings import BgeEncoder | 13 | from embeddings import BgeEncoder |
| @@ -19,6 +18,10 @@ from .query_rewriter import QueryRewriter, QueryNormalizer | @@ -19,6 +18,10 @@ from .query_rewriter import QueryRewriter, QueryNormalizer | ||
| 19 | 18 | ||
| 20 | logger = logging.getLogger(__name__) | 19 | logger = logging.getLogger(__name__) |
| 21 | 20 | ||
| 21 | +try: | ||
| 22 | + import hanlp # type: ignore | ||
| 23 | +except Exception: # pragma: no cover | ||
| 24 | + hanlp = None | ||
| 22 | 25 | ||
| 23 | class ParsedQuery: | 26 | class ParsedQuery: |
| 24 | """Container for parsed query results.""" | 27 | """Container for parsed query results.""" |
| @@ -94,12 +97,22 @@ class QueryParser: | @@ -94,12 +97,22 @@ class QueryParser: | ||
| 94 | self.language_detector = LanguageDetector() | 97 | self.language_detector = LanguageDetector() |
| 95 | self.rewriter = QueryRewriter(config.query_config.rewrite_dictionary) | 98 | self.rewriter = QueryRewriter(config.query_config.rewrite_dictionary) |
| 96 | 99 | ||
| 97 | - # Initialize HanLP components at startup | ||
| 98 | - logger.info("Initializing HanLP components...") | ||
| 99 | - self._tok = hanlp.load(hanlp.pretrained.tok.CTB9_TOK_ELECTRA_BASE_CRF) | ||
| 100 | - self._tok.config.output_spans = True | ||
| 101 | - self._pos_tag = hanlp.load(hanlp.pretrained.pos.CTB9_POS_ELECTRA_SMALL) | ||
| 102 | - logger.info("HanLP components initialized") | 100 | + # Optional HanLP components (heavy). If unavailable, fall back to a lightweight tokenizer. |
| 101 | + self._tok = None | ||
| 102 | + self._pos_tag = None | ||
| 103 | + if hanlp is not None: | ||
| 104 | + try: | ||
| 105 | + logger.info("Initializing HanLP components...") | ||
| 106 | + self._tok = hanlp.load(hanlp.pretrained.tok.CTB9_TOK_ELECTRA_BASE_CRF) | ||
| 107 | + self._tok.config.output_spans = True | ||
| 108 | + self._pos_tag = hanlp.load(hanlp.pretrained.pos.CTB9_POS_ELECTRA_SMALL) | ||
| 109 | + logger.info("HanLP components initialized") | ||
| 110 | + except Exception as e: | ||
| 111 | + logger.warning(f"HanLP init failed, falling back to simple tokenizer: {e}") | ||
| 112 | + self._tok = None | ||
| 113 | + self._pos_tag = None | ||
| 114 | + else: | ||
| 115 | + logger.info("HanLP not installed; using simple tokenizer") | ||
| 103 | 116 | ||
| 104 | @property | 117 | @property |
| 105 | def text_encoder(self) -> BgeEncoder: | 118 | def text_encoder(self) -> BgeEncoder: |
| @@ -121,32 +134,51 @@ class QueryParser: | @@ -121,32 +134,51 @@ class QueryParser: | ||
| 121 | translation_context=self.config.query_config.translation_context | 134 | translation_context=self.config.query_config.translation_context |
| 122 | ) | 135 | ) |
| 123 | return self._translator | 136 | return self._translator |
| 137 | + | ||
| 138 | + def _simple_tokenize(self, text: str) -> List[str]: | ||
| 139 | + """ | ||
| 140 | + Lightweight tokenizer fallback. | ||
| 141 | + | ||
| 142 | + - Groups consecutive CJK chars as a token | ||
| 143 | + - Groups consecutive latin/digits/underscore/dash as a token | ||
| 144 | + """ | ||
| 145 | + if not text: | ||
| 146 | + return [] | ||
| 147 | + pattern = re.compile(r"[\u4e00-\u9fff]+|[A-Za-z0-9_]+(?:-[A-Za-z0-9_]+)*") | ||
| 148 | + return pattern.findall(text) | ||
| 124 | 149 | ||
| 125 | def _extract_keywords(self, query: str) -> str: | 150 | def _extract_keywords(self, query: str) -> str: |
| 126 | """Extract keywords (nouns with length > 1) from query.""" | 151 | """Extract keywords (nouns with length > 1) from query.""" |
| 127 | - tok_result = self._tok(query) | ||
| 128 | - if not tok_result: | ||
| 129 | - return "" | ||
| 130 | - | ||
| 131 | - words = [x[0] for x in tok_result] | ||
| 132 | - pos_tags = self._pos_tag(words) | ||
| 133 | - | ||
| 134 | - keywords = [] | ||
| 135 | - for word, pos in zip(words, pos_tags): | ||
| 136 | - if len(word) > 1 and pos.startswith('N'): | ||
| 137 | - keywords.append(word) | ||
| 138 | - | 152 | + if self._tok is not None and self._pos_tag is not None: |
| 153 | + tok_result = self._tok(query) | ||
| 154 | + if not tok_result: | ||
| 155 | + return "" | ||
| 156 | + words = [x[0] for x in tok_result] | ||
| 157 | + pos_tags = self._pos_tag(words) | ||
| 158 | + keywords = [] | ||
| 159 | + for word, pos in zip(words, pos_tags): | ||
| 160 | + if len(word) > 1 and isinstance(pos, str) and pos.startswith("N"): | ||
| 161 | + keywords.append(word) | ||
| 162 | + return " ".join(keywords) | ||
| 163 | + | ||
| 164 | + # Fallback: treat tokens with length > 1 as "keywords" | ||
| 165 | + tokens = self._simple_tokenize(query) | ||
| 166 | + keywords = [t for t in tokens if len(t) > 1] | ||
| 139 | return " ".join(keywords) | 167 | return " ".join(keywords) |
| 140 | 168 | ||
| 141 | def _get_token_count(self, query: str) -> int: | 169 | def _get_token_count(self, query: str) -> int: |
| 142 | - """Get token count using HanLP.""" | ||
| 143 | - tok_result = self._tok(query) | ||
| 144 | - return len(tok_result) if tok_result else 0 | 170 | + """Get token count (HanLP if available, otherwise simple).""" |
| 171 | + if self._tok is not None: | ||
| 172 | + tok_result = self._tok(query) | ||
| 173 | + return len(tok_result) if tok_result else 0 | ||
| 174 | + return len(self._simple_tokenize(query)) | ||
| 145 | 175 | ||
| 146 | def _get_query_tokens(self, query: str) -> List[str]: | 176 | def _get_query_tokens(self, query: str) -> List[str]: |
| 147 | - """Get token list using HanLP.""" | ||
| 148 | - tok_result = self._tok(query) | ||
| 149 | - return [x[0] for x in tok_result] if tok_result else [] | 177 | + """Get token list (HanLP if available, otherwise simple).""" |
| 178 | + if self._tok is not None: | ||
| 179 | + tok_result = self._tok(query) | ||
| 180 | + return [x[0] for x in tok_result] if tok_result else [] | ||
| 181 | + return self._simple_tokenize(query) | ||
| 150 | 182 | ||
| 151 | def parse( | 183 | def parse( |
| 152 | self, | 184 | self, |
requirements.txt
| @@ -12,14 +12,13 @@ pandas>=2.0.0 | @@ -12,14 +12,13 @@ pandas>=2.0.0 | ||
| 12 | # Elasticsearch | 12 | # Elasticsearch |
| 13 | elasticsearch>=8.0.0,<9.0.0 | 13 | elasticsearch>=8.0.0,<9.0.0 |
| 14 | 14 | ||
| 15 | -# ML/Embeddings | ||
| 16 | -torch>=2.0.0 | ||
| 17 | -sentence-transformers>=2.2.0 | ||
| 18 | -transformers>=4.30.0 | ||
| 19 | -modelscope>=1.9.0 | ||
| 20 | -cn-clip>=1.5.0 | 15 | +# Redis (cache; used by translator/embeddings) |
| 16 | +redis>=5.0.0 | ||
| 17 | + | ||
| 18 | +# Math / vector utilities (used across modules) | ||
| 21 | numpy>=1.24.0 | 19 | numpy>=1.24.0 |
| 22 | -pillow>=10.0.0 | 20 | + |
| 21 | +# LLM/Translation clients | ||
| 23 | openai>=1.0.0 | 22 | openai>=1.0.0 |
| 24 | 23 | ||
| 25 | # API | 24 | # API |
| @@ -0,0 +1,16 @@ | @@ -0,0 +1,16 @@ | ||
| 1 | +# Optional heavy dependencies for local embedding/image encoding. | ||
| 2 | +# | ||
| 3 | +# Install when you need: | ||
| 4 | +# - `./scripts/start_embedding_service.sh` (local embeddings server) | ||
| 5 | +# - local BGE-M3 / CN-CLIP inference | ||
| 6 | +# | ||
| 7 | +# Notes: | ||
| 8 | +# - `torch` wheels can be very large; if you want CPU-only wheels, | ||
| 9 | +# consider installing torch separately with the official CPU index. | ||
| 10 | +# | ||
| 11 | +torch>=2.0.0 | ||
| 12 | +sentence-transformers>=2.2.0 | ||
| 13 | +transformers>=4.30.0 | ||
| 14 | +modelscope>=1.9.0 | ||
| 15 | +cn-clip>=1.5.0 | ||
| 16 | +pillow>=10.0.0 |
| @@ -0,0 +1,59 @@ | @@ -0,0 +1,59 @@ | ||
#!/bin/bash
#
# Create and initialize the Python venv for saas-search.
#
# Usage:
#   ./scripts/create_venv.sh
#   INSTALL_ML=1 ./scripts/create_venv.sh        # also install requirements_ml.txt
#   PYTHON_BIN=python3.12 ./scripts/create_venv.sh
#
# Environment:
#   PYTHON_BIN  Interpreter used to create the venv. Defaults to python3.10;
#               falls back to python3 when the requested one is not on PATH.
#   INSTALL_ML  Set to 1 to additionally install requirements_ml.txt.
#
# The venv is created at <project root>/.venv. An existing complete venv is
# reused (dependencies are still installed into it); a directory without
# bin/activate is treated as incomplete and recreated.
#
set -euo pipefail

PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "${PROJECT_ROOT}"

VENV_DIR="${PROJECT_ROOT}/.venv"

REQUESTED_PYTHON="${PYTHON_BIN:-python3.10}"
PYTHON_BIN="${REQUESTED_PYTHON}"
if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then
  PYTHON_BIN="python3"
fi

# Fail here with an accurate message: a failing command substitution inside
# `echo` does not trip `set -e`, so without this check a missing interpreter
# would surface later as a misleading "ensurepip is not available" error.
if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then
  echo "ERROR: no Python interpreter found (tried ${REQUESTED_PYTHON} and python3)." >&2
  exit 1
fi

echo "Using python: $("${PYTHON_BIN}" --version)"

if ! "${PYTHON_BIN}" -c "import ensurepip" >/dev/null 2>&1; then
  echo "ERROR: ensurepip is not available for ${PYTHON_BIN}." >&2
  echo "On Ubuntu/Debian, install the venv package first, e.g.:" >&2
  echo "  sudo apt-get update -y && sudo apt-get install -y python3-venv" >&2
  echo "If you are using Python 3.12 specifically, you may need:" >&2
  echo "  sudo apt-get install -y python3.12-venv" >&2
  exit 1
fi

if [[ -d "${VENV_DIR}" ]]; then
  if [[ -f "${VENV_DIR}/bin/activate" ]]; then
    echo "venv already exists at ${VENV_DIR}"
  else
    echo "Found incomplete venv at ${VENV_DIR}, recreating..."
    # ${VAR:?} aborts instead of expanding to an empty path.
    rm -rf -- "${VENV_DIR:?}"
    "${PYTHON_BIN}" -m venv "${VENV_DIR}"
  fi
else
  echo "Creating venv at ${VENV_DIR} ..."
  "${PYTHON_BIN}" -m venv "${VENV_DIR}"
fi

# shellcheck disable=SC1091
source "${VENV_DIR}/bin/activate"

# From here on `python` resolves to the venv interpreter.
python -m pip install --upgrade pip setuptools wheel
python -m pip install -r requirements.txt

if [[ "${INSTALL_ML:-0}" == "1" ]]; then
  echo
  echo "INSTALL_ML=1 detected. Installing optional ML dependencies..."
  python -m pip install -r requirements_ml.txt
fi

echo
echo "Done."
echo "Next:"
echo "  source activate.sh"
scripts/mock_data.sh
| @@ -20,9 +20,7 @@ | @@ -20,9 +20,7 @@ | ||
| 20 | # ============================================================================ | 20 | # ============================================================================ |
| 21 | 21 | ||
| 22 | cd "$(dirname "$0")/.." | 22 | cd "$(dirname "$0")/.." |
| 23 | -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | ||
| 24 | -source "$CONDA_ROOT/etc/profile.d/conda.sh" | ||
| 25 | -conda activate searchengine | 23 | +source ./activate.sh |
| 26 | 24 | ||
| 27 | GREEN='\033[0;32m' | 25 | GREEN='\033[0;32m' |
| 28 | YELLOW='\033[1;33m' | 26 | YELLOW='\033[1;33m' |
| @@ -33,13 +31,6 @@ echo -e "${GREEN}========================================${NC}" | @@ -33,13 +31,6 @@ echo -e "${GREEN}========================================${NC}" | ||
| 33 | echo -e "${GREEN}Mock Data Script${NC}" | 31 | echo -e "${GREEN}Mock Data Script${NC}" |
| 34 | echo -e "${GREEN}========================================${NC}" | 32 | echo -e "${GREEN}========================================${NC}" |
| 35 | 33 | ||
| 36 | -# Load config from .env file if it exists | ||
| 37 | -if [ -f .env ]; then | ||
| 38 | - set -a | ||
| 39 | - source .env | ||
| 40 | - set +a | ||
| 41 | -fi | ||
| 42 | - | ||
| 43 | # ============================================================================ | 34 | # ============================================================================ |
| 44 | # 写死的配置参数(不需要配置化,这是测试数据构造脚本) | 35 | # 写死的配置参数(不需要配置化,这是测试数据构造脚本) |
| 45 | # ============================================================================ | 36 | # ============================================================================ |
scripts/start_backend.sh
| @@ -5,9 +5,7 @@ | @@ -5,9 +5,7 @@ | ||
| 5 | set -e | 5 | set -e |
| 6 | 6 | ||
| 7 | cd "$(dirname "$0")/.." | 7 | cd "$(dirname "$0")/.." |
| 8 | -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | ||
| 9 | -source "$CONDA_ROOT/etc/profile.d/conda.sh" | ||
| 10 | -conda activate searchengine | 8 | +source ./activate.sh |
| 11 | 9 | ||
| 12 | GREEN='\033[0;32m' | 10 | GREEN='\033[0;32m' |
| 13 | YELLOW='\033[1;33m' | 11 | YELLOW='\033[1;33m' |
| @@ -17,13 +15,6 @@ echo -e "${GREEN}========================================${NC}" | @@ -17,13 +15,6 @@ echo -e "${GREEN}========================================${NC}" | ||
| 17 | echo -e "${GREEN}Starting Backend API Service${NC}" | 15 | echo -e "${GREEN}Starting Backend API Service${NC}" |
| 18 | echo -e "${GREEN}========================================${NC}" | 16 | echo -e "${GREEN}========================================${NC}" |
| 19 | 17 | ||
| 20 | -# Load config from .env file if it exists | ||
| 21 | -if [ -f .env ]; then | ||
| 22 | - set -a | ||
| 23 | - source .env | ||
| 24 | - set +a | ||
| 25 | -fi | ||
| 26 | - | ||
| 27 | echo -e "\n${YELLOW}Configuration:${NC}" | 18 | echo -e "\n${YELLOW}Configuration:${NC}" |
| 28 | echo " API Host: ${API_HOST:-0.0.0.0}" | 19 | echo " API Host: ${API_HOST:-0.0.0.0}" |
| 29 | echo " API Port: ${API_PORT:-6002}" | 20 | echo " API Port: ${API_PORT:-6002}" |
scripts/start_embedding_service.sh
| @@ -12,12 +12,7 @@ set -e | @@ -12,12 +12,7 @@ set -e | ||
| 12 | 12 | ||
| 13 | cd "$(dirname "$0")/.." | 13 | cd "$(dirname "$0")/.." |
| 14 | 14 | ||
| 15 | -# Load conda env if available (keep consistent with other scripts) | ||
| 16 | -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | ||
| 17 | -if [ -f "$CONDA_ROOT/etc/profile.d/conda.sh" ]; then | ||
| 18 | - source "$CONDA_ROOT/etc/profile.d/conda.sh" | ||
| 19 | - conda activate searchengine | ||
| 20 | -fi | 15 | +source ./activate.sh |
| 21 | 16 | ||
| 22 | EMBEDDING_SERVICE_HOST=$(python -c "from embeddings.config import CONFIG; print(CONFIG.HOST)") | 17 | EMBEDDING_SERVICE_HOST=$(python -c "from embeddings.config import CONFIG; print(CONFIG.HOST)") |
| 23 | EMBEDDING_SERVICE_PORT=$(python -c "from embeddings.config import CONFIG; print(CONFIG.PORT)") | 18 | EMBEDDING_SERVICE_PORT=$(python -c "from embeddings.config import CONFIG; print(CONFIG.PORT)") |
scripts/start_frontend.sh
| @@ -5,9 +5,7 @@ | @@ -5,9 +5,7 @@ | ||
| 5 | set -e | 5 | set -e |
| 6 | 6 | ||
| 7 | cd "$(dirname "$0")/.." | 7 | cd "$(dirname "$0")/.." |
| 8 | -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | ||
| 9 | -source "$CONDA_ROOT/etc/profile.d/conda.sh" | ||
| 10 | -conda activate searchengine | 8 | +source ./activate.sh |
| 11 | 9 | ||
| 12 | GREEN='\033[0;32m' | 10 | GREEN='\033[0;32m' |
| 13 | YELLOW='\033[1;33m' | 11 | YELLOW='\033[1;33m' |
scripts/start_indexer.sh
| @@ -5,9 +5,7 @@ | @@ -5,9 +5,7 @@ | ||
| 5 | set -e | 5 | set -e |
| 6 | 6 | ||
| 7 | cd "$(dirname "$0")/.." | 7 | cd "$(dirname "$0")/.." |
| 8 | -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | ||
| 9 | -source "$CONDA_ROOT/etc/profile.d/conda.sh" | ||
| 10 | -conda activate searchengine | 8 | +source ./activate.sh |
| 11 | 9 | ||
| 12 | GREEN='\033[0;32m' | 10 | GREEN='\033[0;32m' |
| 13 | YELLOW='\033[1;33m' | 11 | YELLOW='\033[1;33m' |
| @@ -17,13 +15,6 @@ echo -e "${GREEN}========================================${NC}" | @@ -17,13 +15,6 @@ echo -e "${GREEN}========================================${NC}" | ||
| 17 | echo -e "${GREEN}Starting Indexer API Service${NC}" | 15 | echo -e "${GREEN}Starting Indexer API Service${NC}" |
| 18 | echo -e "${GREEN}========================================${NC}" | 16 | echo -e "${GREEN}========================================${NC}" |
| 19 | 17 | ||
| 20 | -# Load config from .env file if it exists | ||
| 21 | -if [ -f .env ]; then | ||
| 22 | - set -a | ||
| 23 | - source .env | ||
| 24 | - set +a | ||
| 25 | -fi | ||
| 26 | - | ||
| 27 | echo -e "\n${YELLOW}Configuration:${NC}" | 18 | echo -e "\n${YELLOW}Configuration:${NC}" |
| 28 | echo " INDEXER Host: ${INDEXER_HOST:-0.0.0.0}" | 19 | echo " INDEXER Host: ${INDEXER_HOST:-0.0.0.0}" |
| 29 | echo " INDEXER Port: ${INDEXER_PORT:-6004}" | 20 | echo " INDEXER Port: ${INDEXER_PORT:-6004}" |
scripts/tenant3__csv_to_shoplazza_xlsx.sh
| 1 | -# 激活环境 | ||
| 2 | -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | ||
| 3 | -source "$CONDA_ROOT/etc/profile.d/conda.sh" | ||
| 4 | -conda activate searchengine | 1 | +#!/bin/bash |
| 2 | +set -e | ||
| 3 | + | ||
| 4 | +cd "$(dirname "$0")/.." | ||
| 5 | +source ./activate.sh | ||
| 5 | 6 | ||
| 6 | # # 基本使用(生成所有数据) | 7 | # # 基本使用(生成所有数据) |
| 7 | # python scripts/csv_to_excel.py | 8 | # python scripts/csv_to_excel.py |
setup.sh
| 1 | #!/bin/bash | 1 | #!/bin/bash |
| 2 | 2 | ||
| 3 | -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | ||
| 4 | -source "$CONDA_ROOT/etc/profile.d/conda.sh" | ||
| 5 | - | ||
| 6 | # saas-search Setup and Startup Script | 3 | # saas-search Setup and Startup Script |
| 7 | # This script sets up the environment and starts all services | 4 | # This script sets up the environment and starts all services |
| 8 | 5 | ||
| @@ -22,24 +19,16 @@ echo -e "${GREEN}========================================${NC}" | @@ -22,24 +19,16 @@ echo -e "${GREEN}========================================${NC}" | ||
| 22 | cd "$(dirname "$0")" | 19 | cd "$(dirname "$0")" |
| 23 | PROJECT_ROOT=$(pwd) | 20 | PROJECT_ROOT=$(pwd) |
| 24 | 21 | ||
| 25 | -echo -e "\n${YELLOW}Step 1: Setting up Conda environment${NC}" | ||
| 26 | -# Check if conda is available | ||
| 27 | -if ! command -v conda &> /dev/null; then | ||
| 28 | - echo -e "${RED}Error: conda not found. Please install Miniconda or Anaconda${NC}" | ||
| 29 | - exit 1 | ||
| 30 | -fi | 22 | +echo -e "\n${YELLOW}Step 1: Setting up Python environment (venv preferred)${NC}" |
| 31 | 23 | ||
| 32 | -# Check if environment exists | ||
| 33 | -if conda env list | grep -q "searchengine"; then | ||
| 34 | - echo -e "${GREEN}Environment 'searchengine' already exists${NC}" | ||
| 35 | - conda activate searchengine | ||
| 36 | -else | ||
| 37 | - echo -e "${YELLOW}Creating conda environment 'searchengine'...${NC}" | ||
| 38 | - conda env create -f environment.yml | ||
| 39 | - conda activate searchengine | ||
| 40 | - echo -e "${GREEN}Environment created successfully!${NC}" | 24 | +if [ ! -f "${PROJECT_ROOT}/.venv/bin/activate" ]; then |
| 25 | + echo -e "${YELLOW}Creating venv and installing dependencies...${NC}" | ||
| 26 | + ./scripts/create_venv.sh | ||
| 41 | fi | 27 | fi |
| 42 | 28 | ||
| 29 | +# Activate environment + load .env | ||
| 30 | +source ./activate.sh | ||
| 31 | + | ||
| 43 | # Verify environment | 32 | # Verify environment |
| 44 | echo -e "\n${YELLOW}Current Python version:${NC}" | 33 | echo -e "\n${YELLOW}Current Python version:${NC}" |
| 45 | python --version | 34 | python --version |
| @@ -74,7 +63,7 @@ echo -e "${GREEN}Setup Complete!${NC}" | @@ -74,7 +63,7 @@ echo -e "${GREEN}Setup Complete!${NC}" | ||
| 74 | echo -e "${GREEN}========================================${NC}" | 63 | echo -e "${GREEN}========================================${NC}" |
| 75 | echo "" | 64 | echo "" |
| 76 | echo -e "Next steps:" | 65 | echo -e "Next steps:" |
| 77 | -echo -e " 1. Ingest data: ${YELLOW}./scripts/ingest.sh${NC}" | ||
| 78 | -echo -e " 2. Start backend: ${YELLOW}./scripts/start_backend.sh${NC}" | 66 | +echo -e " 1. Start backend: ${YELLOW}./scripts/start_backend.sh${NC}" |
| 67 | +echo -e " 2. Start indexer: ${YELLOW}./scripts/start_indexer.sh${NC}" | ||
| 79 | echo -e " 3. Start frontend: ${YELLOW}./scripts/start_frontend.sh${NC}" | 68 | echo -e " 3. Start frontend: ${YELLOW}./scripts/start_frontend.sh${NC}" |
| 80 | echo "" | 69 | echo "" |