Commit 484adbfe9ccfae39f5ce92c0173011d724402d47
1 parent
a7920e17
adapt ubuntu; conda -> venv
Showing
26 changed files
with
290 additions
and
154 deletions
Show diff stats
| 1 | 1 | # Elasticsearch Configuration |
| 2 | 2 | ES_HOST=http://localhost:9200 |
| 3 | -ES_USERNAME=essa | |
| 3 | +ES_USERNAME=saas | |
| 4 | 4 | ES_PASSWORD=4hOaLaf41y2VuI8y |
| 5 | 5 | |
| 6 | 6 | # Redis Configuration (Optional) |
| ... | ... | @@ -30,7 +30,7 @@ IMAGE_MODEL_DIR=/data/tw/models/cn-clip # 已经改为web请求了,不使用… |
| 30 | 30 | CACHE_DIR=.cache |
| 31 | 31 | |
| 32 | 32 | # Frontend API Base URL |
| 33 | -API_BASE_URL=http://120.76.41.98:6002 | |
| 33 | +API_BASE_URL=http://43.166.252.75:6002 | |
| 34 | 34 | |
| 35 | 35 | |
| 36 | 36 | DASHSCOPE_API_KEY=sk-c3b8d4db061840aa8effb748df2a997b | ... | ... |
.gitignore
README.md
| ... | ... | @@ -70,7 +70,7 @@ query anchor |
| 70 | 70 | |
| 71 | 71 | 对外: |
| 72 | 72 | embedding服务: |
| 73 | - curl -X POST http://120.76.41.98:6005/embed/text \ | |
| 73 | + curl -X POST http://43.166.252.75:6005/embed/text \ | |
| 74 | 74 | -H "Content-Type: application/json" \ |
| 75 | 75 | -d '["衣服", "Bohemian Maxi Dress"]' |
| 76 | 76 | |
| ... | ... | @@ -91,7 +91,7 @@ localhost替换为 |
| 91 | 91 | 服务器内网地址: |
| 92 | 92 | 10.0.163.168 |
| 93 | 93 | 公网地址: |
| 94 | -120.76.41.98 | |
| 94 | +43.166.252.75 | |
| 95 | 95 | |
| 96 | 96 | |
| 97 | 97 | # 电商搜索引擎 SaaS |
| ... | ... | @@ -101,15 +101,18 @@ localhost替换为 |
| 101 | 101 | |
| 102 | 102 | ## 项目环境 |
| 103 | 103 | |
| 104 | -以项目根目录的 **`activate.sh`** 为准(会激活 Conda 环境 `searchengine` 并加载 `.env`): | |
| 104 | +以项目根目录的 **`activate.sh`** 为准(**优先激活 venv:`./.venv`,并加载 `.env`;兼容 Conda 回退**): | |
| 105 | 105 | |
| 106 | 106 | ```bash |
| 107 | -# 若在新机器且 Conda 不在默认路径,先设置: | |
| 108 | -# - 你的 conda 是 ~/anaconda3/bin/conda,则:export CONDA_ROOT=$HOME/anaconda3 | |
| 107 | +# 推荐:首次创建 venv(默认安装基础依赖) | |
| 108 | +./scripts/create_venv.sh | |
| 109 | + | |
| 110 | +# 如需本地向量/图片编码(会安装 torch/transformers 等重依赖) | |
| 111 | +# INSTALL_ML=1 ./scripts/create_venv.sh | |
| 109 | 112 | source activate.sh |
| 110 | 113 | ``` |
| 111 | 114 | |
| 112 | -新机器首次需创建环境,见 `docs/环境配置说明.md`(`conda env create -f environment.yml` 或 `pip install -r requirements.txt`)。 | |
| 115 | +新机器首次需创建环境,见 `docs/环境配置说明.md`(推荐 venv;Conda 为兼容旧流程)。 | |
| 113 | 116 | |
| 114 | 117 | ## 测试pipeline |
| 115 | 118 | ... | ... |
activate.sh
| 1 | 1 | #!/bin/bash |
| 2 | -# 新机器部署:可设置 CONDA_ROOT 指向本机 Conda 路径 | |
| 3 | -# 例如你的 conda 是 ~/anaconda3/bin/conda,则 export CONDA_ROOT=$HOME/anaconda3 | |
| 4 | -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | |
| 5 | -source "$CONDA_ROOT/etc/profile.d/conda.sh" | |
| 6 | -conda activate searchengine | |
| 2 | +# | |
| 3 | +# Unified environment activator (venv preferred, conda fallback). | |
| 4 | +# | |
| 5 | +# Usage: | |
| 6 | +# source activate.sh | |
| 7 | +# | |
| 8 | +# Priority: | |
| 9 | +# 1) ./.venv (Python venv) | |
| 10 | +# 2) conda env "searchengine" (legacy) | |
| 11 | +# | |
| 12 | + | |
| 13 | +# Must be sourced | |
| 14 | +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | |
| 15 | + echo "ERROR: Please source this script: source activate.sh" >&2 | |
| 16 | + exit 1 | |
| 17 | +fi | |
| 18 | + | |
| 19 | +PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" | |
| 20 | + | |
| 21 | +# 1) venv (preferred) | |
| 22 | +VENV_ACTIVATE="${PROJECT_ROOT}/.venv/bin/activate" | |
| 23 | +if [[ -f "${VENV_ACTIVATE}" ]]; then | |
| 24 | + # shellcheck disable=SC1090 | |
| 25 | + source "${VENV_ACTIVATE}" | |
| 26 | + ENV_KIND="venv" | |
| 27 | +else | |
| 28 | + # 2) conda fallback (legacy) | |
| 29 | + # 新机器部署:可设置 CONDA_ROOT 指向本机 Conda 路径 | |
| 30 | + # 例如你的 conda 是 ~/anaconda3/bin/conda,则 export CONDA_ROOT=$HOME/anaconda3 | |
| 31 | + CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | |
| 32 | + if [[ -f "${CONDA_ROOT}/etc/profile.d/conda.sh" ]]; then | |
| 33 | + # shellcheck disable=SC1091 | |
| 34 | + source "${CONDA_ROOT}/etc/profile.d/conda.sh" | |
| 35 | + conda activate searchengine | |
| 36 | + ENV_KIND="conda" | |
| 37 | + else | |
| 38 | + echo "ERROR: No .venv found and conda.sh not found at ${CONDA_ROOT}/etc/profile.d/conda.sh" >&2 | |
| 39 | + echo " - Create venv: ./scripts/create_venv.sh" >&2 | |
| 40 | + echo " - Or set CONDA_ROOT to your conda install path" >&2 | |
| 41 | + return 1 | |
| 42 | + fi | |
| 43 | +fi | |
| 7 | 44 | |
| 8 | 45 | # 如果需要加载 .env 中的环境变量 |
| 9 | -if [ -f .env ]; then | |
| 46 | +ENV_FILE="${PROJECT_ROOT}/.env" | |
| 47 | +if [ -f "${ENV_FILE}" ]; then | |
| 10 | 48 | set -a # 自动导出所有变量 |
| 11 | - source <(grep -v '^#' .env | grep -v '^$' | sed 's/#.*$//' | sed 's/\r$//') | |
| 49 | + # NOTE: This loader tolerates comments/blank lines and strips inline comments. | |
| 50 | + source <(grep -v '^#' "${ENV_FILE}" | grep -v '^$' | sed 's/#.*$//' | sed 's/\r$//') | |
| 12 | 51 | set +a # 关闭自动导出 |
| 13 | 52 | fi |
| 14 | 53 | |
| 15 | -echo "Environment activated: searchengine" | |
| 54 | +echo "Environment activated (${ENV_KIND}): ${VIRTUAL_ENV:-${CONDA_DEFAULT_ENV:-unknown}}" | ... | ... |
docs/CNCLIP_SERVICE说明文档.md
| ... | ... | @@ -12,7 +12,7 @@ normalize后的结果: |
| 12 | 12 | https://aisearch.cdn.bcebos.com/fileManager/GtB5doGAr1skTx38P7fb7Q/182.jpg?authorization=bce-auth-v1%2F7e22d8caf5af46cc9310f1e3021709f3%2F2025-12-30T04%3A45%3A38Z%2F86400%2Fhost%2Ffe222039926cb7ff593021af40268c782b8892598114e24773d0c1bfc976a8df |
| 13 | 13 | https://oss.essa.cn/2e353867-7496-4d4e-a7c8-0af50f49f6eb.jpg?x-oss-process=image/resize,m_lfit,w_2048,h_2048 |
| 14 | 14 | |
| 15 | -curl -X POST "http://120.76.41.98:5000/embedding/generate_image_embeddings" -H "Content-Type: application/json" -d '[ | |
| 15 | +curl -X POST "http://43.166.252.75:5000/embedding/generate_image_embeddings" -H "Content-Type: application/json" -d '[ | |
| 16 | 16 | { |
| 17 | 17 | "id": "test_1", |
| 18 | 18 | "pic_url": "https://oss.essa.cn/98532128-cf8e-456c-9e30-6f2a5ea0c19f.jpg" | ... | ... |
docs/ES/ES_8.18/1_ES配置和使用.md
docs/ES/ES_8.18/2_kibana安装.md
docs/Usage-Guide.md
| ... | ... | @@ -27,10 +27,19 @@ |
| 27 | 27 | |
| 28 | 28 | #### 1. 安装 Python 依赖与激活环境 |
| 29 | 29 | |
| 30 | -**推荐**:使用项目根目录的 `activate.sh` 激活环境(会加载 `.env`)。新机器部署时若 Conda 不在默认路径,请先设置 `CONDA_ROOT`(例如你的 conda 是 `~/anaconda3/bin/conda`,则 `export CONDA_ROOT=$HOME/anaconda3`)。详见 `docs/环境配置说明.md`。 | |
| 30 | +**推荐**:使用项目根目录的 `activate.sh` 激活环境(会加载 `.env`)。目前推荐 venv(`.venv`);Conda 仅作为兼容回退(Conda 不在默认路径时需先设置 `CONDA_ROOT`)。详见 `docs/环境配置说明.md`。 | |
| 31 | 31 | |
| 32 | 32 | ```bash |
| 33 | 33 | cd /data/saas-search |
| 34 | +./scripts/create_venv.sh # 首次创建 venv(只需执行一次) | |
| 35 | +source activate.sh | |
| 36 | +``` | |
| 37 | + | |
| 38 | +如果需要本地 embedding / 图像编码(会安装 torch/transformers 等较重依赖): | |
| 39 | + | |
| 40 | +```bash | |
| 41 | +cd /data/saas-search | |
| 42 | +INSTALL_ML=1 ./scripts/create_venv.sh | |
| 34 | 43 | source activate.sh |
| 35 | 44 | ``` |
| 36 | 45 | ... | ... |
docs/temporary/sku_image_src问题诊断报告.md
| ... | ... | @@ -98,7 +98,7 @@ else: |
| 98 | 98 | |
| 99 | 99 | 2. **验证修复**:重新索引后,查询 ES 验证 `image_src` 字段是否已包含: |
| 100 | 100 | ```bash |
| 101 | - curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' \ | |
| 101 | + curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' \ | |
| 102 | 102 | -H 'Content-Type: application/json' \ |
| 103 | 103 | -d '{ |
| 104 | 104 | "size": 1, | ... | ... |
docs/常用查询 - ES.md
| ... | ... | @@ -8,7 +8,7 @@ |
| 8 | 8 | # 一般情况下不需要在查询中再按 tenant_id 过滤(可选保留用于排查)。 |
| 9 | 9 | |
| 10 | 10 | ### 1. 根据 tenant_id / spu_id 查询 |
| 11 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 11 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 12 | 12 | "size": 11, |
| 13 | 13 | "_source": ["title"], |
| 14 | 14 | "query": { |
| ... | ... | @@ -21,7 +21,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ |
| 21 | 21 | }' |
| 22 | 22 | |
| 23 | 23 | |
| 24 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 24 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 25 | 25 | "size": 100, |
| 26 | 26 | "_source": ["title"], |
| 27 | 27 | "query": { |
| ... | ... | @@ -30,7 +30,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ |
| 30 | 30 | }' |
| 31 | 31 | |
| 32 | 32 | |
| 33 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 33 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 34 | 34 | "size": 5, |
| 35 | 35 | "_source": ["title", "keywords", "tags"], |
| 36 | 36 | "query": { |
| ... | ... | @@ -43,7 +43,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ |
| 43 | 43 | }' |
| 44 | 44 | |
| 45 | 45 | |
| 46 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 46 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 47 | 47 | "size": 1, |
| 48 | 48 | "_source": ["title", "keywords", "tags"], |
| 49 | 49 | "query": { |
| ... | ... | @@ -65,7 +65,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ |
| 65 | 65 | }' |
| 66 | 66 | |
| 67 | 67 | |
| 68 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 68 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 69 | 69 | "size": 1, |
| 70 | 70 | "_source": ["title"], |
| 71 | 71 | "query": { |
| ... | ... | @@ -86,17 +86,17 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ |
| 86 | 86 | } |
| 87 | 87 | }' |
| 88 | 88 | |
| 89 | -Curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{ | |
| 89 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{ | |
| 90 | 90 | "analyzer": "index_ansj", |
| 91 | 91 | "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝" |
| 92 | 92 | }' |
| 93 | 93 | |
| 94 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{ | |
| 94 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_analyze' -H 'Content-Type: application/json' -d '{ | |
| 95 | 95 | "analyzer": "query_ansj", |
| 96 | 96 | "text": "14寸第4代-眼珠实身冰雪公仔带手动大推车,搪胶雪宝宝" |
| 97 | 97 | }' |
| 98 | 98 | |
| 99 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 99 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 100 | 100 | "size": 100, |
| 101 | 101 | "from": 0, |
| 102 | 102 | "query": { |
| ... | ... | @@ -131,7 +131,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_ |
| 131 | 131 | } |
| 132 | 132 | }' |
| 133 | 133 | |
| 134 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 134 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 135 | 135 | "size": 1, |
| 136 | 136 | "from": 0, |
| 137 | 137 | "query": { |
| ... | ... | @@ -258,7 +258,7 @@ GET /search_products_tenant_2/_search |
| 258 | 258 | } |
| 259 | 259 | |
| 260 | 260 | |
| 261 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 261 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 262 | 262 | "size": 5, |
| 263 | 263 | "query": { |
| 264 | 264 | "bool": { |
| ... | ... | @@ -271,7 +271,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/ |
| 271 | 271 | |
| 272 | 272 | |
| 273 | 273 | ### 2. 统计租户的总文档数 |
| 274 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_count?pretty' -H 'Content-Type: application/json' -d '{ | |
| 274 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_170/_count?pretty' -H 'Content-Type: application/json' -d '{ | |
| 275 | 275 | "query": { |
| 276 | 276 | "match_all": {} |
| 277 | 277 | } |
| ... | ... | @@ -285,7 +285,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te |
| 285 | 285 | ## 1. 检查ES文档的分面字段数据 |
| 286 | 286 | |
| 287 | 287 | ### 1.1 查询特定租户的商品,显示分面相关字段 |
| 288 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 288 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 289 | 289 | "query": { |
| 290 | 290 | "term": { |
| 291 | 291 | "tenant_id": "162" |
| ... | ... | @@ -306,7 +306,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te |
| 306 | 306 | }' |
| 307 | 307 | |
| 308 | 308 | ### 1.2 验证category1_name字段是否有数据 |
| 309 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 309 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 310 | 310 | "query": { |
| 311 | 311 | "bool": { |
| 312 | 312 | "filter": [ |
| ... | ... | @@ -319,7 +319,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te |
| 319 | 319 | }' |
| 320 | 320 | |
| 321 | 321 | ### 1.3 验证specifications字段是否有数据 |
| 322 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 322 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 323 | 323 | "query": { |
| 324 | 324 | "bool": { |
| 325 | 325 | "filter": [ |
| ... | ... | @@ -334,7 +334,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te |
| 334 | 334 | ## 2. 分面聚合查询(Facet Aggregations) |
| 335 | 335 | |
| 336 | 336 | ### 2.1 category1_name 分面聚合 |
| 337 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 337 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 338 | 338 | "query": { |
| 339 | 339 | "match_all": {} |
| 340 | 340 | }, |
| ... | ... | @@ -350,7 +350,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te |
| 350 | 350 | }' |
| 351 | 351 | |
| 352 | 352 | ### 2.2 specifications.color 分面聚合 |
| 353 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 353 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 354 | 354 | "query": { |
| 355 | 355 | "match_all": {} |
| 356 | 356 | }, |
| ... | ... | @@ -382,7 +382,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te |
| 382 | 382 | }' |
| 383 | 383 | |
| 384 | 384 | ### 2.3 specifications.size 分面聚合 |
| 385 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 385 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 386 | 386 | "query": { |
| 387 | 387 | "match_all": {} |
| 388 | 388 | }, |
| ... | ... | @@ -414,7 +414,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te |
| 414 | 414 | }' |
| 415 | 415 | |
| 416 | 416 | ### 2.4 specifications.material 分面聚合 |
| 417 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 417 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 418 | 418 | "query": { |
| 419 | 419 | "match_all": {} |
| 420 | 420 | }, |
| ... | ... | @@ -446,7 +446,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te |
| 446 | 446 | }' |
| 447 | 447 | |
| 448 | 448 | ### 2.5 综合分面聚合(category + color + size + material) |
| 449 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 449 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 450 | 450 | "query": { |
| 451 | 451 | "match_all": {} |
| 452 | 452 | }, |
| ... | ... | @@ -530,7 +530,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te |
| 530 | 530 | ## 3. 检查specifications嵌套字段的详细结构 |
| 531 | 531 | |
| 532 | 532 | ### 3.1 查看specifications的name字段有哪些值 |
| 533 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 533 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 534 | 534 | "query": { |
| 535 | 535 | "term": { |
| 536 | 536 | "tenant_id": "162" |
| ... | ... | @@ -555,7 +555,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 555 | 555 | }' |
| 556 | 556 | |
| 557 | 557 | ### 3.2 查看某个商品的完整specifications数据 |
| 558 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 558 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 559 | 559 | "query": { |
| 560 | 560 | "bool": { |
| 561 | 561 | "filter": [ |
| ... | ... | @@ -571,7 +571,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products/_s |
| 571 | 571 | ## 4. 统计查询 |
| 572 | 572 | |
| 573 | 573 | ### 4.1 统计有category1_name的文档数量 |
| 574 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{ | |
| 574 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{ | |
| 575 | 575 | "query": { |
| 576 | 576 | "bool": { |
| 577 | 577 | "filter": [ |
| ... | ... | @@ -582,7 +582,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te |
| 582 | 582 | }' |
| 583 | 583 | |
| 584 | 584 | ### 4.2 统计有specifications的文档数量 |
| 585 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{ | |
| 585 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_count?pretty' -H 'Content-Type: application/json' -d '{ | |
| 586 | 586 | "query": { |
| 587 | 587 | "bool": { |
| 588 | 588 | "filter": [ |
| ... | ... | @@ -596,7 +596,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te |
| 596 | 596 | ## 5. 诊断问题场景 |
| 597 | 597 | |
| 598 | 598 | ### 5.1 查找没有category1_name但有category的文档(MySQL有数据但ES没有) |
| 599 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 599 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 600 | 600 | "query": { |
| 601 | 601 | "bool": { |
| 602 | 602 | "filter": [ |
| ... | ... | @@ -612,7 +612,7 @@ curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_te |
| 612 | 612 | }' |
| 613 | 613 | |
| 614 | 614 | ### 5.2 查找有option但没有specifications的文档(数据转换问题) |
| 615 | -curl -u 'essa:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 615 | +curl -u 'saas:4hOaLaf41y2VuI8y' -X GET 'http://localhost:9200/search_products_tenant_162/_search?pretty' -H 'Content-Type: application/json' -d '{ | |
| 616 | 616 | "query": { |
| 617 | 617 | "bool": { |
| 618 | 618 | "filter": [ | ... | ... |
docs/搜索API对接指南.md
| ... | ... | @@ -64,7 +64,7 @@ |
| 64 | 64 | |
| 65 | 65 | ### 1.1 基础信息 |
| 66 | 66 | |
| 67 | -- **Base URL**: `http://120.76.41.98:6002` | |
| 67 | +- **Base URL**: `http://43.166.252.75:6002` | |
| 68 | 68 | - **协议**: HTTP/HTTPS |
| 69 | 69 | - **数据格式**: JSON |
| 70 | 70 | - **字符编码**: UTF-8 |
| ... | ... | @@ -75,7 +75,7 @@ |
| 75 | 75 | ### 1.2 最简单的搜索请求 |
| 76 | 76 | |
| 77 | 77 | ```bash |
| 78 | -curl -X POST "http://120.76.41.98:6002/search/" \ | |
| 78 | +curl -X POST "http://43.166.252.75:6002/search/" \ | |
| 79 | 79 | -H "Content-Type: application/json" \ |
| 80 | 80 | -H "X-Tenant-ID: 162" \ |
| 81 | 81 | -d '{"query": "芭比娃娃"}' |
| ... | ... | @@ -84,7 +84,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ |
| 84 | 84 | ### 1.3 带过滤与分页的搜索 |
| 85 | 85 | |
| 86 | 86 | ```bash |
| 87 | -curl -X POST "http://120.76.41.98:6002/search/" \ | |
| 87 | +curl -X POST "http://43.166.252.75:6002/search/" \ | |
| 88 | 88 | -H "Content-Type: application/json" \ |
| 89 | 89 | -H "X-Tenant-ID: 162" \ |
| 90 | 90 | -d '{ |
| ... | ... | @@ -108,7 +108,7 @@ curl -X POST "http://120.76.41.98:6002/search/" \ |
| 108 | 108 | ### 1.4 开启分面的搜索 |
| 109 | 109 | |
| 110 | 110 | ```bash |
| 111 | -curl -X POST "http://120.76.41.98:6002/search/" \ | |
| 111 | +curl -X POST "http://43.166.252.75:6002/search/" \ | |
| 112 | 112 | -H "Content-Type: application/json" \ |
| 113 | 113 | -H "X-Tenant-ID: 162" \ |
| 114 | 114 | -d '{ | ... | ... |
docs/环境配置说明.md
| ... | ... | @@ -11,14 +11,36 @@ |
| 11 | 11 | |
| 12 | 12 | ## 2. Python 运行环境 |
| 13 | 13 | |
| 14 | -**推荐方式(与项目脚本一致)**:使用项目根目录下的 `activate.sh` 激活环境,会自动加载当前目录下的 `.env`(忽略注释与空行): | |
| 14 | +本项目历史上使用 Conda 管理环境;目前推荐使用 **venv**(更轻量、对 CI/容器更友好)。项目根目录下的 `activate.sh` 已升级为 **优先激活 `.venv`,并兼容 Conda 回退**,且会自动加载项目根目录下的 `.env`(忽略注释与空行)。 | |
| 15 | + | |
| 16 | +### 2.1 venv(推荐) | |
| 17 | + | |
| 18 | +首次创建 venv: | |
| 19 | + | |
| 20 | +```bash | |
| 21 | +cd /data/saas-search | |
| 22 | +./scripts/create_venv.sh | |
| 23 | +source activate.sh | |
| 24 | +``` | |
| 25 | + | |
| 26 | +如需运行本地 embedding / 图像编码服务(torch/transformers 等依赖较重): | |
| 15 | 27 | |
| 16 | 28 | ```bash |
| 17 | 29 | cd /data/saas-search |
| 30 | +INSTALL_ML=1 ./scripts/create_venv.sh | |
| 18 | 31 | source activate.sh |
| 19 | 32 | ``` |
| 20 | 33 | |
| 21 | -`activate.sh` 会激活 Conda 环境 `searchengine`。若在新机器上部署,请先设置本机 Conda 路径再执行: | |
| 34 | +日常使用: | |
| 35 | + | |
| 36 | +```bash | |
| 37 | +cd /data/saas-search | |
| 38 | +source activate.sh | |
| 39 | +``` | |
| 40 | + | |
| 41 | +### 2.2 Conda(兼容旧流程) | |
| 42 | + | |
| 43 | +`activate.sh` 会在未发现 `.venv` 时回退激活 Conda 环境 `searchengine`。若在新机器上部署,请先设置本机 Conda 路径再执行: | |
| 22 | 44 | |
| 23 | 45 | ```bash |
| 24 | 46 | # 你的 conda 在 ~/anaconda3/bin/conda,则 CONDA_ROOT=~/anaconda3 |
| ... | ... | @@ -26,7 +48,7 @@ export CONDA_ROOT=$HOME/anaconda3 # 或你的 Conda 安装路径(如 /home/u |
| 26 | 48 | source activate.sh |
| 27 | 49 | ``` |
| 28 | 50 | |
| 29 | -**新机器首次部署(创建环境)**:若本机尚未创建 `searchengine` 环境,任选其一: | |
| 51 | +**新机器首次部署(创建 Conda 环境)**:若本机尚未创建 `searchengine` 环境,任选其一: | |
| 30 | 52 | |
| 31 | 53 | - **方式 A(推荐,与 environment.yml 一致)**: |
| 32 | 54 | ```bash |
| ... | ... | @@ -82,7 +104,7 @@ DB_PASSWORD=P89cZHS5d7dFyc9R |
| 82 | 104 | |
| 83 | 105 | # Elasticsearch |
| 84 | 106 | ES_HOST=http://localhost:9200 |
| 85 | -ES_USERNAME=essa | |
| 107 | +ES_USERNAME=saas | |
| 86 | 108 | ES_PASSWORD=4hOaLaf41y2VuI8y |
| 87 | 109 | |
| 88 | 110 | # Redis(可选) |
| ... | ... | @@ -105,7 +127,7 @@ API_PORT=6002 |
| 105 | 127 | | 项目 | 值 | |
| 106 | 128 | |------|----| |
| 107 | 129 | | **MySQL** | host `120.79.247.228`, port `3316`, user `saas`, password `P89cZHS5d7dFyc9R` | |
| 108 | -| **Elasticsearch** | host `http://localhost:9200`, user `essa`, password `4hOaLaf41y2VuI8y` | | |
| 130 | +| **Elasticsearch** | host `http://localhost:9200`, user `saas`, password `4hOaLaf41y2VuI8y` | | |
| 109 | 131 | | **Redis(可选)** | host `localhost`, port `6479`, password `BMfv5aI31kgHWtlx` | |
| 110 | 132 | | **DeepL** | `c9293ab4-ad25-479b-919f-ab4e63b429ed` | |
| 111 | 133 | ... | ... |
frontend/README.md
| ... | ... | @@ -82,7 +82,7 @@ bash scripts/start_backend.sh |
| 82 | 82 | ``` |
| 83 | 83 | |
| 84 | 84 | 2. **访问前端**: |
| 85 | -打开浏览器访问:`http://120.76.41.98:6002/` | |
| 85 | +打开浏览器访问:`http://43.166.252.75:6002/` | |
| 86 | 86 | |
| 87 | 87 | ### 搜索示例 |
| 88 | 88 | |
| ... | ... | @@ -97,7 +97,7 @@ bash scripts/start_backend.sh |
| 97 | 97 | 前端通过以下接口与后端通信: |
| 98 | 98 | |
| 99 | 99 | ```javascript |
| 100 | -POST http://120.76.41.98:6002/search/ | |
| 100 | +POST http://43.166.252.75:6002/search/ | |
| 101 | 101 | |
| 102 | 102 | 请求体: |
| 103 | 103 | { | ... | ... |
indexer/README.md
| ... | ... | @@ -198,7 +198,7 @@ categoryPath.set(categoryLang, translationCategoryPath) |
| 198 | 198 | 你当前要使用的翻译接口(Python 侧): |
| 199 | 199 | |
| 200 | 200 | ```bash |
| 201 | -curl -X POST http://120.76.41.98:6006/translate \ | |
| 201 | +curl -X POST http://43.166.252.75:6006/translate \ | |
| 202 | 202 | -H "Content-Type: application/json" \ |
| 203 | 203 | -d '{"text":"儿童小男孩女孩开学 100 天衬衫短袖 搞笑图案字母印花庆祝上衣", |
| 204 | 204 | "target_lang":"en", | ... | ... |
indexer/prompts.txt
| ... | ... | @@ -22,7 +22,7 @@ java索引程序职责: |
| 22 | 22 | 本模块: |
| 23 | 23 | 负责 msyql 基础数据 → 索引结构的doc (包括缓存) |
| 24 | 24 | |
| 25 | -翻译接口: curl -X POST http://120.76.41.98:6006/translate -H "Content-Type: application/json" -d '{"text":"儿童小男孩女孩开学 100 天衬衫短袖 搞笑图案字母印花庆祝上衣","target_lang":"en","source_lang":"auto"}' | |
| 25 | +翻译接口: curl -X POST http://43.166.252.75:6006/translate -H "Content-Type: application/json" -d '{"text":"儿童小男孩女孩开学 100 天衬衫短袖 搞笑图案字母印花庆祝上衣","target_lang":"en","source_lang":"auto"}' | |
| 26 | 26 | |
| 27 | 27 | java的组织doc的逻辑都需要迁移过来。 |
| 28 | 28 | ... | ... |
query/query_parser.py
| ... | ... | @@ -8,7 +8,6 @@ from typing import Dict, List, Optional, Any, Union |
| 8 | 8 | import numpy as np |
| 9 | 9 | import logging |
| 10 | 10 | import re |
| 11 | -import hanlp | |
| 12 | 11 | from concurrent.futures import Future, ThreadPoolExecutor, as_completed |
| 13 | 12 | |
| 14 | 13 | from embeddings import BgeEncoder |
| ... | ... | @@ -19,6 +18,10 @@ from .query_rewriter import QueryRewriter, QueryNormalizer |
| 19 | 18 | |
| 20 | 19 | logger = logging.getLogger(__name__) |
| 21 | 20 | |
| 21 | +try: | |
| 22 | + import hanlp # type: ignore | |
| 23 | +except Exception: # pragma: no cover | |
| 24 | + hanlp = None | |
| 22 | 25 | |
| 23 | 26 | class ParsedQuery: |
| 24 | 27 | """Container for parsed query results.""" |
| ... | ... | @@ -94,12 +97,22 @@ class QueryParser: |
| 94 | 97 | self.language_detector = LanguageDetector() |
| 95 | 98 | self.rewriter = QueryRewriter(config.query_config.rewrite_dictionary) |
| 96 | 99 | |
| 97 | - # Initialize HanLP components at startup | |
| 98 | - logger.info("Initializing HanLP components...") | |
| 99 | - self._tok = hanlp.load(hanlp.pretrained.tok.CTB9_TOK_ELECTRA_BASE_CRF) | |
| 100 | - self._tok.config.output_spans = True | |
| 101 | - self._pos_tag = hanlp.load(hanlp.pretrained.pos.CTB9_POS_ELECTRA_SMALL) | |
| 102 | - logger.info("HanLP components initialized") | |
| 100 | + # Optional HanLP components (heavy). If unavailable, fall back to a lightweight tokenizer. | |
| 101 | + self._tok = None | |
| 102 | + self._pos_tag = None | |
| 103 | + if hanlp is not None: | |
| 104 | + try: | |
| 105 | + logger.info("Initializing HanLP components...") | |
| 106 | + self._tok = hanlp.load(hanlp.pretrained.tok.CTB9_TOK_ELECTRA_BASE_CRF) | |
| 107 | + self._tok.config.output_spans = True | |
| 108 | + self._pos_tag = hanlp.load(hanlp.pretrained.pos.CTB9_POS_ELECTRA_SMALL) | |
| 109 | + logger.info("HanLP components initialized") | |
| 110 | + except Exception as e: | |
| 111 | + logger.warning(f"HanLP init failed, falling back to simple tokenizer: {e}") | |
| 112 | + self._tok = None | |
| 113 | + self._pos_tag = None | |
| 114 | + else: | |
| 115 | + logger.info("HanLP not installed; using simple tokenizer") | |
| 103 | 116 | |
| 104 | 117 | @property |
| 105 | 118 | def text_encoder(self) -> BgeEncoder: |
| ... | ... | @@ -121,32 +134,51 @@ class QueryParser: |
| 121 | 134 | translation_context=self.config.query_config.translation_context |
| 122 | 135 | ) |
| 123 | 136 | return self._translator |
| 137 | + | |
| 138 | + def _simple_tokenize(self, text: str) -> List[str]: | |
| 139 | + """ | |
| 140 | + Lightweight tokenizer fallback. | |
| 141 | + | |
| 142 | + - Groups consecutive CJK chars as a token | |
| 143 | + - Groups consecutive latin/digits/underscore/dash as a token | |
| 144 | + """ | |
| 145 | + if not text: | |
| 146 | + return [] | |
| 147 | + pattern = re.compile(r"[\u4e00-\u9fff]+|[A-Za-z0-9_]+(?:-[A-Za-z0-9_]+)*") | |
| 148 | + return pattern.findall(text) | |
| 124 | 149 | |
| 125 | 150 | def _extract_keywords(self, query: str) -> str: |
| 126 | 151 | """Extract keywords (nouns with length > 1) from query.""" |
| 127 | - tok_result = self._tok(query) | |
| 128 | - if not tok_result: | |
| 129 | - return "" | |
| 130 | - | |
| 131 | - words = [x[0] for x in tok_result] | |
| 132 | - pos_tags = self._pos_tag(words) | |
| 133 | - | |
| 134 | - keywords = [] | |
| 135 | - for word, pos in zip(words, pos_tags): | |
| 136 | - if len(word) > 1 and pos.startswith('N'): | |
| 137 | - keywords.append(word) | |
| 138 | - | |
| 152 | + if self._tok is not None and self._pos_tag is not None: | |
| 153 | + tok_result = self._tok(query) | |
| 154 | + if not tok_result: | |
| 155 | + return "" | |
| 156 | + words = [x[0] for x in tok_result] | |
| 157 | + pos_tags = self._pos_tag(words) | |
| 158 | + keywords = [] | |
| 159 | + for word, pos in zip(words, pos_tags): | |
| 160 | + if len(word) > 1 and isinstance(pos, str) and pos.startswith("N"): | |
| 161 | + keywords.append(word) | |
| 162 | + return " ".join(keywords) | |
| 163 | + | |
| 164 | + # Fallback: treat tokens with length > 1 as "keywords" | |
| 165 | + tokens = self._simple_tokenize(query) | |
| 166 | + keywords = [t for t in tokens if len(t) > 1] | |
| 139 | 167 | return " ".join(keywords) |
| 140 | 168 | |
| 141 | 169 | def _get_token_count(self, query: str) -> int: |
| 142 | - """Get token count using HanLP.""" | |
| 143 | - tok_result = self._tok(query) | |
| 144 | - return len(tok_result) if tok_result else 0 | |
| 170 | + """Get token count (HanLP if available, otherwise simple).""" | |
| 171 | + if self._tok is not None: | |
| 172 | + tok_result = self._tok(query) | |
| 173 | + return len(tok_result) if tok_result else 0 | |
| 174 | + return len(self._simple_tokenize(query)) | |
| 145 | 175 | |
| 146 | 176 | def _get_query_tokens(self, query: str) -> List[str]: |
| 147 | - """Get token list using HanLP.""" | |
| 148 | - tok_result = self._tok(query) | |
| 149 | - return [x[0] for x in tok_result] if tok_result else [] | |
| 177 | + """Get token list (HanLP if available, otherwise simple).""" | |
| 178 | + if self._tok is not None: | |
| 179 | + tok_result = self._tok(query) | |
| 180 | + return [x[0] for x in tok_result] if tok_result else [] | |
| 181 | + return self._simple_tokenize(query) | |
| 150 | 182 | |
| 151 | 183 | def parse( |
| 152 | 184 | self, | ... | ... |
requirements.txt
| ... | ... | @@ -12,14 +12,13 @@ pandas>=2.0.0 |
| 12 | 12 | # Elasticsearch |
| 13 | 13 | elasticsearch>=8.0.0,<9.0.0 |
| 14 | 14 | |
| 15 | -# ML/Embeddings | |
| 16 | -torch>=2.0.0 | |
| 17 | -sentence-transformers>=2.2.0 | |
| 18 | -transformers>=4.30.0 | |
| 19 | -modelscope>=1.9.0 | |
| 20 | -cn-clip>=1.5.0 | |
| 15 | +# Redis (cache; used by translator/embeddings) | |
| 16 | +redis>=5.0.0 | |
| 17 | + | |
| 18 | +# Math / vector utilities (used across modules) | |
| 21 | 19 | numpy>=1.24.0 |
| 22 | -pillow>=10.0.0 | |
| 20 | + | |
| 21 | +# LLM/Translation clients | |
| 23 | 22 | openai>=1.0.0 |
| 24 | 23 | |
| 25 | 24 | # API | ... | ... |
| ... | ... | @@ -0,0 +1,16 @@ |
| 1 | +# Optional heavy dependencies for local embedding/image encoding. | |
| 2 | +# | |
| 3 | +# Install when you need: | |
| 4 | +# - `./scripts/start_embedding_service.sh` (local embeddings server) | |
| 5 | +# - local BGE-M3 / CN-CLIP inference | |
| 6 | +# | |
| 7 | +# Notes: | |
| 8 | +# - `torch` wheels can be very large; if you want CPU-only wheels, | |
| 9 | +# consider installing torch separately with the official CPU index. | |
| 10 | +# | |
| 11 | +torch>=2.0.0 | |
| 12 | +sentence-transformers>=2.2.0 | |
| 13 | +transformers>=4.30.0 | |
| 14 | +modelscope>=1.9.0 | |
| 15 | +cn-clip>=1.5.0 | |
| 16 | +pillow>=10.0.0 | ... | ... |
| ... | ... | @@ -0,0 +1,59 @@ |
| 1 | +#!/bin/bash | |
| 2 | +# | |
| 3 | +# Create and initialize Python venv for saas-search. | |
| 4 | +# | |
| 5 | +# Usage: | |
| 6 | +# ./scripts/create_venv.sh | |
| 7 | +# | |
| 8 | +set -euo pipefail | |
| 9 | + | |
| 10 | +PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" | |
| 11 | +cd "${PROJECT_ROOT}" | |
| 12 | + | |
| 13 | +VENV_DIR="${PROJECT_ROOT}/.venv" | |
| 14 | + | |
| 15 | +PYTHON_BIN="${PYTHON_BIN:-python3.10}" | |
| 16 | +if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then | |
| 17 | + PYTHON_BIN="python3" | |
| 18 | +fi | |
| 19 | + | |
| 20 | +echo "Using python: $(${PYTHON_BIN} --version)" | |
| 21 | + | |
| 22 | +if ! "${PYTHON_BIN}" -c "import ensurepip" >/dev/null 2>&1; then | |
| 23 | + echo "ERROR: ensurepip is not available for ${PYTHON_BIN}." >&2 | |
| 24 | + echo "On Ubuntu/Debian, install the venv package first, e.g.:" >&2 | |
| 25 | + echo " sudo apt-get update -y && sudo apt-get install -y python3-venv" >&2 | |
| 26 | + echo "If you are using Python 3.12 specifically, you may need:" >&2 | |
| 27 | + echo " sudo apt-get install -y python3.12-venv" >&2 | |
| 28 | + exit 1 | |
| 29 | +fi | |
| 30 | + | |
| 31 | +if [[ -d "${VENV_DIR}" ]]; then | |
| 32 | + if [[ -f "${VENV_DIR}/bin/activate" ]]; then | |
| 33 | + echo "venv already exists at ${VENV_DIR}" | |
| 34 | + else | |
| 35 | + echo "Found incomplete venv at ${VENV_DIR}, recreating..." | |
| 36 | + rm -rf "${VENV_DIR}" | |
| 37 | + "${PYTHON_BIN}" -m venv "${VENV_DIR}" | |
| 38 | + fi | |
| 39 | +else | |
| 40 | + echo "Creating venv at ${VENV_DIR} ..." | |
| 41 | + "${PYTHON_BIN}" -m venv "${VENV_DIR}" | |
| 42 | +fi | |
| 43 | + | |
| 44 | +# shellcheck disable=SC1091 | |
| 45 | +source "${VENV_DIR}/bin/activate" | |
| 46 | + | |
| 47 | +python -m pip install --upgrade pip setuptools wheel | |
| 48 | +python -m pip install -r requirements.txt | |
| 49 | + | |
| 50 | +if [[ "${INSTALL_ML:-0}" == "1" ]]; then | |
| 51 | + echo | |
| 52 | + echo "INSTALL_ML=1 detected. Installing optional ML dependencies..." | |
| 53 | + python -m pip install -r requirements_ml.txt | |
| 54 | +fi | |
| 55 | + | |
| 56 | +echo | |
| 57 | +echo "Done." | |
| 58 | +echo "Next:" | |
| 59 | +echo " source activate.sh" | ... | ... |
scripts/mock_data.sh
| ... | ... | @@ -20,9 +20,7 @@ |
| 20 | 20 | # ============================================================================ |
| 21 | 21 | |
| 22 | 22 | cd "$(dirname "$0")/.." |
| 23 | -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | |
| 24 | -source "$CONDA_ROOT/etc/profile.d/conda.sh" | |
| 25 | -conda activate searchengine | |
| 23 | +source ./activate.sh | |
| 26 | 24 | |
| 27 | 25 | GREEN='\033[0;32m' |
| 28 | 26 | YELLOW='\033[1;33m' |
| ... | ... | @@ -33,13 +31,6 @@ echo -e "${GREEN}========================================${NC}" |
| 33 | 31 | echo -e "${GREEN}Mock Data Script${NC}" |
| 34 | 32 | echo -e "${GREEN}========================================${NC}" |
| 35 | 33 | |
| 36 | -# Load config from .env file if it exists | |
| 37 | -if [ -f .env ]; then | |
| 38 | - set -a | |
| 39 | - source .env | |
| 40 | - set +a | |
| 41 | -fi | |
| 42 | - | |
| 43 | 34 | # ============================================================================ |
| 44 | 35 | # 写死的配置参数(不需要配置化,这是测试数据构造脚本) |
| 45 | 36 | # ============================================================================ | ... | ... |
scripts/start_backend.sh
| ... | ... | @@ -5,9 +5,7 @@ |
| 5 | 5 | set -e |
| 6 | 6 | |
| 7 | 7 | cd "$(dirname "$0")/.." |
| 8 | -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | |
| 9 | -source "$CONDA_ROOT/etc/profile.d/conda.sh" | |
| 10 | -conda activate searchengine | |
| 8 | +source ./activate.sh | |
| 11 | 9 | |
| 12 | 10 | GREEN='\033[0;32m' |
| 13 | 11 | YELLOW='\033[1;33m' |
| ... | ... | @@ -17,13 +15,6 @@ echo -e "${GREEN}========================================${NC}" |
| 17 | 15 | echo -e "${GREEN}Starting Backend API Service${NC}" |
| 18 | 16 | echo -e "${GREEN}========================================${NC}" |
| 19 | 17 | |
| 20 | -# Load config from .env file if it exists | |
| 21 | -if [ -f .env ]; then | |
| 22 | - set -a | |
| 23 | - source .env | |
| 24 | - set +a | |
| 25 | -fi | |
| 26 | - | |
| 27 | 18 | echo -e "\n${YELLOW}Configuration:${NC}" |
| 28 | 19 | echo " API Host: ${API_HOST:-0.0.0.0}" |
| 29 | 20 | echo " API Port: ${API_PORT:-6002}" | ... | ... |
scripts/start_embedding_service.sh
| ... | ... | @@ -12,12 +12,7 @@ set -e |
| 12 | 12 | |
| 13 | 13 | cd "$(dirname "$0")/.." |
| 14 | 14 | |
| 15 | -# Load conda env if available (keep consistent with other scripts) | |
| 16 | -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | |
| 17 | -if [ -f "$CONDA_ROOT/etc/profile.d/conda.sh" ]; then | |
| 18 | - source "$CONDA_ROOT/etc/profile.d/conda.sh" | |
| 19 | - conda activate searchengine | |
| 20 | -fi | |
| 15 | +source ./activate.sh | |
| 21 | 16 | |
| 22 | 17 | EMBEDDING_SERVICE_HOST=$(python -c "from embeddings.config import CONFIG; print(CONFIG.HOST)") |
| 23 | 18 | EMBEDDING_SERVICE_PORT=$(python -c "from embeddings.config import CONFIG; print(CONFIG.PORT)") | ... | ... |
scripts/start_frontend.sh
scripts/start_indexer.sh
| ... | ... | @@ -5,9 +5,7 @@ |
| 5 | 5 | set -e |
| 6 | 6 | |
| 7 | 7 | cd "$(dirname "$0")/.." |
| 8 | -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | |
| 9 | -source "$CONDA_ROOT/etc/profile.d/conda.sh" | |
| 10 | -conda activate searchengine | |
| 8 | +source ./activate.sh | |
| 11 | 9 | |
| 12 | 10 | GREEN='\033[0;32m' |
| 13 | 11 | YELLOW='\033[1;33m' |
| ... | ... | @@ -17,13 +15,6 @@ echo -e "${GREEN}========================================${NC}" |
| 17 | 15 | echo -e "${GREEN}Starting Indexer API Service${NC}" |
| 18 | 16 | echo -e "${GREEN}========================================${NC}" |
| 19 | 17 | |
| 20 | -# Load config from .env file if it exists | |
| 21 | -if [ -f .env ]; then | |
| 22 | - set -a | |
| 23 | - source .env | |
| 24 | - set +a | |
| 25 | -fi | |
| 26 | - | |
| 27 | 18 | echo -e "\n${YELLOW}Configuration:${NC}" |
| 28 | 19 | echo " INDEXER Host: ${INDEXER_HOST:-0.0.0.0}" |
| 29 | 20 | echo " INDEXER Port: ${INDEXER_PORT:-6004}" | ... | ... |
scripts/tenant3__csv_to_shoplazza_xlsx.sh
setup.sh
| 1 | 1 | #!/bin/bash |
| 2 | 2 | |
| 3 | -CONDA_ROOT="${CONDA_ROOT:-/home/tw/miniconda3}" | |
| 4 | -source "$CONDA_ROOT/etc/profile.d/conda.sh" | |
| 5 | - | |
| 6 | 3 | # saas-search Setup and Startup Script |
| 7 | 4 | # This script sets up the environment and starts all services |
| 8 | 5 | |
| ... | ... | @@ -22,24 +19,16 @@ echo -e "${GREEN}========================================${NC}" |
| 22 | 19 | cd "$(dirname "$0")" |
| 23 | 20 | PROJECT_ROOT=$(pwd) |
| 24 | 21 | |
| 25 | -echo -e "\n${YELLOW}Step 1: Setting up Conda environment${NC}" | |
| 26 | -# Check if conda is available | |
| 27 | -if ! command -v conda &> /dev/null; then | |
| 28 | - echo -e "${RED}Error: conda not found. Please install Miniconda or Anaconda${NC}" | |
| 29 | - exit 1 | |
| 30 | -fi | |
| 22 | +echo -e "\n${YELLOW}Step 1: Setting up Python environment (venv preferred)${NC}" | |
| 31 | 23 | |
| 32 | -# Check if environment exists | |
| 33 | -if conda env list | grep -q "searchengine"; then | |
| 34 | - echo -e "${GREEN}Environment 'searchengine' already exists${NC}" | |
| 35 | - conda activate searchengine | |
| 36 | -else | |
| 37 | - echo -e "${YELLOW}Creating conda environment 'searchengine'...${NC}" | |
| 38 | - conda env create -f environment.yml | |
| 39 | - conda activate searchengine | |
| 40 | - echo -e "${GREEN}Environment created successfully!${NC}" | |
| 24 | +if [ ! -f "${PROJECT_ROOT}/.venv/bin/activate" ]; then | |
| 25 | + echo -e "${YELLOW}Creating venv and installing dependencies...${NC}" | |
| 26 | + ./scripts/create_venv.sh | |
| 41 | 27 | fi |
| 42 | 28 | |
| 29 | +# Activate environment + load .env | |
| 30 | +source ./activate.sh | |
| 31 | + | |
| 43 | 32 | # Verify environment |
| 44 | 33 | echo -e "\n${YELLOW}Current Python version:${NC}" |
| 45 | 34 | python --version |
| ... | ... | @@ -74,7 +63,7 @@ echo -e "${GREEN}Setup Complete!${NC}" |
| 74 | 63 | echo -e "${GREEN}========================================${NC}" |
| 75 | 64 | echo "" |
| 76 | 65 | echo -e "Next steps:" |
| 77 | -echo -e " 1. Ingest data: ${YELLOW}./scripts/ingest.sh${NC}" | |
| 78 | -echo -e " 2. Start backend: ${YELLOW}./scripts/start_backend.sh${NC}" | |
| 66 | +echo -e " 1. Start backend: ${YELLOW}./scripts/start_backend.sh${NC}" | |
| 67 | +echo -e " 2. Start indexer: ${YELLOW}./scripts/start_indexer.sh${NC}" | |
| 79 | 68 | echo -e " 3. Start frontend: ${YELLOW}./scripts/start_frontend.sh${NC}" |
| 80 | 69 | echo "" | ... | ... |