#!/usr/bin/env bash
#
# Convenience script to rebuild the suggestion index for a tenant.
#
# The first argument is always the tenant ID. Everything after it is either
# the single flag --rebuild, or arguments forwarded verbatim to
# `main.py build-suggestions`.
#
# Usage:
#   # full rebuild + alias publish (default)
#   ./scripts/build_suggestions.sh <tenant_id> --mode full --days 30
#
#   # incremental update from watermark
#   ./scripts/build_suggestions.sh <tenant_id> --mode incremental
#
#   # full rebuild + incremental + ES/API smoke checks
#   # (same as legacy rebuild_suggestions.sh)
#   ./scripts/build_suggestions.sh <tenant_id> --rebuild
#
# Environment (all optional):
#   PYTHON_BIN                  Python interpreter (path or command name).
#                               Defaults to <repo>/.venv/bin/python, falling
#                               back to python3 when it cannot be resolved.
#   ES_HOST                     --rebuild only. Read from .env when unset,
#                               else http://localhost:9200.
#   ES_USERNAME, ES_PASSWORD    --rebuild only. Read from .env when either is
#                               unset; curl basic auth is used only when both
#                               end up non-empty.
#   ES_INDEX_NAMESPACE          --rebuild only. Prefix of the alias name.
#   SUGGESTIONS_SMOKE_BASE_URL  --rebuild only. API base for the smoke test,
#                               default http://localhost:6002.
#
set -euo pipefail

# Print the command-line synopsis to stdout (callers redirect as needed).
usage() {
  echo "Usage: $0 <tenant_id> [--rebuild | extra args for main.py build-suggestions...]"
  echo "Example (full): $0 163 --mode full --days 30 --publish-alias"
  echo "Example (incremental): $0 163 --mode incremental --overlap-minutes 30"
  echo "Example (pipeline + smoke): $0 163 --rebuild"
}

# Run the fixed four-step pipeline: full rebuild, incremental update,
# Elasticsearch count/sample queries, and API smoke requests.
# Globals read:    TENANT_ID, PY_BIN, ES_INDEX_NAMESPACE,
#                  SUGGESTIONS_SMOKE_BASE_URL
# Globals written: ES_HOST, ES_USERNAME, ES_PASSWORD (filled from .env when
#                  missing). They are intentionally not declared `local`,
#                  which would hide the values inherited from the environment.
run_rebuild_pipeline() {
  local -a sample_queries=(s sh dress tshirt)
  local -a sample_langs=(en zh)
  local -a auth=()
  local api_base alias_name env_es_username env_es_password lang q

  # This script validates the locally rebuilt index, so default the smoke
  # target to the local backend. A remote/public API base must be opted into
  # explicitly.
  api_base="${SUGGESTIONS_SMOKE_BASE_URL:-http://localhost:6002}"

  if [[ -z "${ES_HOST:-}" ]]; then
    # A failure of the helper aborts the script here (set -e applies to the
    # assignment's command substitution).
    ES_HOST="$("$PY_BIN" - <<'PY'
from dotenv import dotenv_values
print(dotenv_values('.env').get('ES_HOST') or 'http://localhost:9200')
PY
)"
  fi

  if [[ -z "${ES_USERNAME:-}" || -z "${ES_PASSWORD:-}" ]]; then
    # The helper prints the username on line 1 and the password on line 2.
    # `read` leaves an empty string when a line is missing (for example when
    # the helper fails), so the expansions below stay safe under `set -u`.
    {
      IFS= read -r env_es_username || true
      IFS= read -r env_es_password || true
    } < <("$PY_BIN" - <<'PY'
from dotenv import dotenv_values
cfg = dotenv_values('.env')
print(cfg.get('ES_USERNAME') or '')
print(cfg.get('ES_PASSWORD') or '')
PY
)
    ES_USERNAME="${ES_USERNAME:-$env_es_username}"
    ES_PASSWORD="${ES_PASSWORD:-$env_es_password}"
  fi

  if [[ -n "${ES_USERNAME:-}" && -n "${ES_PASSWORD:-}" ]]; then
    auth=(-u "${ES_USERNAME}:${ES_PASSWORD}")
  fi

  alias_name="${ES_INDEX_NAMESPACE:-}search_suggestions_tenant_${TENANT_ID}_current"

  echo "[1/4] Full rebuild tenant=${TENANT_ID} (versioned + alias publish)"
  "$PY_BIN" main.py build-suggestions \
    --tenant-id "$TENANT_ID" \
    --es-host "$ES_HOST" \
    --mode full \
    --days 365 \
    --batch-size 500 \
    --publish-alias \
    --keep-versions 2

  echo "[2/4] Incremental update tenant=${TENANT_ID}"
  "$PY_BIN" main.py build-suggestions \
    --tenant-id "$TENANT_ID" \
    --es-host "$ES_HOST" \
    --mode incremental \
    --overlap-minutes 30

  # curl runs without --fail: HTTP error responses are printed but do not
  # abort the script; only transport-level failures (non-zero curl exit) do.
  # ${auth[@]+...} expands to nothing for an empty array, which avoids the
  # "unbound variable" error that Bash older than 4.4 raises under `set -u`.
  echo "[3/4] ES count + sample"
  curl -sS ${auth[@]+"${auth[@]}"} "$ES_HOST/$alias_name/_count?pretty"
  echo
  curl -sS ${auth[@]+"${auth[@]}"} "$ES_HOST/$alias_name/_search?pretty" \
    -H 'Content-Type: application/json' \
    -d '{
  "size": 5,
  "query": {"match_all": {}},
  "_source": ["lang", "text", "rank_score", "sources", "query_count_30d"]
}'
  echo

  echo "[4/4] API smoke test (base=${api_base})"
  for lang in "${sample_langs[@]}"; do
    for q in "${sample_queries[@]}"; do
      echo "--- GET /search/suggestions?q=${q}&language=${lang} ---"
      curl -sS "$api_base/search/suggestions?q=${q}&size=10&language=${lang}" \
        -H "X-Tenant-ID: ${TENANT_ID}"
      echo
    done
  done
}

if [[ $# -lt 1 ]]; then
  usage >&2
  exit 1
fi

TENANT_ID="$1"
shift || true

# Catch a forgotten tenant ID early (e.g. `build_suggestions.sh --rebuild`),
# instead of forwarding a flag as the --tenant-id value.
case "$TENANT_ID" in
  "" | -*)
    echo "Error: the first argument must be the tenant ID, got '${TENANT_ID}'." >&2
    usage >&2
    exit 1
    ;;
esac

# Split the remaining arguments into the --rebuild switch and the arguments
# that are passed through to main.py.
RUN_REBUILD_PIPELINE=false
PASSTHROUGH_ARGS=()
for arg in "$@"; do
  if [[ "$arg" == "--rebuild" ]]; then
    RUN_REBUILD_PIPELINE=true
  else
    PASSTHROUGH_ARGS+=("$arg")
  fi
done

if [[ "$RUN_REBUILD_PIPELINE" == true && ${#PASSTHROUGH_ARGS[@]} -gt 0 ]]; then
  echo "Error: --rebuild cannot be combined with other build-suggestions arguments." >&2
  exit 1
fi

# Run from the parent of the directory containing this script, so that the
# relative paths used below (main.py, .env) resolve.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# `command -v` accepts both an executable path and a command name found on
# PATH, so PYTHON_BIN=python3.11 is honoured as well as an absolute path.
PY_BIN="${PYTHON_BIN:-$ROOT_DIR/.venv/bin/python}"
if ! command -v "$PY_BIN" >/dev/null 2>&1; then
  PY_BIN="python3"
fi

if [[ "$RUN_REBUILD_PIPELINE" == true ]]; then
  run_rebuild_pipeline
  exit 0
fi

# ${PASSTHROUGH_ARGS[@]+...} keeps a tenant-only invocation working under
# `set -u` on Bash older than 4.4, where expanding an empty array is an error.
"$PY_BIN" main.py build-suggestions \
  --tenant-id "$TENANT_ID" \
  ${PASSTHROUGH_ARGS[@]+"${PASSTHROUGH_ARGS[@]}"}