Blame view

scripts/rebuild_suggestions.sh 2.07 KB
ff9efda0   tangwang   suggest
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
  #!/usr/bin/env bash
  #
  # Rebuild and smoke-test search suggestions for a single tenant.
  #
  # Steps: full rebuild (versioned + alias publish), incremental update,
  # Elasticsearch count/sample query against the tenant alias, and one request
  # to the suggestions HTTP API.
  #
  # Usage:   rebuild_suggestions.sh <tenant_id> [sample_query] [sample_language]
  # Example: rebuild_suggestions.sh 162 shi en
  #
  # Optional environment:
  #   PYTHON_BIN           interpreter to use (default: <repo>/.venv/bin/python,
  #                        falling back to python3)
  #   ES_HOST              Elasticsearch base URL (default: ES_HOST from .env,
  #                        then http://localhost:9200)
  #   ES_USERNAME,
  #   ES_PASSWORD          basic-auth credentials (default: values from .env)
  #   ES_INDEX_NAMESPACE   prefix prepended to the alias name
  #   API_BASE_URL         API base URL (default: http://localhost:6002)
  set -euo pipefail

  # A missing tenant id is a usage error: report it on stderr so stdout only
  # ever carries real output, and exit non-zero.
  if [[ $# -lt 1 ]]; then
    {
      echo "Usage: $0 <tenant_id> [sample_query] [sample_language]"
      echo "Example: $0 162 shi en"
    } >&2
    exit 1
  fi
  
  # Repository root = parent of the directory that contains this script.
  _script_dir="$(dirname "${BASH_SOURCE[0]}")"
  ROOT_DIR="$(cd "${_script_dir}/.." && pwd)"

  # Positional arguments. The sample query and language fall back to
  # "shi" / "en" when they are omitted or empty.
  TENANT_ID="${1}"
  SAMPLE_Q="${2:-}"
  SAMPLE_LANG="${3:-}"
  [ -n "$SAMPLE_Q" ] || SAMPLE_Q="shi"
  [ -n "$SAMPLE_LANG" ] || SAMPLE_LANG="en"

  # API base URL: API_BASE_URL from the environment overrides the local default.
  API_BASE="http://localhost:6002"
  if [ -n "${API_BASE_URL:-}" ]; then
    API_BASE="$API_BASE_URL"
  fi

  # Everything below uses paths relative to the repository root
  # (main.py, .env).
  cd "$ROOT_DIR"
  
  #######################################
  # Pick the Python interpreter used by the later steps.
  # Globals:   PYTHON_BIN (read, optional), ROOT_DIR (read)
  # Outputs:   the interpreter (path or command name) on stdout
  #######################################
  resolve_python_bin() {
    local candidate="${PYTHON_BIN:-$ROOT_DIR/.venv/bin/python}"
    # `command -v` resolves the candidate the same way the shell will when it
    # runs it, so a bare command name found on PATH (PYTHON_BIN=python3.11) is
    # honored. A `[ -x ]` test would treat such a name as a relative file path
    # and silently fall back to python3.
    if command -v -- "$candidate" >/dev/null 2>&1; then
      printf '%s\n' "$candidate"
    else
      printf '%s\n' "python3"
    fi
  }

  PY_BIN="$(resolve_python_bin)"
  
  # Elasticsearch base URL. A non-empty ES_HOST from the environment wins;
  # otherwise ES_HOST is read from ./.env with python-dotenv, and
  # http://localhost:9200 is used when .env has no value for it.
  if [[ -z "${ES_HOST:-}" ]]; then
    _es_host_snippet="from dotenv import dotenv_values; print(dotenv_values('.env').get('ES_HOST') or 'http://localhost:9200')"
    ES_HOST="$("$PY_BIN" -c "$_es_host_snippet")"
  fi
  
  #######################################
  # Fill in ES_USERNAME / ES_PASSWORD from ./.env when either one is missing.
  # Non-empty values already present in the environment are kept.
  # Globals:   PY_BIN (read); ES_USERNAME, ES_PASSWORD (read, written)
  # Returns:   0 on success; the helper's exit status if it fails
  #######################################
  load_es_credentials() {
    if [[ -n "${ES_USERNAME:-}" && -n "${ES_PASSWORD:-}" ]]; then
      return 0
    fi

    local snippet="from dotenv import dotenv_values; cfg = dotenv_values('.env'); print(cfg.get('ES_USERNAME') or ''); print(cfg.get('ES_PASSWORD') or '')"
    local raw
    local -a creds=()

    # Capture through a command substitution rather than a process
    # substitution: a failing helper is then reported right here instead of
    # silently leaving an empty array behind.
    raw="$("$PY_BIN" -c "$snippet")" || return
    readarray -t creds <<<"$raw"

    # The inner `:-` defaults keep `set -u` from aborting with "unbound
    # variable" when the helper printed fewer than two lines.
    ES_USERNAME="${ES_USERNAME:-${creds[0]:-}}"
    ES_PASSWORD="${ES_PASSWORD:-${creds[1]:-}}"
  }

  load_es_credentials
  
  # curl basic-auth arguments. The array stays empty unless both a username
  # and a password are available.
  AUTH=()
  if [[ -n "${ES_USERNAME:-}" && -n "${ES_PASSWORD:-}" ]]; then
    AUTH=(-u "${ES_USERNAME}:${ES_PASSWORD}")
  fi
  
  # Alias queried by the count/sample step: optional namespace prefix followed
  # by the tenant-specific name.
  ALIAS_NAME="${ES_INDEX_NAMESPACE:-}"
  ALIAS_NAME+="search_suggestions_tenant_${TENANT_ID}_current"
  
  # Step 1: run main.py build-suggestions in full mode for this tenant.
  echo "[1/4] Full rebuild tenant=${TENANT_ID} (versioned + alias publish)"
  _full_rebuild_args=(
    --tenant-id "$TENANT_ID"
    --es-host "$ES_HOST"
    --mode full
    --days 365
    --batch-size 500
    --publish-alias
    --keep-versions 2
  )
  "$PY_BIN" main.py build-suggestions "${_full_rebuild_args[@]}"
  
  # Step 2: run main.py build-suggestions again, this time in incremental mode.
  echo "[2/4] Incremental update tenant=${TENANT_ID}"
  _incremental_args=(
    --tenant-id "$TENANT_ID"
    --es-host "$ES_HOST"
    --mode incremental
    --overlap-minutes 30
  )
  "$PY_BIN" main.py build-suggestions "${_incremental_args[@]}"
  
  #######################################
  # Print the document count and a 5-hit sample from the tenant alias.
  # Globals:   ES_HOST, ALIAS_NAME (read); AUTH (read, may be empty or unset)
  # Outputs:   the raw Elasticsearch responses on stdout
  #######################################
  es_count_and_sample() {
    # Drop one trailing slash from ES_HOST so the joined URL never contains
    # "//" in front of the alias name.
    local base="${ES_HOST%/}/${ALIAS_NAME}"
    local sample_query='{"size": 5, "query": {"match_all": {}}, "_source": ["lang", "text", "rank_score", "sources", "query_count_30d"]}'

    # ${AUTH[@]+...} expands to nothing for an empty or unset array. A bare
    # "${AUTH[@]}" aborts under `set -u` on bash releases older than 4.4.
    curl -sS ${AUTH[@]+"${AUTH[@]}"} "${base}/_count?pretty"
    echo
    curl -sS ${AUTH[@]+"${AUTH[@]}"} "${base}/_search?pretty" \
      -H 'Content-Type: application/json' \
      -d "$sample_query"
    echo
  }

  echo "[3/4] ES count + sample"
  es_count_and_sample
  
  #######################################
  # Request suggestions for the sample query from the HTTP API.
  # Globals:   API_BASE, SAMPLE_Q, SAMPLE_LANG, TENANT_ID (read)
  # Outputs:   the raw API response on stdout
  #######################################
  api_smoke_test() {
    # -G turns the --data-urlencode pairs into the GET query string, so sample
    # queries containing spaces, '&', '#' or non-ASCII text reach the API
    # intact instead of corrupting the URL.
    curl -sS -G "${API_BASE%/}/search/suggestions" \
      --data-urlencode "q=${SAMPLE_Q}" \
      --data-urlencode "size=10" \
      --data-urlencode "language=${SAMPLE_LANG}" \
      -H "X-Tenant-ID: ${TENANT_ID}"
    echo
  }

  echo "[4/4] API smoke test"
  api_smoke_test