rebuild_suggestions.sh
2.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env bash
# rebuild_suggestions.sh - rebuild the search-suggestion index of one tenant
# and verify the result:
#   1. full rebuild (versioned index, alias published)
#   2. incremental update
#   3. Elasticsearch document count plus a small sample read through the alias
#   4. smoke test of the suggestions HTTP API
#
# Usage: rebuild_suggestions.sh <tenant_id> [sample_query] [sample_language]
#
# Optional environment variables read by this script: API_BASE_URL,
# PYTHON_BIN, ES_HOST, ES_USERNAME, ES_PASSWORD, ES_INDEX_NAMESPACE.
set -euo pipefail

# The tenant id is mandatory: print the usage text and stop when it is missing.
if (( $# < 1 )); then
  printf '%s\n' \
    "Usage: $0 <tenant_id> [sample_query] [sample_language]" \
    "Example: $0 162 shi en"
  exit 1
fi
# Repository root: the parent of the directory that contains this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Positional arguments; sample query and language default to "shi" and "en".
TENANT_ID="$1"
SAMPLE_Q="${2:-shi}"
SAMPLE_LANG="${3:-en}"

# Base URL of the search API used by the smoke test (override: API_BASE_URL).
API_BASE="${API_BASE_URL:-http://localhost:6002}"

# Work from the repository root so the relative paths used later in this
# script (main.py, .env) resolve against it.
cd "$ROOT_DIR"

#######################################
# Prints the Python interpreter this script should run.
# PYTHON_BIN wins when set, otherwise the project virtualenv interpreter is
# tried; if the candidate cannot be executed the result is "python3".
# `command -v` accepts both a file path and a bare command name found on
# PATH, so a value such as PYTHON_BIN=python3.11 is honoured instead of
# being silently replaced by the fallback.
# Globals:   PYTHON_BIN (read, optional), ROOT_DIR (read)
# Arguments: none
# Outputs:   interpreter path or command name on stdout
#######################################
_resolve_python_bin() {
  local candidate="${PYTHON_BIN:-$ROOT_DIR/.venv/bin/python}"
  if command -v "$candidate" >/dev/null 2>&1; then
    printf '%s\n' "$candidate"
  else
    printf '%s\n' "python3"
  fi
}
PY_BIN="$(_resolve_python_bin)"
# Elasticsearch endpoint: a non-empty ES_HOST from the environment wins.
# Otherwise the value is read from .env with python-dotenv, and
# http://localhost:9200 is used when .env yields no (or an empty) ES_HOST.
[[ -n "${ES_HOST:-}" ]] || ES_HOST="$("$PY_BIN" - <<'PY'
from dotenv import dotenv_values
print(dotenv_values('.env').get('ES_HOST') or 'http://localhost:9200')
PY
)"
# Elasticsearch credentials: values already present in the environment are
# kept; whichever of ES_USERNAME / ES_PASSWORD is missing or empty is filled
# in from .env (an empty string when .env does not provide it either).
if [ -z "${ES_USERNAME:-}" ] || [ -z "${ES_PASSWORD:-}" ]; then
  # Run the helper through a command substitution so its exit status can be
  # checked: a failure inside a process substitution is not seen by `set -e`
  # and would only surface later as an "unbound variable" error.
  if ! _ES_CREDS_RAW="$("$PY_BIN" - <<'PY'
from dotenv import dotenv_values
cfg = dotenv_values('.env')
print(cfg.get('ES_USERNAME') or '')
print(cfg.get('ES_PASSWORD') or '')
PY
  )"; then
    echo "ERROR: could not read ES_USERNAME/ES_PASSWORD from .env using ${PY_BIN}" >&2
    exit 1
  fi
  # Line 1 is the user name, line 2 the password.
  readarray -t _ES_CREDS <<<"$_ES_CREDS_RAW"
  # Command substitution strips trailing newlines, so an empty password (or
  # two empty values) leaves array slots unset; ':-' keeps `set -u` satisfied.
  ES_USERNAME="${ES_USERNAME:-${_ES_CREDS[0]:-}}"
  ES_PASSWORD="${ES_PASSWORD:-${_ES_CREDS[1]:-}}"
fi
# Extra curl arguments for Elasticsearch: basic auth only when both a user
# name and a password are known, otherwise no arguments at all.
AUTH=()
if [[ -n "${ES_USERNAME:-}" && -n "${ES_PASSWORD:-}" ]]; then
  AUTH=(-u "${ES_USERNAME}:${ES_PASSWORD}")
fi

# Alias queried by the verification step; ES_INDEX_NAMESPACE is an optional
# prefix placed directly in front of the alias name.
printf -v ALIAS_NAME '%ssearch_suggestions_tenant_%s_current' \
  "${ES_INDEX_NAMESPACE:-}" "${TENANT_ID}"
# Leading part of the command line shared by both build-suggestions runs.
build_cmd=(
  "$PY_BIN" main.py build-suggestions
  --tenant-id "$TENANT_ID"
  --es-host "$ES_HOST"
)

# Step 1: full rebuild with alias publishing, keeping two versions.
printf '%s\n' "[1/4] Full rebuild tenant=${TENANT_ID} (versioned + alias publish)"
"${build_cmd[@]}" --mode full --days 365 --batch-size 500 --publish-alias --keep-versions 2

# Step 2: incremental run with a 30-minute overlap.
printf '%s\n' "[2/4] Incremental update tenant=${TENANT_ID}"
"${build_cmd[@]}" --mode incremental --overlap-minutes 30
# Step 3: read the document count and five sample documents through the alias.
echo "[3/4] ES count + sample"
# ${AUTH[@]+"${AUTH[@]}"} expands to nothing when AUTH is empty. A plain
# "${AUTH[@]}" aborts under `set -u` on bash releases older than 4.4, which
# treat an empty array as unset.
# ${ES_HOST%/} drops one trailing slash so the request path has no "//".
curl -sS ${AUTH[@]+"${AUTH[@]}"} "${ES_HOST%/}/$ALIAS_NAME/_count?pretty"
echo
curl -sS ${AUTH[@]+"${AUTH[@]}"} "${ES_HOST%/}/$ALIAS_NAME/_search?pretty" \
  -H 'Content-Type: application/json' \
  -d '{
"size": 5,
"query": {"match_all": {}},
"_source": ["lang", "text", "rank_score", "sources", "query_count_30d"]
}'
echo
# Step 4: query the suggestions endpoint of the search API for this tenant.
echo "[4/4] API smoke test"
# -G sends the --data-urlencode pairs as the query string of a GET request.
# URL-encoding the values keeps sample queries containing spaces, '&', '#'
# or non-ASCII text from breaking or altering the request.
curl -sS -G "$API_BASE/search/suggestions" \
  --data-urlencode "q=${SAMPLE_Q}" \
  --data-urlencode "size=10" \
  --data-urlencode "language=${SAMPLE_LANG}" \
  -H "X-Tenant-ID: ${TENANT_ID}"
echo