build_suggestions.sh
3.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env bash
#
# Convenience script to rebuild suggestion index for a tenant.
#
# Usage:
# # full rebuild + alias publish (default)
# ./scripts/build_suggestions.sh <tenant_id> --mode full --days 30
#
# # incremental update from watermark
# ./scripts/build_suggestions.sh <tenant_id> --mode incremental
#
# # full rebuild + incremental + ES/API smoke checks (same as legacy rebuild_suggestions.sh)
# ./scripts/build_suggestions.sh <tenant_id> --rebuild-and-verify
#
# Strict mode: abort on command failure (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# A tenant id is mandatory. Without it, print the usage text and exit 1.
# The text goes to stderr because this is an error path: callers that capture
# or pipe stdout should not receive usage text as if it were output.
if [ $# -lt 1 ]; then
  {
    echo "Usage: $0 <tenant_id> [--rebuild-and-verify | extra args for main.py build-suggestions...]"
    echo "Example (full): $0 162 --mode full --days 30 --publish-alias"
    echo "Example (incremental): $0 162 --mode incremental --overlap-minutes 30"
    echo "Example (pipeline + smoke): $0 162 --rebuild-and-verify"
  } >&2
  exit 1
fi
#######################################
# Split the command line into the tenant id, the --rebuild-and-verify switch,
# and the arguments that are forwarded verbatim to `main.py build-suggestions`.
# Globals:   TENANT_ID, REBUILD_VERIFY, PASSTHROUGH_ARGS (all written)
# Arguments: $1 - tenant id; the rest - the switch and/or passthrough args
# Outputs:   an error message on stderr when the switch is mixed with
#            passthrough arguments
# Returns:   exits the script with status 1 on that conflict
#######################################
parse_cli_args() {
  TENANT_ID="$1"
  # The tenant id is consumed; `|| true` keeps a zero-argument call harmless.
  shift || true
  REBUILD_VERIFY=false
  PASSTHROUGH_ARGS=()

  local arg
  for arg in "$@"; do
    case "$arg" in
      --rebuild-and-verify)
        REBUILD_VERIFY=true
        ;;
      *)
        PASSTHROUGH_ARGS+=("$arg")
        ;;
    esac
  done

  # The verify pipeline runs with fixed parameters, so extra build arguments
  # are rejected rather than silently ignored. Diagnostics go to stderr.
  if [ "$REBUILD_VERIFY" = true ] && [ "${#PASSTHROUGH_ARGS[@]}" -gt 0 ]; then
    echo "Error: --rebuild-and-verify cannot be combined with other build-suggestions arguments." >&2
    exit 1
  fi
}

parse_cli_args "$@"
# Repository root is the parent of the directory holding this script. All
# later relative paths (main.py, .env) are resolved from there.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

#######################################
# Pick the Python interpreter to run main.py with.
# Preference order: $PYTHON_BIN, then $ROOT_DIR/.venv/bin/python, then the
# bare name "python3" as a last resort.
# `command -v` is used instead of `[ -x ]` so that PYTHON_BIN may be either a
# path or a command name found on PATH (e.g. PYTHON_BIN=python3.11); a plain
# -x test would only accept file paths and silently discard such an override.
# Globals:   PYTHON_BIN, ROOT_DIR (read)
# Outputs:   the chosen interpreter on stdout
#######################################
resolve_python_bin() {
  local candidate="${PYTHON_BIN:-$ROOT_DIR/.venv/bin/python}"
  if command -v "$candidate" >/dev/null 2>&1; then
    printf '%s\n' "$candidate"
  else
    printf '%s\n' "python3"
  fi
}

PY_BIN="$(resolve_python_bin)"
#######################################
# Fill in ES_USERNAME / ES_PASSWORD from the .env file when the environment
# does not already provide both. Values already present in the environment
# take precedence over the .env values.
# Globals:   PY_BIN (read), ES_USERNAME, ES_PASSWORD (read and written)
# Outputs:   an error message on stderr if the .env lookup command fails
# Returns:   0 on success, 1 if the lookup command exits non-zero
#######################################
load_es_credentials() {
  if [ -n "${ES_USERNAME:-}" ] && [ -n "${ES_PASSWORD:-}" ]; then
    return 0
  fi

  # Command substitution (rather than process substitution) is used so that a
  # failing helper is detected here instead of surfacing later as an
  # "unbound variable" error on an empty array.
  local raw
  if ! raw="$("$PY_BIN" - <<'PY'
from dotenv import dotenv_values
cfg = dotenv_values('.env')
print(cfg.get('ES_USERNAME') or '')
print(cfg.get('ES_PASSWORD') or '')
PY
  )"; then
    echo "Error: failed to read ES_USERNAME/ES_PASSWORD from .env via ${PY_BIN}" >&2
    return 1
  fi

  # Line 1 is the username, line 2 the password. Trailing empty lines are
  # stripped by the command substitution, hence the ':-' defaults.
  local -a creds=()
  readarray -t creds <<<"$raw"
  ES_USERNAME="${ES_USERNAME:-${creds[0]:-}}"
  ES_PASSWORD="${ES_PASSWORD:-${creds[1]:-}}"
}

# --rebuild-and-verify pipeline: full rebuild, incremental update, then
# Elasticsearch and HTTP API smoke checks. Always ends with `exit 0`, so the
# default passthrough invocation further down is never reached in this mode.
if [ "$REBUILD_VERIFY" = true ]; then
  # Fixed smoke-test queries and languages (no CLI args).
  SAMPLE_QUERIES=(s sh dress tshirt)
  SAMPLE_LANGS=(en zh)
  API_BASE="${API_BASE_URL:-http://localhost:6002}"

  # ES_HOST: environment first, then .env, then the localhost default.
  if [ -z "${ES_HOST:-}" ]; then
    ES_HOST="$("$PY_BIN" - <<'PY'
from dotenv import dotenv_values
print(dotenv_values('.env').get('ES_HOST') or 'http://localhost:9200')
PY
    )"
  fi

  load_es_credentials

  # Basic auth is only sent when both parts are known.
  # NOTE(review): the password is passed on curl's argv and is therefore
  # visible in the process list while curl runs.
  if [ -n "${ES_USERNAME:-}" ] && [ -n "${ES_PASSWORD:-}" ]; then
    AUTH=(-u "${ES_USERNAME}:${ES_PASSWORD}")
  else
    AUTH=()
  fi

  ALIAS_NAME="${ES_INDEX_NAMESPACE:-}search_suggestions_tenant_${TENANT_ID}_current"

  echo "[1/4] Full rebuild tenant=${TENANT_ID} (versioned + alias publish)"
  "$PY_BIN" main.py build-suggestions \
    --tenant-id "$TENANT_ID" \
    --es-host "$ES_HOST" \
    --mode full \
    --days 365 \
    --batch-size 500 \
    --publish-alias \
    --keep-versions 2

  echo "[2/4] Incremental update tenant=${TENANT_ID}"
  "$PY_BIN" main.py build-suggestions \
    --tenant-id "$TENANT_ID" \
    --es-host "$ES_HOST" \
    --mode incremental \
    --overlap-minutes 30

  # ${AUTH[@]+...} expands to nothing for an empty array; a plain
  # "${AUTH[@]}" aborts under `set -u` on bash older than 4.4.
  # NOTE(review): curl runs without --fail, so an HTTP error status does not
  # fail these checks; the responses are only printed for inspection.
  echo "[3/4] ES count + sample"
  curl -sS ${AUTH[@]+"${AUTH[@]}"} "$ES_HOST/$ALIAS_NAME/_count?pretty"
  echo
  curl -sS ${AUTH[@]+"${AUTH[@]}"} "$ES_HOST/$ALIAS_NAME/_search?pretty" -H 'Content-Type: application/json' -d '{
    "size": 5,
    "query": {"match_all": {}},
    "_source": ["lang", "text", "rank_score", "sources", "query_count_30d"]
  }'
  echo

  echo "[4/4] API smoke test"
  for lang in "${SAMPLE_LANGS[@]}"; do
    for q in "${SAMPLE_QUERIES[@]}"; do
      echo "--- GET /search/suggestions?q=${q}&language=${lang} ---"
      curl -sS "$API_BASE/search/suggestions?q=${q}&size=10&language=${lang}" -H "X-Tenant-ID: ${TENANT_ID}"
      echo
    done
  done

  exit 0
fi
#######################################
# Default mode: run `main.py build-suggestions` for the tenant, forwarding
# every passthrough argument unchanged.
# Globals:   PY_BIN, TENANT_ID, PASSTHROUGH_ARGS (read)
# Returns:   the exit status of the build-suggestions command
#######################################
run_build_suggestions() {
  # ${arr[@]+"${arr[@]}"} expands to nothing when the array is empty. A plain
  # "${PASSTHROUGH_ARGS[@]}" aborts with "unbound variable" under `set -u` on
  # bash older than 4.4 when only a tenant id was given.
  "$PY_BIN" main.py build-suggestions \
    --tenant-id "$TENANT_ID" \
    ${PASSTHROUGH_ARGS[@]+"${PASSTHROUGH_ARGS[@]}"}
}

run_build_suggestions