f251cf2d
tangwang
suggestion全量索引程序跑通
|
1
2
3
4
5
|
#!/usr/bin/env bash
#
# Convenience script to build or refresh the suggestion index for a tenant.
#
# Usage:
|
ff9efda0
tangwang
suggest
|
6
7
8
9
10
|
# # full rebuild + alias publish (default)
# ./scripts/build_suggestions.sh <tenant_id> --mode full --days 30
#
# # incremental update from watermark
# ./scripts/build_suggestions.sh <tenant_id> --mode incremental
|
f251cf2d
tangwang
suggestion全量索引程序跑通
|
11
|
#
|
e50924ed
tangwang
1. tags -> enrich...
|
12
|
# # full rebuild + incremental + ES/API smoke checks (same as legacy rebuild_suggestions.sh)
|
9f33fe3c
tangwang
fix suggestion re...
|
13
|
# ./scripts/build_suggestions.sh <tenant_id> --rebuild
|
e50924ed
tangwang
1. tags -> enrich...
|
14
|
#
|
f251cf2d
tangwang
suggestion全量索引程序跑通
|
15
16
17
18
|
set -euo pipefail
if [ $# -lt 1 ]; then
|
9f33fe3c
tangwang
fix suggestion re...
|
19
20
21
22
|
# This branch handles a missing <tenant_id>, i.e. an invocation error, so the
# usage text is a diagnostic: write it to stderr and keep stdout clean for
# callers that capture the script's output.
echo "Usage: $0 <tenant_id> [--rebuild | extra args for main.py build-suggestions...]" >&2
echo "Example (full): $0 163 --mode full --days 30 --publish-alias" >&2
echo "Example (incremental): $0 163 --mode incremental --overlap-minutes 30" >&2
echo "Example (pipeline + smoke): $0 163 --rebuild" >&2
|
f251cf2d
tangwang
suggestion全量索引程序跑通
|
23
24
25
26
27
28
|
exit 1
fi
# The first positional argument is the tenant id; everything after it stays in
# "$@" for the option scan that follows.
TENANT_ID="$1"
# `|| true` keeps a non-zero status from `shift` from aborting under `set -e`.
shift || true
|
9f33fe3c
tangwang
fix suggestion re...
|
29
|
RUN_REBUILD_PIPELINE=false
|
e50924ed
tangwang
1. tags -> enrich...
|
30
31
|
PASSTHROUGH_ARGS=()
for arg in "$@"; do
|
9f33fe3c
tangwang
fix suggestion re...
|
32
33
|
if [ "$arg" = "--rebuild" ]; then
RUN_REBUILD_PIPELINE=true
|
e50924ed
tangwang
1. tags -> enrich...
|
34
35
36
37
38
|
else
PASSTHROUGH_ARGS+=("$arg")
fi
done
|
9f33fe3c
tangwang
fix suggestion re...
|
39
40
|
if [ "$RUN_REBUILD_PIPELINE" = true ] && [ ${#PASSTHROUGH_ARGS[@]} -gt 0 ]; then
# Error diagnostics belong on stderr so they are not mixed into captured stdout.
echo "Error: --rebuild cannot be combined with other build-suggestions arguments." >&2
|
e50924ed
tangwang
1. tags -> enrich...
|
41
42
43
|
exit 1
fi
|
f251cf2d
tangwang
suggestion全量索引程序跑通
|
44
45
46
47
|
# Resolve the directory one level above this script's own directory and make
# it the working directory, so relative paths used later (main.py, .env) do not
# depend on where the script was invoked from.
script_dir="$(dirname "${BASH_SOURCE[0]}")"
ROOT_DIR="$(cd "${script_dir}/.." && pwd)"
cd "$ROOT_DIR"
|
ff9efda0
tangwang
suggest
|
48
49
50
51
52
|
# Choose the Python interpreter used for every Python call in this script.
# Precedence: $PYTHON_BIN, then the project virtualenv, then `python3` on PATH.
#
# A candidate is accepted when it is an executable path OR a command name that
# resolves on PATH. Testing only `-x` would silently discard an override such
# as PYTHON_BIN=python3.11, because a bare command name is not a file path.
resolve_python_bin() {
  local candidate="${PYTHON_BIN:-${ROOT_DIR:-}/.venv/bin/python}"
  if [ -x "$candidate" ] || command -v "$candidate" >/dev/null 2>&1; then
    printf '%s\n' "$candidate"
  else
    # Neither the override nor the virtualenv is usable: fall back to PATH.
    printf '%s\n' "python3"
  fi
}
PY_BIN="$(resolve_python_bin)"
|
9f33fe3c
tangwang
fix suggestion re...
|
53
|
if [ "$RUN_REBUILD_PIPELINE" = true ]; then
|
e50924ed
tangwang
1. tags -> enrich...
|
54
55
|
# Inputs for the API smoke test: each query is requested once per language.
SAMPLE_QUERIES=(
  "s"
  "sh"
  "dress"
  "tshirt"
)
SAMPLE_LANGS=("en" "zh")
|
9f33fe3c
tangwang
fix suggestion re...
|
56
57
58
|
# The smoke test checks the index that was just rebuilt locally, so it targets
# the local backend unless SUGGESTIONS_SMOKE_BASE_URL is set to a non-empty
# value; pointing it at a remote/public API is an explicit opt-in.
API_BASE="http://localhost:6002"
if [ -n "${SUGGESTIONS_SMOKE_BASE_URL:-}" ]; then
  API_BASE="$SUGGESTIONS_SMOKE_BASE_URL"
fi
|
e50924ed
tangwang
1. tags -> enrich...
|
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
# Resolve the Elasticsearch endpoint and optional basic-auth credentials.
# Values already present in the environment win; anything missing is read from
# ./.env with the Python `dotenv` module.
if [ -z "${ES_HOST:-}" ]; then
  # Falls back to http://localhost:9200 when .env has no ES_HOST entry.
  ES_HOST="$("$PY_BIN" - <<'PY'
from dotenv import dotenv_values
print(dotenv_values('.env').get('ES_HOST') or 'http://localhost:9200')
PY
)"
fi
if [ -z "${ES_USERNAME:-}" ] || [ -z "${ES_PASSWORD:-}" ]; then
  # The helper prints exactly two lines: username, then password (either may
  # be empty).
  readarray -t _ES_CREDS < <("$PY_BIN" - <<'PY'
from dotenv import dotenv_values
cfg = dotenv_values('.env')
print(cfg.get('ES_USERNAME') or '')
print(cfg.get('ES_PASSWORD') or '')
PY
)
  # Process substitution hides the helper's exit status, so a failed helper
  # leaves the array short. Say so explicitly instead of dying later with an
  # opaque "unbound variable" error under `set -u`.
  if [ "${#_ES_CREDS[@]}" -lt 2 ]; then
    echo "Warning: could not read ES_USERNAME/ES_PASSWORD from .env via ${PY_BIN}" >&2
  fi
  ES_USERNAME="${ES_USERNAME:-${_ES_CREDS[0]:-}}"
  ES_PASSWORD="${ES_PASSWORD:-${_ES_CREDS[1]:-}}"
fi
# Basic auth is only sent when both halves of the credential are non-empty.
if [ -n "${ES_USERNAME:-}" ] && [ -n "${ES_PASSWORD:-}" ]; then
  AUTH=(-u "${ES_USERNAME}:${ES_PASSWORD}")
else
  AUTH=()
fi
# Name queried in step 3, optionally prefixed with $ES_INDEX_NAMESPACE.
# NOTE(review): presumably the alias that --publish-alias points at the newest
# index version; confirm against main.py.
ALIAS_NAME="${ES_INDEX_NAMESPACE:-}search_suggestions_tenant_${TENANT_ID}_current"

# Step 1: full build with fixed parameters, published via --publish-alias.
echo "[1/4] Full rebuild tenant=${TENANT_ID} (versioned + alias publish)"
"$PY_BIN" main.py build-suggestions \
  --tenant-id "$TENANT_ID" \
  --es-host "$ES_HOST" \
  --mode full \
  --days 365 \
  --batch-size 500 \
  --publish-alias \
  --keep-versions 2

# Step 2: incremental run right after the full build.
echo "[2/4] Incremental update tenant=${TENANT_ID}"
"$PY_BIN" main.py build-suggestions \
  --tenant-id "$TENANT_ID" \
  --es-host "$ES_HOST" \
  --mode incremental \
  --overlap-minutes 30

# Step 3: print the document count and five sample documents.
# AUTH is empty when no credentials are configured. The `${AUTH[@]+...}` form
# expands to nothing in that case; a plain "${AUTH[@]}" aborts with "unbound
# variable" under `set -u` on bash older than 4.4.
echo "[3/4] ES count + sample"
curl -sS ${AUTH[@]+"${AUTH[@]}"} "$ES_HOST/$ALIAS_NAME/_count?pretty"
echo
curl -sS ${AUTH[@]+"${AUTH[@]}"} "$ES_HOST/$ALIAS_NAME/_search?pretty" -H 'Content-Type: application/json' -d '{
"size": 5,
"query": {"match_all": {}},
"_source": ["lang", "text", "rank_score", "sources", "query_count_30d"]
}'
echo
|
9f33fe3c
tangwang
fix suggestion re...
|
115
|
# Announce the final step together with the base URL it will hit.
printf '%s\n' "[4/4] API smoke test (base=${API_BASE})"
|
e50924ed
tangwang
1. tags -> enrich...
|
116
117
118
119
120
121
122
123
124
125
|
# Send one suggestion request per (language, query) pair and print each raw
# response under a header naming the request.
for smoke_lang in "${SAMPLE_LANGS[@]}"; do
  for smoke_query in "${SAMPLE_QUERIES[@]}"; do
    request_path="/search/suggestions?q=${smoke_query}&size=10&language=${smoke_lang}"
    printf '%s\n' "--- GET /search/suggestions?q=${smoke_query}&language=${smoke_lang} ---"
    curl -sS "${API_BASE}${request_path}" -H "X-Tenant-ID: ${TENANT_ID}"
    printf '\n'
  done
done
exit 0
fi
|
ff9efda0
tangwang
suggest
|
126
|
"$PY_BIN" main.py build-suggestions \
|
f251cf2d
tangwang
suggestion全量索引程序跑通
|
127
|
--tenant-id "$TENANT_ID" \
|
e50924ed
tangwang
1. tags -> enrich...
|
128
|
${PASSTHROUGH_ARGS[@]+"${PASSTHROUGH_ARGS[@]}"} # expands to nothing when empty; plain "${arr[@]}" trips `set -u` on bash < 4.4
|