Commit 286e9b4f2263cb08873b9f3ef506a304070c0861
1 parent
310bb3bc
evaluation
Showing 5 changed files with 91 additions and 95 deletions
Show diff stats
scripts/evaluation/eval_framework/cli.py
| @@ -17,16 +17,6 @@ from .web_app import create_web_app | @@ -17,16 +17,6 @@ from .web_app import create_web_app | ||
| 17 | _cli_log = logging.getLogger("search_eval.cli") | 17 | _cli_log = logging.getLogger("search_eval.cli") |
| 18 | 18 | ||
| 19 | 19 | ||
| 20 | -def _filter_queries_from_start(queries: list[str], start_from_query: str | None) -> list[str]: | ||
| 21 | - if not start_from_query: | ||
| 22 | - return queries | ||
| 23 | - try: | ||
| 24 | - start_idx = queries.index(start_from_query) | ||
| 25 | - except ValueError as exc: | ||
| 26 | - raise SystemExit(f"start-from-query not found in queries file: {start_from_query!r}") from exc | ||
| 27 | - return queries[start_idx:] | ||
| 28 | - | ||
| 29 | - | ||
| 30 | def _reset_build_artifacts() -> None: | 20 | def _reset_build_artifacts() -> None: |
| 31 | from config.loader import get_app_config | 21 | from config.loader import get_app_config |
| 32 | 22 | ||
| @@ -238,11 +228,6 @@ def build_cli_parser() -> argparse.ArgumentParser: | @@ -238,11 +228,6 @@ def build_cli_parser() -> argparse.ArgumentParser: | ||
| 238 | help="Default: search_evaluation.default_language.", | 228 | help="Default: search_evaluation.default_language.", |
| 239 | ) | 229 | ) |
| 240 | build.add_argument( | 230 | build.add_argument( |
| 241 | - "--start-from-query", | ||
| 242 | - default=None, | ||
| 243 | - help="Start processing from this exact query text in the queries file.", | ||
| 244 | - ) | ||
| 245 | - build.add_argument( | ||
| 246 | "--reset-artifacts", | 231 | "--reset-artifacts", |
| 247 | action="store_true", | 232 | action="store_true", |
| 248 | help="Delete rebuild cache/artifacts (SQLite + query_builds) before starting.", | 233 | help="Delete rebuild cache/artifacts (SQLite + query_builds) before starting.", |
| @@ -257,11 +242,6 @@ def build_cli_parser() -> argparse.ArgumentParser: | @@ -257,11 +242,6 @@ def build_cli_parser() -> argparse.ArgumentParser: | ||
| 257 | batch.add_argument("--queries-file", default=None, help="Default: search_evaluation.queries_file.") | 242 | batch.add_argument("--queries-file", default=None, help="Default: search_evaluation.queries_file.") |
| 258 | batch.add_argument("--top-k", type=int, default=None, help="Default: search_evaluation.batch_top_k.") | 243 | batch.add_argument("--top-k", type=int, default=None, help="Default: search_evaluation.batch_top_k.") |
| 259 | batch.add_argument("--language", default=None, help="Default: search_evaluation.default_language.") | 244 | batch.add_argument("--language", default=None, help="Default: search_evaluation.default_language.") |
| 260 | - batch.add_argument( | ||
| 261 | - "--start-from-query", | ||
| 262 | - default=None, | ||
| 263 | - help="Start processing from this exact query text in the queries file.", | ||
| 264 | - ) | ||
| 265 | batch.add_argument("--force-refresh-labels", action="store_true") | 245 | batch.add_argument("--force-refresh-labels", action="store_true") |
| 266 | add_judge_llm_args(batch) | 246 | add_judge_llm_args(batch) |
| 267 | add_intent_llm_args(batch) | 247 | add_intent_llm_args(batch) |
| @@ -272,11 +252,6 @@ def build_cli_parser() -> argparse.ArgumentParser: | @@ -272,11 +252,6 @@ def build_cli_parser() -> argparse.ArgumentParser: | ||
| 272 | audit.add_argument("--top-k", type=int, default=None, help="Default: search_evaluation.audit_top_k.") | 252 | audit.add_argument("--top-k", type=int, default=None, help="Default: search_evaluation.audit_top_k.") |
| 273 | audit.add_argument("--language", default=None, help="Default: search_evaluation.default_language.") | 253 | audit.add_argument("--language", default=None, help="Default: search_evaluation.default_language.") |
| 274 | audit.add_argument( | 254 | audit.add_argument( |
| 275 | - "--start-from-query", | ||
| 276 | - default=None, | ||
| 277 | - help="Start processing from this exact query text in the queries file.", | ||
| 278 | - ) | ||
| 279 | - audit.add_argument( | ||
| 280 | "--limit-suspicious", | 255 | "--limit-suspicious", |
| 281 | type=int, | 256 | type=int, |
| 282 | default=None, | 257 | default=None, |
| @@ -302,7 +277,6 @@ def run_build(args: argparse.Namespace) -> None: | @@ -302,7 +277,6 @@ def run_build(args: argparse.Namespace) -> None: | ||
| 302 | _reset_build_artifacts() | 277 | _reset_build_artifacts() |
| 303 | framework = SearchEvaluationFramework(tenant_id=args.tenant_id, **framework_kwargs_from_args(args)) | 278 | framework = SearchEvaluationFramework(tenant_id=args.tenant_id, **framework_kwargs_from_args(args)) |
| 304 | queries = framework.queries_from_file(Path(args.queries_file)) | 279 | queries = framework.queries_from_file(Path(args.queries_file)) |
| 305 | - queries = _filter_queries_from_start(queries, args.start_from_query) | ||
| 306 | summary = [] | 280 | summary = [] |
| 307 | rebuild_kwargs = {} | 281 | rebuild_kwargs = {} |
| 308 | if args.force_refresh_labels: | 282 | if args.force_refresh_labels: |
| @@ -320,17 +294,21 @@ def run_build(args: argparse.Namespace) -> None: | @@ -320,17 +294,21 @@ def run_build(args: argparse.Namespace) -> None: | ||
| 320 | total_q = len(queries) | 294 | total_q = len(queries) |
| 321 | for q_index, query in enumerate(queries, start=1): | 295 | for q_index, query in enumerate(queries, start=1): |
| 322 | _cli_log.info("[build] (%s/%s) starting query=%r", q_index, total_q, query) | 296 | _cli_log.info("[build] (%s/%s) starting query=%r", q_index, total_q, query) |
| 323 | - result = framework.build_query_annotation_set( | ||
| 324 | - query=query, | ||
| 325 | - search_depth=args.search_depth, | ||
| 326 | - rerank_depth=args.rerank_depth, | ||
| 327 | - annotate_search_top_k=args.annotate_search_top_k, | ||
| 328 | - annotate_rerank_top_k=args.annotate_rerank_top_k, | ||
| 329 | - language=args.language, | ||
| 330 | - force_refresh_rerank=args.force_refresh_rerank, | ||
| 331 | - force_refresh_labels=args.force_refresh_labels, | ||
| 332 | - **rebuild_kwargs, | ||
| 333 | - ) | 297 | + try: |
| 298 | + result = framework.build_query_annotation_set( | ||
| 299 | + query=query, | ||
| 300 | + search_depth=args.search_depth, | ||
| 301 | + rerank_depth=args.rerank_depth, | ||
| 302 | + annotate_search_top_k=args.annotate_search_top_k, | ||
| 303 | + annotate_rerank_top_k=args.annotate_rerank_top_k, | ||
| 304 | + language=args.language, | ||
| 305 | + force_refresh_rerank=args.force_refresh_rerank, | ||
| 306 | + force_refresh_labels=args.force_refresh_labels, | ||
| 307 | + **rebuild_kwargs, | ||
| 308 | + ) | ||
| 309 | + except Exception: | ||
| 310 | + _cli_log.exception("[build] failed query=%r index=%s/%s", query, q_index, total_q) | ||
| 311 | + raise | ||
| 334 | summary.append( | 312 | summary.append( |
| 335 | { | 313 | { |
| 336 | "query": result.query, | 314 | "query": result.query, |
| @@ -358,22 +336,24 @@ def run_build(args: argparse.Namespace) -> None: | @@ -358,22 +336,24 @@ def run_build(args: argparse.Namespace) -> None: | ||
| 358 | def run_batch(args: argparse.Namespace) -> None: | 336 | def run_batch(args: argparse.Namespace) -> None: |
| 359 | framework = SearchEvaluationFramework(tenant_id=args.tenant_id, **framework_kwargs_from_args(args)) | 337 | framework = SearchEvaluationFramework(tenant_id=args.tenant_id, **framework_kwargs_from_args(args)) |
| 360 | queries = framework.queries_from_file(Path(args.queries_file)) | 338 | queries = framework.queries_from_file(Path(args.queries_file)) |
| 361 | - queries = _filter_queries_from_start(queries, args.start_from_query) | ||
| 362 | _cli_log.info("[batch] queries_file=%s count=%s", args.queries_file, len(queries)) | 339 | _cli_log.info("[batch] queries_file=%s count=%s", args.queries_file, len(queries)) |
| 363 | - payload = framework.batch_evaluate( | ||
| 364 | - queries=queries, | ||
| 365 | - top_k=args.top_k, | ||
| 366 | - auto_annotate=True, | ||
| 367 | - language=args.language, | ||
| 368 | - force_refresh_labels=args.force_refresh_labels, | ||
| 369 | - ) | 340 | + try: |
| 341 | + payload = framework.batch_evaluate( | ||
| 342 | + queries=queries, | ||
| 343 | + top_k=args.top_k, | ||
| 344 | + auto_annotate=True, | ||
| 345 | + language=args.language, | ||
| 346 | + force_refresh_labels=args.force_refresh_labels, | ||
| 347 | + ) | ||
| 348 | + except Exception: | ||
| 349 | + _cli_log.exception("[batch] failed while evaluating query list from %s", args.queries_file) | ||
| 350 | + raise | ||
| 370 | _cli_log.info("[done] batch_id=%s aggregate_metrics=%s", payload["batch_id"], payload["aggregate_metrics"]) | 351 | _cli_log.info("[done] batch_id=%s aggregate_metrics=%s", payload["batch_id"], payload["aggregate_metrics"]) |
| 371 | 352 | ||
| 372 | 353 | ||
| 373 | def run_audit(args: argparse.Namespace) -> None: | 354 | def run_audit(args: argparse.Namespace) -> None: |
| 374 | framework = SearchEvaluationFramework(tenant_id=args.tenant_id, **framework_kwargs_from_args(args)) | 355 | framework = SearchEvaluationFramework(tenant_id=args.tenant_id, **framework_kwargs_from_args(args)) |
| 375 | queries = framework.queries_from_file(Path(args.queries_file)) | 356 | queries = framework.queries_from_file(Path(args.queries_file)) |
| 376 | - queries = _filter_queries_from_start(queries, args.start_from_query) | ||
| 377 | audit_items = [] | 357 | audit_items = [] |
| 378 | for query in queries: | 358 | for query in queries: |
| 379 | item = framework.audit_live_query( | 359 | item = framework.audit_live_query( |
scripts/evaluation/eval_framework/clients.py
| @@ -212,6 +212,8 @@ class DashScopeLabelClient: | @@ -212,6 +212,8 @@ class DashScopeLabelClient: | ||
| 212 | self.enable_thinking = bool(enable_thinking) | 212 | self.enable_thinking = bool(enable_thinking) |
| 213 | self.use_batch = bool(use_batch) | 213 | self.use_batch = bool(use_batch) |
| 214 | self.session = requests.Session() | 214 | self.session = requests.Session() |
| 215 | + self.retry_attempts = 4 | ||
| 216 | + self.retry_delay_sec = 3.0 | ||
| 215 | 217 | ||
| 216 | def _auth_headers(self) -> Dict[str, str]: | 218 | def _auth_headers(self) -> Dict[str, str]: |
| 217 | return {"Authorization": f"Bearer {self.api_key}"} | 219 | return {"Authorization": f"Bearer {self.api_key}"} |
| @@ -320,18 +322,41 @@ class DashScopeLabelClient: | @@ -320,18 +322,41 @@ class DashScopeLabelClient: | ||
| 320 | return content, safe_json_dumps(row) | 322 | return content, safe_json_dumps(row) |
| 321 | 323 | ||
| 322 | def _chat(self, prompt: str, *, phase: str = "chat") -> Tuple[str, str]: | 324 | def _chat(self, prompt: str, *, phase: str = "chat") -> Tuple[str, str]: |
| 323 | - if not self.use_batch: | ||
| 324 | - content, raw = self._chat_sync(prompt) | ||
| 325 | - else: | 325 | + last_exc: Exception | None = None |
| 326 | + for attempt in range(1, self.retry_attempts + 1): | ||
| 326 | try: | 327 | try: |
| 327 | - content, raw = self._chat_batch(prompt) | ||
| 328 | - except requests.exceptions.HTTPError as e: | ||
| 329 | - resp = getattr(e, "response", None) | ||
| 330 | - if resp is not None and resp.status_code == 404: | ||
| 331 | - self.use_batch = False | 328 | + if not self.use_batch: |
| 332 | content, raw = self._chat_sync(prompt) | 329 | content, raw = self._chat_sync(prompt) |
| 333 | else: | 330 | else: |
| 331 | + try: | ||
| 332 | + content, raw = self._chat_batch(prompt) | ||
| 333 | + except requests.exceptions.HTTPError as e: | ||
| 334 | + resp = getattr(e, "response", None) | ||
| 335 | + if resp is not None and resp.status_code == 404: | ||
| 336 | + self.use_batch = False | ||
| 337 | + content, raw = self._chat_sync(prompt) | ||
| 338 | + else: | ||
| 339 | + raise | ||
| 340 | + break | ||
| 341 | + except Exception as exc: | ||
| 342 | + last_exc = exc | ||
| 343 | + is_request_error = isinstance(exc, requests.exceptions.RequestException) | ||
| 344 | + if not is_request_error or attempt >= self.retry_attempts: | ||
| 334 | raise | 345 | raise |
| 346 | + _client_log.warning( | ||
| 347 | + "Transient DashScope error, retrying (%s/%s): phase=%s model=%s use_batch=%s error=%s", | ||
| 348 | + attempt, | ||
| 349 | + self.retry_attempts, | ||
| 350 | + phase, | ||
| 351 | + self.model, | ||
| 352 | + self.use_batch, | ||
| 353 | + exc, | ||
| 354 | + ) | ||
| 355 | + time.sleep(self.retry_delay_sec) | ||
| 356 | + else: | ||
| 357 | + if last_exc is not None: | ||
| 358 | + raise last_exc | ||
| 359 | + raise RuntimeError(f"unexpected DashScope retry state for phase={phase}") | ||
| 335 | _log_eval_llm_verbose( | 360 | _log_eval_llm_verbose( |
| 336 | phase=phase, | 361 | phase=phase, |
| 337 | model=self.model, | 362 | model=self.model, |
scripts/evaluation/eval_framework/framework.py
| @@ -335,6 +335,12 @@ class SearchEvaluationFramework: | @@ -335,6 +335,12 @@ class SearchEvaluationFramework: | ||
| 335 | ) | 335 | ) |
| 336 | return [(labels, raw_response, docs)] | 336 | return [(labels, raw_response, docs)] |
| 337 | except Exception: | 337 | except Exception: |
| 338 | + _log.exception( | ||
| 339 | + "[eval-rebuild] classify failed query=%r docs=%s; %s", | ||
| 340 | + query, | ||
| 341 | + len(docs), | ||
| 342 | + "splitting batch" if len(docs) > 1 else "single-doc failure", | ||
| 343 | + ) | ||
| 338 | if len(docs) == 1: | 344 | if len(docs) == 1: |
| 339 | raise | 345 | raise |
| 340 | mid = len(docs) // 2 | 346 | mid = len(docs) // 2 |
| @@ -382,6 +388,15 @@ class SearchEvaluationFramework: | @@ -382,6 +388,15 @@ class SearchEvaluationFramework: | ||
| 382 | if not batch_docs: | 388 | if not batch_docs: |
| 383 | break | 389 | break |
| 384 | 390 | ||
| 391 | + _log.info( | ||
| 392 | + "[eval-rebuild] query=%r starting llm_batch=%s/%s size=%s offset=%s", | ||
| 393 | + query, | ||
| 394 | + batch_idx + 1, | ||
| 395 | + max_batches, | ||
| 396 | + len(batch_docs), | ||
| 397 | + start, | ||
| 398 | + ) | ||
| 399 | + | ||
| 385 | batch_pairs = self._classify_with_retry(query, batch_docs, force_refresh=force_refresh) | 400 | batch_pairs = self._classify_with_retry(query, batch_docs, force_refresh=force_refresh) |
| 386 | for sub_labels, raw_response, sub_batch in batch_pairs: | 401 | for sub_labels, raw_response, sub_batch in batch_pairs: |
| 387 | to_store = {str(doc.get("spu_id")): label for doc, label in zip(sub_batch, sub_labels)} | 402 | to_store = {str(doc.get("spu_id")): label for doc, label in zip(sub_batch, sub_labels)} |
scripts/evaluation/eval_framework/utils.py
| @@ -84,22 +84,12 @@ def build_label_doc_line(idx: int, doc: Dict[str, Any]) -> str: | @@ -84,22 +84,12 @@ def build_label_doc_line(idx: int, doc: Dict[str, Any]) -> str: | ||
| 84 | option1, option2, option3 = compact_option_values(doc.get("skus") or []) | 84 | option1, option2, option3 = compact_option_values(doc.get("skus") or []) |
| 85 | vendor = pick_text(doc.get("vendor"), "en") | 85 | vendor = pick_text(doc.get("vendor"), "en") |
| 86 | category = pick_text(doc.get("category_path"), "en") or pick_text(doc.get("category_name"), "en") | 86 | category = pick_text(doc.get("category_path"), "en") or pick_text(doc.get("category_name"), "en") |
| 87 | - tags = doc.get("tags") or [] | ||
| 88 | - tags_text = ", ".join(str(tag) for tag in tags[:4] if tag) | ||
| 89 | parts = [title] | 87 | parts = [title] |
| 90 | if option1: | 88 | if option1: |
| 91 | - parts.append(f"option1={option1}") | 89 | + parts.append(f"{option1}") |
| 92 | if option2: | 90 | if option2: |
| 93 | - parts.append(f"option2={option2}") | ||
| 94 | - if option3: | ||
| 95 | - parts.append(f"option3={option3}") | ||
| 96 | - if vendor: | ||
| 97 | - parts.append(f"vendor={vendor}") | ||
| 98 | - if category: | ||
| 99 | - parts.append(f"category={category}") | ||
| 100 | - if tags_text: | ||
| 101 | - parts.append(f"tags={tags_text}") | ||
| 102 | - return f"{idx}. " + " | ".join(part for part in parts if part) | 91 | + parts.append(f"{option2}") |
| 92 | + return f"{idx}. " + " ".join(part for part in parts if part) | ||
| 103 | 93 | ||
| 104 | 94 | ||
| 105 | def compact_product_payload(doc: Dict[str, Any]) -> Dict[str, Any]: | 95 | def compact_product_payload(doc: Dict[str, Any]) -> Dict[str, Any]: |
| @@ -109,8 +99,7 @@ def compact_product_payload(doc: Dict[str, Any]) -> Dict[str, Any]: | @@ -109,8 +99,7 @@ def compact_product_payload(doc: Dict[str, Any]) -> Dict[str, Any]: | ||
| 109 | "image_url": doc.get("image_url"), | 99 | "image_url": doc.get("image_url"), |
| 110 | "vendor": pick_text(doc.get("vendor"), "en"), | 100 | "vendor": pick_text(doc.get("vendor"), "en"), |
| 111 | "category": pick_text(doc.get("category_path"), "en") or pick_text(doc.get("category_name"), "en"), | 101 | "category": pick_text(doc.get("category_path"), "en") or pick_text(doc.get("category_name"), "en"), |
| 112 | - "option_values": list(compact_option_values(doc.get("skus") or [])), | ||
| 113 | - "tags": list((doc.get("tags") or [])[:6]), | 102 | + "option_values": list(compact_option_values(doc.get("skus") or [])) |
| 114 | } | 103 | } |
| 115 | 104 | ||
| 116 | 105 |
scripts/evaluation/start_eval.sh
| @@ -7,46 +7,33 @@ cd "$ROOT" | @@ -7,46 +7,33 @@ cd "$ROOT" | ||
| 7 | PY="${ROOT}/.venv/bin/python" | 7 | PY="${ROOT}/.venv/bin/python" |
| 8 | TENANT_ID="${TENANT_ID:-163}" | 8 | TENANT_ID="${TENANT_ID:-163}" |
| 9 | QUERIES="${REPO_EVAL_QUERIES:-scripts/evaluation/queries/queries.txt}" | 9 | QUERIES="${REPO_EVAL_QUERIES:-scripts/evaluation/queries/queries.txt}" |
| 10 | -START_FROM_QUERY="${REPO_EVAL_START_FROM_QUERY:-}" | ||
| 11 | 10 | ||
| 12 | usage() { | 11 | usage() { |
| 13 | echo "Usage: $0 batch|batch-rebuild|serve" | 12 | echo "Usage: $0 batch|batch-rebuild|serve" |
| 14 | echo " batch — batch eval: live search every query, LLM only for missing labels (top_k=50)" | 13 | echo " batch — batch eval: live search every query, LLM only for missing labels (top_k=50)" |
| 15 | echo " batch-rebuild — deep rebuild: build --force-refresh-labels (search recall pool + full-corpus rerank + batched LLM; expensive)" | 14 | echo " batch-rebuild — deep rebuild: build --force-refresh-labels (search recall pool + full-corpus rerank + batched LLM; expensive)" |
| 16 | echo " serve — eval UI (default http://0.0.0.0:\${EVAL_WEB_PORT:-6010}/; also: ./scripts/start_eval_web.sh)" | 15 | echo " serve — eval UI (default http://0.0.0.0:\${EVAL_WEB_PORT:-6010}/; also: ./scripts/start_eval_web.sh)" |
| 17 | - echo "Env: TENANT_ID (default 163), REPO_EVAL_QUERIES, REPO_EVAL_START_FROM_QUERY, EVAL_WEB_HOST, EVAL_WEB_PORT (default 6010)" | 16 | + echo "Env: TENANT_ID (default 163), REPO_EVAL_QUERIES, EVAL_WEB_HOST, EVAL_WEB_PORT (default 6010)" |
| 18 | } | 17 | } |
| 19 | 18 | ||
| 20 | case "${1:-}" in | 19 | case "${1:-}" in |
| 21 | batch) | 20 | batch) |
| 22 | - cmd=( | ||
| 23 | - "$PY" scripts/evaluation/build_annotation_set.py batch | ||
| 24 | - --tenant-id "$TENANT_ID" | ||
| 25 | - --queries-file "$QUERIES" | ||
| 26 | - --top-k 50 | 21 | + exec "$PY" scripts/evaluation/build_annotation_set.py batch \ |
| 22 | + --tenant-id "$TENANT_ID" \ | ||
| 23 | + --queries-file "$QUERIES" \ | ||
| 24 | + --top-k 50 \ | ||
| 27 | --language en | 25 | --language en |
| 28 | - ) | ||
| 29 | - if [ -n "$START_FROM_QUERY" ]; then | ||
| 30 | - cmd+=(--start-from-query "$START_FROM_QUERY") | ||
| 31 | - fi | ||
| 32 | - exec "${cmd[@]}" | ||
| 33 | ;; | 26 | ;; |
| 34 | batch-rebuild) | 27 | batch-rebuild) |
| 35 | - cmd=( | ||
| 36 | - "$PY" scripts/evaluation/build_annotation_set.py build | ||
| 37 | - --tenant-id "$TENANT_ID" | ||
| 38 | - --queries-file "$QUERIES" | ||
| 39 | - --search-depth 500 | ||
| 40 | - --rerank-depth 10000 | ||
| 41 | - --reset-artifacts | ||
| 42 | - --force-refresh-rerank | ||
| 43 | - --force-refresh-labels | 28 | + exec "$PY" scripts/evaluation/build_annotation_set.py build \ |
| 29 | + --tenant-id "$TENANT_ID" \ | ||
| 30 | + --queries-file "$QUERIES" \ | ||
| 31 | + --search-depth 500 \ | ||
| 32 | + --rerank-depth 10000 \ | ||
| 33 | + --reset-artifacts \ | ||
| 34 | + --force-refresh-rerank \ | ||
| 35 | + --force-refresh-labels \ | ||
| 44 | --language en | 36 | --language en |
| 45 | - ) | ||
| 46 | - if [ -n "$START_FROM_QUERY" ]; then | ||
| 47 | - cmd+=(--start-from-query "$START_FROM_QUERY") | ||
| 48 | - fi | ||
| 49 | - exec "${cmd[@]}" | ||
| 50 | ;; | 37 | ;; |
| 51 | serve) | 38 | serve) |
| 52 | EVAL_WEB_PORT="${EVAL_WEB_PORT:-6010}" | 39 | EVAL_WEB_PORT="${EVAL_WEB_PORT:-6010}" |