config.py
1.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""Reranker service compatibility config derived from unified app config."""
from __future__ import annotations
import os
from config.loader import get_app_config
# Lower-cased string spellings treated as "false" by _bool_setting.
_FALSE_STRINGS = frozenset({"", "0", "false", "no", "off"})


def _int_setting(*candidates: object, default: int) -> int:
    """Return ``int()`` of the first candidate that is not ``None``, else *default*."""
    for candidate in candidates:
        if candidate is not None:
            return int(candidate)
    return default


def _bool_setting(value: object, default: bool) -> bool:
    """Coerce a config value to ``bool``; ``None`` means "unset" and yields *default*."""
    if value is None:
        return default
    if isinstance(value, str):
        # bool("false") is True, so textual flags must be parsed explicitly.
        return value.strip().lower() not in _FALSE_STRINGS
    return bool(value)


class RerankerConfig:
    """Flat, upper-case view of the rerank service settings.

    Values are read from ``get_app_config().services.rerank``. The instance
    name, host, port, backend and runtime directory can each be overridden by
    an environment variable; an unset or empty variable falls back to the
    configured value.

    Backend and request settings that are missing *or explicitly null* fall
    back to the built-in defaults below, and string-valued boolean flags such
    as ``"false"`` or ``"0"`` are interpreted as false.

    Raises:
        ValueError: if a numeric setting (or ``RERANKER_PORT``) is a string
            that cannot be parsed as an integer.
    """

    def __init__(self) -> None:
        app_config = get_app_config()
        service = app_config.services.rerank

        # RERANK_INSTANCE selects the instance; a blank or whitespace-only
        # value falls back to the service's default instance.
        instance_name = (
            str(os.getenv("RERANK_INSTANCE") or service.default_instance).strip()
            or service.default_instance
        )
        instance = service.get_instance(instance_name)
        backend = service.get_backend_config(instance_name)
        request = service.request

        # --- Instance-level settings (environment variable wins) ---
        # NOTE(review): the override variables mix the RERANK_ and RERANKER_
        # prefixes; the names are kept as-is because deployments may rely on them.
        self.INSTANCE = instance_name
        self.HOST = str(os.getenv("RERANKER_HOST") or instance.host)
        self.PORT = int(os.getenv("RERANKER_PORT") or instance.port)
        self.BACKEND = str(os.getenv("RERANK_BACKEND") or instance.backend)
        self.RUNTIME_DIR = str(
            os.getenv("RERANKER_RUNTIME_DIR")
            or instance.runtime_dir
            or f"./.runtime/reranker/{instance_name}"
        )

        # --- Backend (model) settings ---
        self.MODEL_NAME = str(backend.get("model_name") or "Qwen/Qwen3-Reranker-0.6B")
        self.DEVICE = backend.get("device")  # passed through unchanged; may be None
        self.USE_FP16 = _bool_setting(backend.get("use_fp16"), True)
        # "infer_batch_size" is consulted only when "batch_size" is absent or null.
        self.BATCH_SIZE = _int_setting(
            backend.get("batch_size"),
            backend.get("infer_batch_size"),
            default=64,
        )
        self.MAX_LENGTH = _int_setting(backend.get("max_length"), default=512)
        self.CACHE_DIR = str(backend.get("cache_dir") or "./model_cache")
        self.ENABLE_WARMUP = _bool_setting(backend.get("enable_warmup"), True)

        # --- Request-level settings ---
        self.MAX_DOCS = _int_setting(request.get("max_docs"), default=1000)
        self.NORMALIZE = _bool_setting(request.get("normalize"), True)
# Shared module-level instance. It is built when this module is imported, so
# the app config and the environment overrides are read at import time.
CONFIG: RerankerConfig = RerankerConfig()