From 363578ca373adf7ffd21ec33a8833b5dfeef6d9b Mon Sep 17 00:00:00 2001
From: tangwang <tangwang@essa.top>
Date: Thu, 26 Feb 2026 16:59:57 +0800
Subject: [PATCH] **feat: robust thinking support for OpenAI and DashScope**

---
 app/agents/shopping_agent.py | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
 app/config.py                |   6 ++++--
 2 files changed, 160 insertions(+), 9 deletions(-)
diff --git a/app/agents/shopping_agent.py b/app/agents/shopping_agent.py
index fada652..55b162b 100644
--- a/app/agents/shopping_agent.py
+++ b/app/agents/shopping_agent.py
@@ -11,11 +11,13 @@ Architecture:
 import json
 import logging
 import re
+from urllib.parse import urlparse
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Optional, Sequence
 
 from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
+from langchain_core.outputs import ChatResult
 from langchain_openai import ChatOpenAI
 from langgraph.checkpoint.memory import MemorySaver
 from langgraph.graph import END, START, StateGraph
@@ -110,6 +112,63 @@ def _extract_message_text(msg) -> str:
 
 # 部分 API（如 DeepSeek）在 content 中返回 think 标签块，需去掉后只保留正式回复
 _RE_THINK_TAGS = re.compile(r"<think>.*?<\/think>", re.DOTALL | re.IGNORECASE)
+# 仅提取 <think> 标签内正文（用于日志打印 thinking）
+_RE_THINK_INNER = re.compile(r"<think>(.*?)<\/think>", re.DOTALL | re.IGNORECASE)
+
+
+def _normalize_base_url(base_url: Optional[str]) -> str:
+    return base_url.strip().rstrip("/") if base_url else ""  # None/blank -> ""; no trailing "/"
+
+
+def _is_openai_official_base_url(base_url: Optional[str]) -> bool:
+    """Return True if base_url's hostname is api.openai.com or a subdomain of it."""
+    # A None/blank base_url parses to hostname None and therefore yields False.
+    hostname = (urlparse(_normalize_base_url(base_url)).hostname or "").lower()
+    # Compare on a DNS-label boundary; plain endswith() also matched "fooapi.openai.com".
+    return hostname == "api.openai.com" or hostname.endswith(".api.openai.com")
+
+
+def _is_dashscope_base_url(base_url: Optional[str]) -> bool:
+    """Return True when the hostname of base_url contains the substring "dashscope"."""
+    cleaned = _normalize_base_url(base_url)
+    # A missing or blank base_url is never parsed and always yields False.
+    host = urlparse(cleaned).hostname if cleaned else None
+    return "dashscope" in (host or "").lower()
+
+
+def _coerce_reasoning_text(value: Any) -> str:
+    """Best-effort conversion of a reasoning payload to plain text.
+
+    Strings are stripped. For dicts, str/list values under "content", "summary",
+    "text" and "reasoning_content" are collected (lists recursively); lists are
+    coerced per element. Pieces are newline-joined; a non-empty container with no
+    text falls back to a JSON dump. None and empty containers return "".
+    """
+    if isinstance(value, str):
+        return value.strip()
+    if not isinstance(value, (dict, list)):
+        return "" if value is None else str(value).strip()
+    if not value:
+        # {} / [] carry no reasoning; dumping them as "{}" / "[]" would log fake thinking.
+        return ""
+    if isinstance(value, list):
+        parts = [_coerce_reasoning_text(v) for v in value]
+    else:
+        parts = []
+        for key in ("content", "summary", "text", "reasoning_content"):
+            item = value.get(key)
+            if isinstance(item, str):
+                parts.append(item.strip())
+            elif isinstance(item, list):
+                parts.extend(_coerce_reasoning_text(sub) for sub in item)
+    joined = "\n".join(p for p in parts if p).strip()
+    if joined:
+        return joined
+    # Nothing readable found: dump the raw payload so it still shows up in the log.
+    try:
+        return json.dumps(value, ensure_ascii=False)
+    except Exception:
+        return str(value).strip()
 
 
 def _extract_formal_reply(msg) -> str:
@@ -136,9 +195,42 @@ def _extract_formal_reply(msg) -> str:
     return str(content).strip() if content else ""
 
 
-def _message_for_log(msg: BaseMessage) -> dict:
+def _extract_thinking(msg) -> str:
+    """Return the thinking/reasoning text carried by an LLM reply (for logging only).
+
+    Lookup order: additional_kwargs["reasoning_content"], additional_kwargs["reasoning"],
+    reasoning-typed content blocks, then the first <think>...</think> span. "" if none.
+    """
+    extra = getattr(msg, "additional_kwargs", None) or {}
+    # reasoning_content: injected by ChatOpenAIWithReasoningContent; reasoning: e.g. Responses API.
+    for field in ("reasoning_content", "reasoning"):
+        found = _coerce_reasoning_text(extra.get(field))
+        if found:
+            return found
+    content = getattr(msg, "content", "")
+    if isinstance(content, list):
+        pieces = []
+        for block in content:
+            if not isinstance(block, dict):
+                continue
+            kind = (block.get("type") or "").lower()
+            if kind in ("reasoning", "reasoning_content", "thinking"):
+                piece = _coerce_reasoning_text(block.get("text") or block.get("content") or block)
+                if piece:
+                    pieces.append(piece)
+        if pieces:
+            return "".join(pieces).strip()
+    elif isinstance(content, str):
+        match = _RE_THINK_INNER.search(content)
+        if match:
+            return match.group(1).strip()
+    return ""
+
+
+def _message_for_log(msg: BaseMessage, include_thinking: bool = False) -> dict:
     """Serialize a message for structured logging (content truncated)."""
-    if getattr(msg, "additional_kwargs", None) and "reasoning" in (msg.additional_kwargs or {}):
+    msg_kwargs = getattr(msg, "additional_kwargs", None) or {}
+    if msg_kwargs and any(k in msg_kwargs for k in ("reasoning", "reasoning_content")):
         text = _extract_formal_reply(msg) or _extract_message_text(msg)
     else:
         text = _extract_message_text(msg)
@@ -148,6 +240,12 @@ def _message_for_log(msg: BaseMessage) -> dict:
         "type": getattr(msg, "type", "unknown"),
         "content": text,
     }
+    if include_thinking:
+        thinking = _extract_thinking(msg)
+        if thinking:
+            if len(thinking) > _LOG_CONTENT_MAX:
+                thinking = thinking[:_LOG_CONTENT_MAX] + f"... [truncated, total {len(thinking)} chars]"
+            out["thinking"] = thinking
     if hasattr(msg, "tool_calls") and msg.tool_calls:
         out["tool_calls"] = [
             {"name": tc.get("name"), "args": tc.get("args", {})}
@@ -156,6 +254,38 @@ def _message_for_log(msg: BaseMessage) -> dict:
     return out
 
 
+# ── DashScope thinking 支持 ─────────────────────────────────────────────────────
+# LangChain 解析 chat completion 时不会把 API 返回的 reasoning_content 写入 message，
+# 子类在 _create_chat_result 中把 reasoning_content 注入到 additional_kwargs，便于日志打印。
+
+class ChatOpenAIWithReasoningContent(ChatOpenAI):
+    """ChatOpenAI variant that keeps each choice's reasoning_content on its message."""
+
+    def _create_chat_result(
+        self,
+        response: Any,
+        generation_info: Optional[dict] = None,
+    ) -> ChatResult:
+        """Build the result via ChatOpenAI, then copy every non-empty
+        choices[i].message.reasoning_content into additional_kwargs of generation i."""
+        result = super()._create_chat_result(response, generation_info)
+        if isinstance(response, dict):
+            payload = response
+        else:
+            # Non-dict responses are used only if they expose a callable model_dump().
+            dump = getattr(response, "model_dump", None)
+            payload = (dump() if callable(dump) else None) or {}
+        # zip() stops at the shorter sequence, so surplus choices are ignored.
+        for choice, generation in zip(payload.get("choices") or [], result.generations):
+            raw_message = choice.get("message") or {}
+            if not isinstance(raw_message, dict):
+                continue
+            reasoning = raw_message.get("reasoning_content")
+            if reasoning and isinstance(generation.message, BaseMessage):
+                generation.message.additional_kwargs["reasoning_content"] = reasoning
+        return result
+
+
 # ── Agent class ────────────────────────────────────────────────────────────────
 
 class ShoppingAgent:
@@ -169,14 +299,33 @@ class ShoppingAgent:
             temperature=settings.openai_temperature,
             api_key=settings.openai_api_key,
         )
-        if settings.openai_api_base_url:
-            llm_kwargs["base_url"] = settings.openai_api_base_url
-        if getattr(settings, "openai_use_reasoning", False):
+        base_url = _normalize_base_url(settings.openai_api_base_url)
+        if base_url:
+            llm_kwargs["base_url"] = base_url
+
+        use_reasoning = getattr(settings, "openai_use_reasoning", False)
+        if use_reasoning and (not base_url or _is_openai_official_base_url(base_url)):
+            # OpenAI 官方 endpoint：使用 Responses API 的 reasoning 参数。
             llm_kwargs["use_responses_api"] = True
             effort = getattr(settings, "openai_reasoning_effort", "medium") or "medium"
             llm_kwargs["model_kwargs"] = {"reasoning": {"effort": effort, "summary": "none"}}
+        elif use_reasoning and _is_dashscope_base_url(base_url):
+            # DashScope 兼容 endpoint：通过 extra_body 开启思考，返回 reasoning_content。
+            extra = llm_kwargs.get("extra_body") or {}
+            llm_kwargs["extra_body"] = {**extra, "enable_thinking": True}
+        elif use_reasoning and base_url:
+            logger.info(
+                "Reasoning requested but base_url is non-OpenAI/non-DashScope; "
+                "skipping provider-specific reasoning params. base_url=%s",
+                base_url,
+            )
 
-        self.llm = ChatOpenAI(**llm_kwargs)
+        llm_class = (
+            ChatOpenAIWithReasoningContent
+            if base_url and not _is_openai_official_base_url(base_url)
+            else ChatOpenAI
+        )
+        self.llm = llm_class(**llm_kwargs)
 
         # Tools are session-bound so search_products writes to the right registry partition
         self.tools = get_all_tools(session_id=self.session_id, registry=global_registry)
@@ -196,7 +345,7 @@ class ShoppingAgent:
                 req_json = req_json[:_LOG_CONTENT_MAX] + f"... [truncated total {len(req_json)}]"
             logger.info("[%s] LLM_REQUEST messages=%s", self.session_id, req_json)
             response = self.llm_with_tools.invoke(messages)
-            response_log = _message_for_log(response)
+            response_log = _message_for_log(response, include_thinking=True)
             logger.info(
                 "[%s] LLM_RESPONSE %s",
                 self.session_id,
diff --git a/app/config.py b/app/config.py
index 2948858..8879274 100644
--- a/app/config.py
+++ b/app/config.py
@@ -33,8 +33,10 @@ class Settings(BaseSettings):
     openai_vision_model: str = "qwen3-omni-flash"
     openai_temperature: float = 0.7
     openai_max_tokens: int = 1000
-    # 对话调用大模型时是否开启 thinking（需兼容 Responses API / reasoning 的模型，如 o1/o3/o4-mini）
-    openai_use_reasoning: bool = False
+    # 对话调用大模型时是否开启 thinking：
+    # - OpenAI 官方 endpoint（含 api.openai.com base_url）：走 Responses API reasoning
+    # - DashScope 兼容 endpoint：通过 extra_body.enable_thinking 开启
+    openai_use_reasoning: bool = True
     openai_reasoning_effort: str = "medium"  # low | medium | high
     # Base URL for OpenAI-compatible APIs (e.g. Qwen/DashScope)
     # Qwen 北京: https://dashscope.aliyuncs.com/compatible-mode/v1
--
libgit2 0.21.2