Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 93 additions & 18 deletions deeptutor/agents/chat/agentic_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,75 @@
new_call_id,
)
from deeptutor.runtime.registry.tool_registry import get_tool_registry
from deeptutor.services.prompt import get_prompt_manager
from deeptutor.services.config import get_chat_params
from deeptutor.services.llm import (
clean_thinking_tags,
complete as llm_complete,
get_llm_config,
get_token_limit_kwargs,
prepare_multimodal_messages,
stream as llm_stream,
supports_response_format,
supports_tools,
)
from deeptutor.services.llm import (
stream as llm_stream,
)
from deeptutor.services.prompt import get_prompt_manager
from deeptutor.tools.builtin import BUILTIN_TOOL_NAMES
from deeptutor.utils.json_parser import parse_json_response

logger = logging.getLogger(__name__)

# Builtin tools withheld from the chat capability.
CHAT_EXCLUDED_TOOLS = {"geogebra_analysis"}
# Every builtin tool except the excluded ones is selectable in chat.
# NOTE: the source showed this list assigned twice (diff residue); keep a
# single definition.
CHAT_OPTIONAL_TOOLS = [name for name in BUILTIN_TOOL_NAMES if name not in CHAT_EXCLUDED_TOOLS]
# Upper bound on tool calls dispatched concurrently in one acting step.
MAX_PARALLEL_TOOL_CALLS = 8
# Tool output beyond this many characters is truncated before re-prompting.
MAX_TOOL_RESULT_CHARS = 4000

# Pipeline stages whose per-call ``max_tokens`` budget is configurable under
# ``capabilities.chat`` in agents.yaml; order matches the `_ChatLimits` fields.
CHAT_STAGE_KEYS: tuple[str, ...] = tuple(
    "responding answer_now thinking observing acting react_fallback".split()
)


@dataclass
class _ChatLimits:
"""Per-stage ``max_tokens`` resolved from ``capabilities.chat`` in agents.yaml."""

responding: int
answer_now: int
thinking: int
observing: int
acting: int
react_fallback: int

@classmethod
def from_config(cls, cfg: dict[str, Any]) -> "_ChatLimits":
# Defaults below mirror DEFAULT_CHAT_PARAMS so the pipeline still works
# if the YAML block is missing entirely (e.g. minimal/legacy installs).
fallback = {
"responding": 8000,
"answer_now": 8000,
"thinking": 2000,
"observing": 2000,
"acting": 2000,
"react_fallback": 1500,
}
resolved: dict[str, int] = {}
for key in CHAT_STAGE_KEYS:
stage_cfg = cfg.get(key) if isinstance(cfg, dict) else None
if isinstance(stage_cfg, dict):
value = stage_cfg.get("max_tokens", fallback[key])
else:
value = fallback[key]
try:
resolved[key] = int(value)
except (TypeError, ValueError):
resolved[key] = fallback[key]
return cls(**resolved)


@dataclass
class ToolTrace:
Expand All @@ -68,6 +114,19 @@ def __init__(self, language: str = "en") -> None:
self.api_version = getattr(self.llm_config, "api_version", None)
self.registry = get_tool_registry()
self._usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0, "calls": 0}
# capabilities.chat in agents.yaml drives token budgets and temperature
# for every LLM call below; falls back to DEFAULT_CHAT_PARAMS if the
# block is missing.
try:
chat_cfg = get_chat_params()
except Exception as exc:
logger.warning("Failed to load chat params, using defaults: %s", exc)
chat_cfg = {}
try:
self._chat_temperature = float(chat_cfg.get("temperature", 0.2))
except (TypeError, ValueError):
self._chat_temperature = 0.2
self._chat_limits = _ChatLimits.from_config(chat_cfg)
# Prompts live in deeptutor/agents/chat/prompts/{zh,en}/agentic_chat.yaml
# so all user-visible / LLM-facing copy is editable without touching code.
try:
Expand Down Expand Up @@ -214,7 +273,9 @@ async def _stage_thinking(
)

chunks: list[str] = []
async for chunk in self._stream_messages(messages, max_tokens=1200):
async for chunk in self._stream_messages(
messages, max_tokens=self._chat_limits.thinking
):
if not chunk:
continue
chunks.append(chunk)
Expand Down Expand Up @@ -310,7 +371,9 @@ async def _stage_observing(
)

chunks: list[str] = []
async for chunk in self._stream_messages(messages, max_tokens=1200):
async for chunk in self._stream_messages(
messages, max_tokens=self._chat_limits.observing
):
if not chunk:
continue
chunks.append(chunk)
Expand Down Expand Up @@ -372,7 +435,9 @@ async def _stage_responding(
)

chunks: list[str] = []
async for chunk in self._stream_messages(messages, max_tokens=1800):
async for chunk in self._stream_messages(
messages, max_tokens=self._chat_limits.responding
):
if not chunk:
continue
chunks.append(chunk)
Expand Down Expand Up @@ -427,7 +492,9 @@ async def _stage_answer_now(
user_prompt = self._t(
"answer_now.user",
original_user_message=original_user_message,
partial_response=partial_response.strip() if partial_response.strip() else "(empty)",
partial_response=partial_response.strip()
if partial_response.strip()
else "(empty)",
trace_summary=trace_summary,
)
messages = self._build_messages(
Expand All @@ -437,7 +504,9 @@ async def _stage_answer_now(
)

chunks: list[str] = []
async for chunk in self._stream_messages(messages, max_tokens=1800):
async for chunk in self._stream_messages(
messages, max_tokens=self._chat_limits.answer_now
):
if not chunk:
continue
chunks.append(chunk)
Expand Down Expand Up @@ -496,7 +565,7 @@ async def _run_native_tool_loop(
messages=messages,
tools=tool_schemas,
tool_choice="auto",
**self._completion_kwargs(max_tokens=1500),
**self._completion_kwargs(max_tokens=self._chat_limits.acting),
)
self._accumulate_usage(response)
if not response.choices:
Expand Down Expand Up @@ -689,7 +758,7 @@ async def _run_react_fallback(
response_format={"type": "json_object"}
if supports_response_format(self.binding, self.model)
else None,
**self._completion_kwargs(max_tokens=800),
**self._completion_kwargs(max_tokens=self._chat_limits.react_fallback),
):
_chunks.append(_c)
response = "".join(_chunks)
Expand Down Expand Up @@ -820,9 +889,7 @@ def _build_messages(
if context.memory_context:
system_parts.append(context.memory_context)

messages: list[dict[str, Any]] = [
{"role": "system", "content": "\n\n".join(system_parts)}
]
messages: list[dict[str, Any]] = [{"role": "system", "content": "\n\n".join(system_parts)}]
for item in context.conversation_history:
role = item.get("role")
content = item.get("content")
Expand Down Expand Up @@ -890,15 +957,22 @@ def _build_openai_client(self):
)

def _completion_kwargs(self, max_tokens: int) -> dict[str, Any]:
    """Build the kwargs shared by every chat LLM call.

    Applies the configured chat temperature and, when a model is set,
    the model-appropriate token-limit keyword(s) for ``max_tokens``.
    """
    result: dict[str, Any] = {"temperature": self._chat_temperature}
    if not self.model:
        return result
    result.update(get_token_limit_kwargs(self.model, max_tokens))
    return result

def _can_use_native_tool_calling(self) -> bool:
    """Return True when the bound provider should use native tool calling.

    The binding must both advertise tool support and not be one of the
    providers routed through the ReAct fallback instead.
    """
    if not supports_tools(self.binding, self.model):
        return False
    # Providers that support tools but are handled via the fallback path.
    fallback_bindings = {
        "anthropic",
        "claude",
        "ollama",
        "lm_studio",
        "vllm",
        "llama_cpp",
    }
    return self.binding not in fallback_bindings

def _normalize_enabled_tools(self, enabled_tools: list[str] | None) -> list[str]:
selected = enabled_tools or []
Expand All @@ -913,6 +987,7 @@ def _extract_answer_now_context(context: UnifiedContext) -> dict[str, Any] | Non
# Delegate to the shared helper so every capability uses the
# exact same gate (presence + non-empty original_user_message).
from deeptutor.capabilities._answer_now import extract_answer_now_context

return extract_answer_now_context(context)

async def _execute_tool_call(
Expand Down
8 changes: 6 additions & 2 deletions deeptutor/services/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,18 @@
KnowledgeBaseConfigService,
get_kb_config_service,
)
from .model_catalog import ModelCatalogService, get_model_catalog_service
from .loader import (
DEFAULT_CHAT_PARAMS,
PROJECT_ROOT,
get_agent_params,
get_runtime_settings_dir,
get_chat_params,
get_path_from_config,
get_runtime_settings_dir,
load_config_with_main,
parse_language,
resolve_config_path,
)
from .model_catalog import ModelCatalogService, get_model_catalog_service

__all__ = [
"ConfigSummary",
Expand All @@ -28,6 +30,8 @@
"get_path_from_config",
"parse_language",
"get_agent_params",
"get_chat_params",
"DEFAULT_CHAT_PARAMS",
"ResolvedLLMConfig",
"ResolvedEmbeddingConfig",
"ResolvedSearchConfig",
Expand Down
39 changes: 37 additions & 2 deletions deeptutor/services/config/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def get_runtime_settings_dir(project_root: Path | None = None) -> Path:
root = project_root or PROJECT_ROOT
return root / "data" / "user" / "settings"


def _deep_merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
"""
Deep merge two dictionaries, values in override will override values in base
Expand Down Expand Up @@ -107,8 +108,7 @@ def resolve_config_path(
if config_path.exists():
return config_path, False
raise FileNotFoundError(
f"Configuration file not found: {config_file} "
f"(expected under {settings_dir})"
f"Configuration file not found: {config_file} (expected under {settings_dir})"
)


Expand Down Expand Up @@ -257,12 +257,47 @@ def get_agent_params(module_name: str) -> dict:
}


# Baseline ``capabilities.chat`` parameters: one shared temperature plus a
# per-stage ``max_tokens`` budget for each chat pipeline stage.
DEFAULT_CHAT_PARAMS: dict[str, Any] = {
    "temperature": 0.2,
    **{
        stage: {"max_tokens": budget}
        for stage, budget in (
            ("responding", 8000),
            ("answer_now", 8000),
            ("thinking", 2000),
            ("observing", 2000),
            ("acting", 2000),
            ("react_fallback", 1500),
        )
    },
}


def get_chat_params() -> dict[str, Any]:
    """
    Read ``capabilities.chat`` from agents.yaml with deep-merged defaults.

    Unlike :func:`get_agent_params`, the chat capability has per-stage
    sub-sections (``responding``, ``answer_now``, ``thinking``, ``observing``,
    ``acting``, ``react_fallback``), each with its own ``max_tokens``. A single
    ``temperature`` is shared across all stages.

    Returns:
        dict: Deep-merged chat configuration. Always contains every stage key
        from :data:`DEFAULT_CHAT_PARAMS` so callers can index without checks.
    """
    path = get_runtime_settings_dir(PROJECT_ROOT) / "agents.yaml"
    cfg: dict[str, Any] = {}
    if path.exists():
        with open(path, encoding="utf-8") as f:
            agents_config = yaml.safe_load(f) or {}
        # agents.yaml is user-edited: any node may be a scalar or list instead
        # of a mapping. Guard each level so a malformed file degrades to
        # defaults instead of raising AttributeError at startup.
        if isinstance(agents_config, dict):
            capabilities = agents_config.get("capabilities") or {}
            if isinstance(capabilities, dict):
                chat_cfg = capabilities.get("chat") or {}
                if isinstance(chat_cfg, dict):
                    cfg = chat_cfg
    return _deep_merge(DEFAULT_CHAT_PARAMS, cfg)


# Public surface of the config loader.
# NOTE(review): ``_deep_merge`` is exported despite its leading underscore —
# presumably reused by sibling config modules; confirm before removing.
__all__ = [
    "PROJECT_ROOT",
    "get_runtime_settings_dir",
    "load_config_with_main",
    "get_path_from_config",
    "parse_language",
    "get_agent_params",
    "get_chat_params",
    "DEFAULT_CHAT_PARAMS",
    "_deep_merge",
]
13 changes: 11 additions & 2 deletions deeptutor/services/setup/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@
"research": {"temperature": 0.5, "max_tokens": 12000},
"question": {"temperature": 0.7, "max_tokens": 4096},
"co_writer": {"temperature": 0.7, "max_tokens": 4096},
"chat": {
"temperature": 0.2,
"responding": {"max_tokens": 8000},
"answer_now": {"max_tokens": 8000},
"thinking": {"max_tokens": 2000},
"observing": {"max_tokens": 2000},
"acting": {"max_tokens": 2000},
"react_fallback": {"max_tokens": 1500},
},
},
"tools": {
"brainstorm": {"temperature": 0.8, "max_tokens": 2048},
Expand Down Expand Up @@ -117,7 +126,7 @@ def init_user_directories(project_root: Path | None = None) -> None:

This function uses lazy initialization - directories are created on-demand
when files are saved, rather than pre-creating all directories at startup.

Only essential configuration files (like settings/interface.json) are
created at startup if they don't exist.

Expand Down Expand Up @@ -157,7 +166,7 @@ def init_user_directories(project_root: Path | None = None) -> None:
def _ensure_essential_settings(path_service) -> None:
"""
Ensure essential settings files exist.

This is the minimal initialization needed at startup.
All other directories are created on-demand when files are saved.
"""
Expand Down
Loading
Loading