From 5b49cb70fdacd82f39ddafd4cf5434670dcdce29 Mon Sep 17 00:00:00 2001 From: leelaugh <2560536821@qq.com> Date: Wed, 25 Mar 2026 14:25:25 +0800 Subject: [PATCH 1/4] docs: update tool safety and workflow docs - docs(readme): refresh phase 1 capability summary and recovery guidance in english - docs(readme): refresh phase 1 capability summary and recovery guidance in chinese - docs(book): clarify approval gates, recoverable failures, and git packaging tool behavior --- README.md | 3 +- README_zh.md | 3 +- book/src/04-tools-and-safety.md | 51 ++++++++++++++++++--------------- 3 files changed, 32 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 7276fce..c8f5df1 100644 --- a/README.md +++ b/README.md @@ -35,10 +35,11 @@ For coding work, a task enters through CLI and config setup, flows through the a - Agent loop and tool-calling basics - HTTP / LLM integration and SSE streaming - Safety-bounded `read`, `write`, and `bash` tools +- Repository-aware search, diff, staging, and commit packaging tools with policy-gated approvals - Test infrastructure and deterministic mdBook validation - Change-aware documentation automation for README and book chapters, including scope decisions, reference context, blocking verify, and review/rework evidence -`Phase 1` completes the in-repository coding workflow loop: structured git and patch tools, repository search, policy-gated mutations and approvals, bounded build/test, and `git_add` / `git_commit` for packaging changes. +`Phase 1` completes the in-repository coding workflow loop: structured git and patch tools, repository search, policy-gated mutations and approvals, bounded build/test, `git_add` / `git_commit` packaging guarded by the same approval model, and recovery guidance for blocked or inspectable tool failures. ## Documentation Automation diff --git a/README_zh.md b/README_zh.md index c09c0d5..7705ebf 100644 --- a/README_zh.md +++ b/README_zh.md @@ -35,10 +35,11 @@ NanoCodeAgent 是一个教学型 C++ code agent 运行时,重点放在确定 - agent loop 与基础 tool-calling - HTTP / LLM 集成与 SSE 流式解析 - 安全受限的 `read`、`write`、`bash` 工具 +- 带策略闸门审批的仓库搜索、diff、staging 与 commit 封装工具 - 测试基础设施与确定性的 mdBook 校验 - 面向 README 和书籍章节的 change-aware 文档自动化,包括 scope decision、reference context、blocking verify,以及 review/rework evidence -`Phase 1` 补齐了仓库内 coding workflow 闭环:结构化 git 与 patch 工具、仓库搜索、带策略闸门的变更与审批、受限 build/test,以及用于封装变更的 `git_add` / `git_commit`。 +`Phase 1` 补齐了仓库内 coding workflow 闭环:结构化 git 与 patch 工具、仓库搜索、带策略闸门的变更与审批、受限 build/test、受同一审批模型保护的 `git_add` / `git_commit` 封装流程,以及针对 blocked / inspectable 工具失败的恢复提示。 ## Documentation Automation diff --git a/book/src/04-tools-and-safety.md b/book/src/04-tools-and-safety.md index 6e5e2ce..152d1e9 100644 --- a/book/src/04-tools-and-safety.md +++ b/book/src/04-tools-and-safety.md @@ -14,31 +14,33 @@ 2. **审批闸门**:就算模型请求了某个工具,当前运行策略是否允许它执行。 3. **执行与停机闸门**:工具真的开始执行后,它是否仍受路径边界、输出上限、超时和 fail-fast 约束。 -这三层关系图如下,阅读时只要顺着“请求是否被接住、是否被允许、执行后是否继续”这条控制链往右看: +这三层关系图如下,阅读时只要顺着“请求是否被接住、审批是否满足、执行后是继续、给提示,还是停机”这条控制链往右看: ```mermaid flowchart LR Request[Assistant Tool Call] Registered{Registered?} - Approved{Allowed By Policy?} + Approved{Approvals Satisfied?} Executor[Bounded Executor] Result[Tool Result] - Healthy{Result Clean?} + Recoverable{Recoverable?} + Guidance[Guidance] Stop[Fail-Fast Stop] Next[Next Turn] Request --> Registered Registered -- no --> Stop Registered -- yes --> Approved - Approved -- no --> Stop + Approved -- no --> Guidance Approved -- yes --> Executor Executor --> Result - Result --> Healthy - Healthy -- no --> Stop - Healthy -- yes --> Next + Result --> Recoverable + Recoverable -- yes --> Guidance + Guidance --> Next + Recoverable -- no --> Stop ``` -这张图只展示控制链:请求先确认“系统认不认识这个工具”,再确认“当前策略允不允许它执行”,随后才进入受限 executor,最后由 loop 按结果决定继续还是停机。它没有展开 read-only / mutating / execution 的具体分类细目,也没有展开 executor 内部实现;目的只是帮助读者先抓住边界是如何一层层收紧的。 +这张图只展示控制链:请求先确认“系统认不认识这个工具”,再确认“当前策略要求的审批是否满足”,随后才进入受限 executor,最后由 loop 按结果决定继续、补一条恢复提示,还是停机。它没有展开 read-only、mutating、execution 的具体分类细目,也没有展开 executor 内部实现;目的只是帮助读者先抓住边界是如何一层层收紧、以及失败是如何被分流的。 ## 3. 主流程 (Main Flow) 当 `src/agent_loop.cpp` 收到一条带有 `tool_calls` 的 assistant message 后,它会顺序取出每个调用,解析 `function.arguments`,然后交给 `execute_tool()`。真正的统一入口在 `ToolRegistry`。 @@ -54,15 +56,15 @@ flowchart LR - `mutating` - `execution` -只读工具注册后会被归一化成无需审批;变更类和执行类默认阻止,只有 `allow_mutating_tools` 或 `allow_execution_tools` 被明确打开时才放行。注意,这里的 approval 不是“人工逐条弹窗确认”,而是运行开始前就确定好的策略门。 +只读工具注册后会被归一化成无需审批;变更类和执行类默认阻止,只有 `allow_mutating_tools` 或 `allow_execution_tools` 被明确打开时才放行。更具体地说,`git_add` 和 `git_commit` 虽然在分类上归到 `mutating`,但它们同时被标记为“会改仓库状态”以及“会执行 repo-controlled git path 与 hook 行为”,因此默认需要同时满足 mutating 与 execution 两类审批。注意,这里的 approval 不是“人工逐条弹窗确认”,而是运行开始前就确定好的策略门。 一旦通过审批闸门,调用才会进入具体 executor。到这里,不同工具的边界开始分化: - 文件工具依赖 workspace 解析和 no-follow 打开策略; -- repo 只读工具依赖受限目录、rg/git 参数硬化和输出限制; -- bash/build/test 工具依赖工作目录锁定、环境清理、超时和输出截断。 +- repo 工具依赖受限目录、rg 与 git 的参数硬化和输出限制; +- bash、build、test 工具依赖工作目录锁定、环境清理、超时和输出截断。 -最后,`agent_loop` 会把工具结果作为 `role=tool` 消息写回上下文。如果结果里出现 `blocked`、`failed`、`ok:false` 或 `timed_out:true` 这类污染信号,loop 会直接停止,而不是继续让模型带着坏状态往下跑。 +最后,`agent_loop` 会把工具结果作为 `role=tool` 消息写回上下文。这里已经不再把所有失败都粗暴地视为同一种污染:`blocked` 会被归类成需要换工具或改运行策略的 recoverable failure,`apply_patch` 的 `no_match` 与 `multiple_matches` 会被归类成 needs-inspection,build 或 test 超时会被归类成 retryable;只有非结构化失败、普通 executor 失败,或者同一 recoverable failure 被原样重复到超出 retry budget 时,loop 才会真正 fail-fast 停止。 ## 4. 一个真实任务下,工具是如何被允许、被拒绝、然后被停下的? (Worked Example) 设想用户给系统一个任务:请你读取 `src/main.cpp`,如果需要就修改一点内容并跑一次命令验证。 @@ -73,16 +75,19 @@ flowchart LR 这是只读工具,registry 直接放行。 2. 接着模型想请求 `write_file_safe` 或 `apply_patch`。 - 如果本轮没有打开 `allow_mutating_tools`,registry 会直接返回 `blocked`,executor 根本不会被调用。 + 如果本轮没有打开 `allow_mutating_tools`,registry 会直接返回 `blocked`,executor 根本不会被调用;loop 会把这个结果写回上下文,并附上一条“改用允许的只读工具,或在 run 外调整审批”的恢复提示。 3. 如果模型进一步请求 `bash_execute_safe` 或 `test_project_safe`,情况更严格。 这些都属于执行类工具;如果 `allow_execution_tools` 没打开,它们同样会在 registry 层被挡下。 -4. 如果执行类工具被允许,真正的 executor 仍然要再过一层边界。 +4. 如果模型想把已经准备好的改动打包进 git,`git_add` 和 `git_commit` 的要求还会再提高一层。 + 这两者都会修改仓库状态,而且 `git` 命令本身可能触发仓库控制的 hook 与 config 路径,所以默认需要同时打开 mutating 与 execution 两类审批;少任何一类,registry 都会在 executor 之前返回 `blocked`,并明确列出 `missing_approvals`。 + +5. 如果执行类工具被允许,真正的 executor 仍然要再过一层边界。 例如 `bash_execute_safe()` 会固定 cwd 到 workspace、清理环境变量、用双管道读输出、在超时或输出失控时杀掉进程组。 -5. 只要其中任何一步返回失败、超时或阻止状态,`agent_loop` 就会 fail-fast 停止。 - 这也是为什么“工具被看到”和“工具真的被执行”是两回事。 +6. 只要其中某一步落到 fatal failure,或者模型收到 recoverable guidance 后仍原样重复同一种失败直到耗尽 retry budget,`agent_loop` 才会 fail-fast 停止。 + 这也是为什么“工具被看到”和“工具真的被执行”是两回事,而“工具失败”与“整轮立刻终止”现在也不再是完全同义。 这个例子想说明的不是“系统特别保守”,而是“工具链路的每一层都在明确回答自己的那一个问题”:可见吗、允许吗、怎么执行、失败后怎么办。 @@ -90,15 +95,15 @@ flowchart LR - `src/agent_tools.cpp` 定义默认工具集合,把名称、描述、参数 schema、类别和执行函数绑在一起。这里决定的是“模型能请求什么”。 - `src/tool_registry.cpp` - 是策略门。它决定的是“当前这个请求在本轮运行里能不能执行”。 + 是策略门。它决定的是“当前这个请求在本轮运行里能不能执行”,并把缺失的是 mutating、execution,还是两者都缺失,显式写回结构化结果。 - `src/read_file.cpp`、`src/write_file.cpp`、`src/apply_patch.cpp` 是文件边界的第一线。它们不是泛泛检查字符串,而是结合 workspace 解析和安全打开策略来防越界与 symlink 穿透。 - `src/repo_tools.cpp` - 是只读观察面的 executor。它们也需要硬化,因为底层仍然会调用 rg 或 git。 + 不再只是只读观察面。这里既承载 `git_status`、`git_diff`、`git_show` 这类观察工具,也承载 `git_add` 与 `git_commit` 这种封装型仓库变更工具;它们仍然需要参数硬化、路径约束和输出边界。 - `src/bash_tool.cpp` 与 `src/build_test_tools.cpp` 是执行面。这里不承担“绝对隔离”的承诺,而承担“有限执行、及时收束、尽量不泄漏”的承诺。 - `src/agent_loop.cpp` - 是最后一道停机闸门。它用工具数、轮数、上下文和 fail-fast 规则防止错误继续扩散。 + 是最后一道分流与停机闸门。它用工具数、轮数、上下文、recoverable guidance 与 fail-fast 规则防止错误继续扩散。 ## 6. 作为贡献者,你通常怎么理解当前工具面? (What You Usually Do) 如果你是第一次想看清这套工具系统,最有效的顺序通常不是从工具名开始背,而是从“哪一层在回答哪个问题”开始: @@ -106,7 +111,7 @@ flowchart LR 1. 先看 `build_default_tool_registry()` 和 `get_agent_tools_schema()`,确认模型能看到哪些类别的工具。 2. 再看 `src/tool_registry.cpp`,确认默认策略如何区分只读、变更和执行。 3. 然后看具体 executor,理解每类工具的真实风险差异。 -4. 最后回到 `src/agent_loop.cpp`,看系统在工具失败后如何停下。 +4. 最后回到 `src/agent_loop.cpp`,看系统如何区分 blocked、retryable、needs-inspection、fatal,并决定是补 guidance 还是停机。 如果你正在改某一条边界,最相关的测试入口通常是: @@ -120,7 +125,7 @@ flowchart LR 不对。schema 暴露的是“模型知道它存在”,真正能否执行要看 `ToolRegistry` 和当前配置策略。 ### 误解二:approval 是人工逐条审批 -也不对。当前实现里的 approval 是运行前确定的策略开关,不是交互式确认流程。 +也不对。当前实现里的 approval 是运行前确定的策略开关,不是交互式确认流程;而且像 `git_add` 与 `git_commit` 这样的仓库封装工具,可能同时缺 mutating 与 execution 两类审批。 ### 误解三:`bash_execute_safe()` 相当于容器或系统级沙箱 这正是最需要纠正的过度安全表述。当前实现会锁定 workspace、清理环境、限制输出、设置超时并 kill 进程组,但它不是 `chroot`、不是 seccomp,也不是容器。 @@ -128,8 +133,8 @@ flowchart LR ### 误解四:文件工具和 bash 工具有差不多的边界强度 不是。当前最强的路径边界主要在文件工具,而不是 bash。文件工具在 workspace 解析和安全打开上更严格;bash 更像“受限执行器”,不是“绝对隔离器”。 -### 误解五:agent loop 会智能理解“这轮已经没意义了” -当前实现没有那种语义层面的智能裁决。它主要依靠固定阈值和结果状态字符串来 fail-fast。这种做法很朴素,但优点是边界明确。 +### 误解五:只要工具结果里出现失败字样,agent loop 就会立刻停机 +现在不能这样概括。当前实现会先做结构化分类:`blocked`、部分 patch 拒绝、以及 build 或 test timeout 会先转成 guidance,让模型换观察手段、修参数,或停止重复同一坏调用;只有 fatal failure 或重复 recoverable failure 超出预算时才真正停机。 最常见的失败模式也恰好对应这些误解:默认应阻止的工具被放行、shell 输出失控、后台进程泄漏、工具失败后 loop 继续跑、读写路径被越界利用。正因为这些问题具体而危险,这章才必须把“安全”拆回控制链,而不是抽象口号。 From 893f1facbb745a4d1136c49dbe7f9895a666a4ad Mon Sep 17 00:00:00 2001 From: leelaugh <2560536821@qq.com> Date: Sat, 28 Mar 2026 17:48:02 +0800 Subject: [PATCH 2/4] feat(docgen): Implement Subagent architecture for docgen with context isolation - Add core data structures for 4 Subagents (Change Analyst, Context Router, Patch Writer, Micro Reviewer) - Implement independent LLM client with per-subagent configuration - Create orchestrator pipeline for sequential workflow - Add comprehensive unit tests for data structures - Fix build integration and string literal issues - Optimize token consumption through local context processing --- CMakeLists.txt | 2 + include/docgen_llm.hpp | 119 ++++++++++++++++ include/docgen_pipeline.hpp | 61 +++++++++ include/docgen_types.hpp | 189 ++++++++++++++++++++++++++ src/docgen_llm.cpp | 152 +++++++++++++++++++++ src/docgen_pipeline.cpp | 262 ++++++++++++++++++++++++++++++++++++ tests/CMakeLists.txt | 8 +- tests/test_docgen_types.cpp | 252 ++++++++++++++++++++++++++++++++++ 8 files changed, 1044 insertions(+), 1 deletion(-) create mode 100644 include/docgen_llm.hpp create mode 100644 include/docgen_pipeline.hpp create mode 100644 include/docgen_types.hpp create mode 100644 src/docgen_llm.cpp create mode 100644 src/docgen_pipeline.cpp create mode 100644 tests/test_docgen_types.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 55b90d4..bfd3157 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,6 +60,8 @@ set(SOURCES src/agent_tools.cpp src/apply_patch.cpp src/agent_loop.cpp + src/docgen_llm.cpp + src/docgen_pipeline.cpp ) # Main executable diff --git a/include/docgen_llm.hpp b/include/docgen_llm.hpp new file mode 100644 index 0000000..994b5ac --- /dev/null +++ b/include/docgen_llm.hpp @@ -0,0 +1,119 @@ +#pragma once + +#include "config.hpp" +#include "docgen_types.hpp" +#include +#include +#include +#include + +namespace docgen { + +class LlmClient { +public: + using StreamCallback = std::function; + + LlmClient(const AgentConfig& cfg, const SubagentContext& ctx); + + std::expected call( + const std::string& system_prompt, + const nlohmann::json& user_context, + StreamCallback on_delta = nullptr + ); + + std::expected call_json( + const std::string& system_prompt, + const nlohmann::json& user_context, + StreamCallback on_delta = nullptr + ); + +private: + AgentConfig config_; + SubagentContext ctx_; + + std::string extract_json_from_response(const std::string& content); +}; + +inline constexpr std::string_view kChangeAnalystPrompt = R"( +You are a precise code change analyst. Output ONLY valid JSON. + +INPUT: git diff text + +OUTPUT FORMAT (strict JSON): +{ + "affected_files": [ + {"path": "relative/path", "change_type": "add|modify|delete|rename", "old_path": "optional for rename", "public_symbols": ["optional list"]} + ], + "intent_summary": "one sentence describing the change purpose" +} + +RULES: +1. Only include files with actual changes +2. change_type must be one of: add, modify, delete, rename +3. For renames, include old_path +4. public_symbols: only exportable/public symbols affected +5. NO prose, NO markdown code fences, ONLY the JSON object +)"; + +inline constexpr std::string_view kContextRouterPrompt = R"( +You are a document section locator. Output ONLY valid JSON. + +INPUT: +- doc_outline: JSON with file_path and headings array +- intent_summary: what change is needed + +OUTPUT FORMAT (strict JSON): +{ + "locations": [ + {"target_file": "path/to/doc.md", "start_line": N, "end_line": M, "section_heading": "optional heading"} + ] +} + +RULES: +1. Return only sections that need modification +2. start_line and end_line must be valid line numbers (1-based) +3. If no sections need changes, return {"locations": []} +4. NO prose, NO markdown code fences, ONLY the JSON object +)"; + +inline constexpr std::string_view kPatchWriterPrompt = +"You are a precise document patch generator. Output ONLY valid JSON.\n\n" +"INPUT:\n" +"- original_text: the exact text block to modify (line-numbered)\n" +"- intent_summary: what change is needed\n\n" +"OUTPUT FORMAT (strict JSON):\n" +"{\n" +" \"patches\": [\n" +" {\n" +" \"action\": \"replace|insert_before|insert_after|delete\",\n" +" \"old_text\": \"exact text to find (for replace/delete)\",\n" +" \"new_text\": \"replacement content (empty for delete)\"\n" +" }\n" +" ],\n" +" \"rationale\": \"one-line explanation\"\n" +"}\n\n" +"RULES:\n" +"1. old_text MUST match EXACTLY (whitespace-sensitive) for replace/delete\n" +"2. Each patch operates on original_text independently (not sequential)\n" +"3. Use multiple patches only when truly independent\n" +"4. NO prose, NO markdown code fences, ONLY the JSON object\n" +"5. If no change needed, return {\"patches\": [], \"rationale\": \"no change required\"}"; + +inline constexpr std::string_view kMicroReviewerPrompt = +"You are a minimal document patch reviewer. Output ONLY valid JSON.\n\n" +"INPUT:\n" +"- before: original text\n" +"- after: modified text\n" +"- intent_summary: intended change\n\n" +"OUTPUT FORMAT (strict JSON):\n" +"{\n" +" \"verdict\": \"approve|reject\",\n" +" \"reason\": \"one sentence\",\n" +" \"issues\": [\"optional list of problems if rejected\"]\n" +"}\n\n" +"RULES:\n" +"1. approve if: change matches intent, no syntax errors, maintains document structure\n" +"2. reject if: wrong content, broken formatting, unrelated changes\n" +"3. NO prose, NO markdown code fences, ONLY the JSON object"; + +} // namespace docgen diff --git a/include/docgen_pipeline.hpp b/include/docgen_pipeline.hpp new file mode 100644 index 0000000..fb7541e --- /dev/null +++ b/include/docgen_pipeline.hpp @@ -0,0 +1,61 @@ +#pragma once + +#include "config.hpp" +#include "docgen_types.hpp" +#include "docgen_llm.hpp" +#include "apply_patch.hpp" +#include +#include +#include + +namespace docgen { + +struct PipelineResult { + bool success = false; + std::string error; + int patches_applied = 0; + int patches_rejected = 0; +}; + +class DocgenPipeline { +public: + DocgenPipeline(const AgentConfig& cfg, const SubagentContext& ctx = {}); + + std::expected run(const std::string& diff_text); + + void set_target_files(std::vector files); + void set_on_progress(std::function cb); + +private: + std::expected analyze_changes(const std::string& diff); + std::expected locate_targets( + const std::string& doc_content, + const DocOutline& outline, + const ChangeAnalystResult& changes + ); + std::expected write_patches( + const std::string& original_text, + const std::string& intent + ); + std::expected review_patch( + const std::string& before, + const std::string& after + ); + + std::optional extract_outline(const std::string& file_path); + std::string read_file_content(const std::string& rel_path); + std::string read_lines(const std::string& content, int start, int end); + + std::vector to_patch_entries(const PatchWriterResult& patches, const std::string& original); + bool apply_patches(const std::string& rel_path, const std::vector& entries); + + AgentConfig config_; + SubagentContext ctx_; + std::unique_ptr llm_; + std::vector target_files_{"README.md"}; + std::function on_progress_; +}; + +std::expected parse_markdown_outline(const std::string& content); + +} // namespace docgen diff --git a/include/docgen_types.hpp b/include/docgen_types.hpp new file mode 100644 index 0000000..e8c887e --- /dev/null +++ b/include/docgen_types.hpp @@ -0,0 +1,189 @@ +#pragma once + +#include +#include +#include +#include + +namespace docgen { + +struct AffectedFile { + std::string path; + std::string change_type; + std::optional old_path; + std::vector public_symbols; +}; + +struct ChangeAnalystResult { + std::vector affected_files; + std::string intent_summary; +}; + +inline void to_json(nlohmann::json& j, const AffectedFile& f) { + j = nlohmann::json{{"path", f.path}, {"change_type", f.change_type}}; + if (f.old_path) j["old_path"] = *f.old_path; + if (!f.public_symbols.empty()) j["public_symbols"] = f.public_symbols; +} + +inline void from_json(const nlohmann::json& j, AffectedFile& f) { + j.at("path").get_to(f.path); + j.at("change_type").get_to(f.change_type); + if (j.contains("old_path")) f.old_path = j["old_path"].get(); + if (j.contains("public_symbols")) f.public_symbols = j["public_symbols"].get>(); +} + +inline void to_json(nlohmann::json& j, const ChangeAnalystResult& r) { + j = nlohmann::json{{"affected_files", r.affected_files}, {"intent_summary", r.intent_summary}}; +} + +inline void from_json(const nlohmann::json& j, ChangeAnalystResult& r) { + j.at("affected_files").get_to(r.affected_files); + j.at("intent_summary").get_to(r.intent_summary); +} + +struct DocLocation { + std::string target_file; + int start_line = 0; + int end_line = 0; + std::optional section_heading; +}; + +struct ContextRouterResult { + std::vector locations; +}; + +inline void to_json(nlohmann::json& j, const DocLocation& l) { + j = nlohmann::json{{"target_file", l.target_file}, {"start_line", l.start_line}, {"end_line", l.end_line}}; + if (l.section_heading) j["section_heading"] = *l.section_heading; +} + +inline void from_json(const nlohmann::json& j, DocLocation& l) { + j.at("target_file").get_to(l.target_file); + j.at("start_line").get_to(l.start_line); + j.at("end_line").get_to(l.end_line); + if (j.contains("section_heading")) l.section_heading = j["section_heading"].get(); +} + +inline void to_json(nlohmann::json& j, const ContextRouterResult& r) { + j = nlohmann::json{{"locations", r.locations}}; +} + +inline void from_json(const nlohmann::json& j, ContextRouterResult& r) { + j.at("locations").get_to(r.locations); +} + +enum class PatchAction { Replace, InsertBefore, InsertAfter, Delete }; + +inline std::string patch_action_to_string(PatchAction a) { + switch (a) { + case PatchAction::Replace: return "replace"; + case PatchAction::InsertBefore: return "insert_before"; + case PatchAction::InsertAfter: return "insert_after"; + case PatchAction::Delete: return "delete"; + } + return "replace"; +} + +inline std::optional patch_action_from_string(const std::string& s) { + if (s == "replace") return PatchAction::Replace; + if (s == "insert_before") return PatchAction::InsertBefore; + if (s == "insert_after") return PatchAction::InsertAfter; + if (s == "delete") return PatchAction::Delete; + return std::nullopt; +} + +struct TextPatch { + PatchAction action = PatchAction::Replace; + std::string old_text; + std::string new_text; + std::optional line_hint; +}; + +struct PatchWriterResult { + std::vector patches; + std::optional rationale; +}; + +inline void to_json(nlohmann::json& j, const TextPatch& p) { + j = nlohmann::json{{"action", patch_action_to_string(p.action)}, {"old_text", p.old_text}, {"new_text", p.new_text}}; + if (p.line_hint) j["line_hint"] = *p.line_hint; +} + +inline void from_json(const nlohmann::json& j, TextPatch& p) { + auto act = j.at("action").get(); + auto action_opt = patch_action_from_string(act); + if (!action_opt) { + throw std::runtime_error("Invalid patch action: " + act); + } + p.action = *action_opt; + if (j.contains("old_text")) j["old_text"].get_to(p.old_text); + if (j.contains("new_text")) j["new_text"].get_to(p.new_text); + if (j.contains("line_hint")) p.line_hint = j["line_hint"].get(); +} + +inline void to_json(nlohmann::json& j, const PatchWriterResult& r) { + j = nlohmann::json{{"patches", r.patches}}; + if (r.rationale) j["rationale"] = *r.rationale; +} + +inline void from_json(const nlohmann::json& j, PatchWriterResult& r) { + j.at("patches").get_to(r.patches); + if (j.contains("rationale")) r.rationale = j["rationale"].get(); +} + +enum class ReviewVerdict { Approve, Reject }; + +struct MicroReviewResult { + ReviewVerdict verdict = ReviewVerdict::Approve; + std::string reason; + std::vector issues; +}; + +inline void to_json(nlohmann::json& j, const MicroReviewResult& r) { + j = nlohmann::json{{"verdict", r.verdict == ReviewVerdict::Approve ? "approve" : "reject"}, {"reason", r.reason}}; + if (!r.issues.empty()) j["issues"] = r.issues; +} + +inline void from_json(const nlohmann::json& j, MicroReviewResult& r) { + auto v = j.at("verdict").get(); + r.verdict = (v == "approve") ? ReviewVerdict::Approve : ReviewVerdict::Reject; + j.at("reason").get_to(r.reason); + if (j.contains("issues")) j["issues"].get_to(r.issues); +} + +struct DocOutline { + std::string file_path; + struct Heading { + int level; + int line_number; + std::string text; + }; + std::vector headings; +}; + +inline void to_json(nlohmann::json& j, const DocOutline::Heading& h) { + j = nlohmann::json{{"level", h.level}, {"line_number", h.line_number}, {"text", h.text}}; +} + +inline void from_json(const nlohmann::json& j, DocOutline::Heading& h) { + j.at("level").get_to(h.level); + j.at("line_number").get_to(h.line_number); + j.at("text").get_to(h.text); +} + +inline void to_json(nlohmann::json& j, const DocOutline& o) { + j = nlohmann::json{{"file_path", o.file_path}, {"headings", o.headings}}; +} + +inline void from_json(const nlohmann::json& j, DocOutline& o) { + j.at("file_path").get_to(o.file_path); + j.at("headings").get_to(o.headings); +} + +struct SubagentContext { + int max_input_bytes = 16000; + int timeout_ms = 60000; + int max_retries = 2; +}; + +} // namespace docgen diff --git a/src/docgen_llm.cpp b/src/docgen_llm.cpp new file mode 100644 index 0000000..1e199f5 --- /dev/null +++ b/src/docgen_llm.cpp @@ -0,0 +1,152 @@ +#include "docgen_llm.hpp" +#include "llm.hpp" +#include "http.hpp" +#include "logger.hpp" +#include "sse_parser.hpp" +#include +#include + +namespace docgen { + +LlmClient::LlmClient(const AgentConfig& cfg, const SubagentContext& ctx) + : config_(cfg), ctx_(ctx) {} + +std::expected LlmClient::call( + const std::string& system_prompt, + const nlohmann::json& user_context, + StreamCallback on_delta +) { + if (!config_.api_key.has_value() || config_.api_key->empty()) { + return std::unexpected("API key is not configured"); + } + + nlohmann::json messages = nlohmann::json::array(); + messages.push_back({{"role", "system"}, {"content", system_prompt}}); + messages.push_back({{"role", "user"}, {"content", user_context.dump()}}); + + nlohmann::json req = { + {"model", config_.model}, + {"messages", messages}, + {"stream", true} + }; + + std::string url = config_.base_url; + if (!url.empty() && url.back() == '/') url.pop_back(); + if (url.find("/chat/completions") == std::string::npos) { + url += "/chat/completions"; + } + + std::string payload = req.dump(); + std::vector headers = { + "Content-Type: application/json", + "Authorization: Bearer " + *config_.api_key + }; + + SseParser parser; + std::string full_content; + std::string err_msg; + bool process_ok = true; + + HttpOptions opts; + opts.timeout_ms = ctx_.timeout_ms; + + bool req_ok = http_post_json_stream( + url, headers, payload, opts, + [&](const std::string& chunk) { + std::string local_err; + auto events = parser.feed(chunk); + for (const auto& event : events) { + if (event == "[DONE]") return true; + + try { + auto parsed = nlohmann::json::parse(event); + if (parsed.contains("error")) { + err_msg = "API error: " + parsed["error"].dump(); + return false; + } + if (parsed.contains("choices") && !parsed["choices"].empty()) { + auto delta = parsed["choices"][0].value("delta", nlohmann::json{}); + if (delta.contains("content") && delta["content"].is_string()) { + std::string txt = delta["content"].get(); + full_content += txt; + if (on_delta && !on_delta(txt)) { + return false; + } + } + } + } catch (const std::exception& e) { + err_msg = "Parse error: " + std::string(e.what()); + return false; + } + } + return true; + }, + &err_msg + ); + + if (!req_ok) { + return std::unexpected("Network error: " + err_msg); + } + if (!process_ok) { + return std::unexpected(err_msg); + } + + return nlohmann::json{{"content", full_content}}; +} + +std::expected LlmClient::call_json( + const std::string& system_prompt, + const nlohmann::json& user_context, + StreamCallback on_delta +) { + auto resp = call(system_prompt, user_context, on_delta); + if (!resp) { + return std::unexpected(resp.error()); + } + + std::string content = (*resp)["content"].get(); + std::string json_str = extract_json_from_response(content); + + try { + return nlohmann::json::parse(json_str); + } catch (const std::exception& e) { + return std::unexpected("JSON parse error: " + std::string(e.what()) + " in: " + json_str.substr(0, 200)); + } +} + +std::string LlmClient::extract_json_from_response(const std::string& content) { + std::string trimmed; + for (char c : content) { + if (c != ' ' && c != '\n' && c != '\r' && c != '\t') { + trimmed += c; + } + } + + if (!trimmed.empty() && trimmed[0] == '{') { + size_t brace_count = 0; + size_t end_pos = 0; + for (size_t i = 0; i < trimmed.size(); ++i) { + if (trimmed[i] == '{') ++brace_count; + else if (trimmed[i] == '}') { + --brace_count; + if (brace_count == 0) { + end_pos = i + 1; + break; + } + } + } + if (end_pos > 0) { + return trimmed.substr(0, end_pos); + } + } + + std::regex json_block(R"(```(?:json)?\s*(\{[\s\S]*?\})\s*```)"); + std::smatch match; + if (std::regex_search(content, match, json_block)) { + return match[1].str(); + } + + return content; +} + +} // namespace docgen diff --git a/src/docgen_pipeline.cpp b/src/docgen_pipeline.cpp new file mode 100644 index 0000000..222e48d --- /dev/null +++ b/src/docgen_pipeline.cpp @@ -0,0 +1,262 @@ +#include "docgen_pipeline.hpp" +#include "logger.hpp" +#include +#include +#include +#include + +namespace docgen { + +namespace fs = std::filesystem; + +DocgenPipeline::DocgenPipeline(const AgentConfig& cfg, const SubagentContext& ctx) + : config_(cfg), ctx_(ctx), llm_(std::make_unique(cfg, ctx)) {} + +void DocgenPipeline::set_target_files(std::vector files) { + target_files_ = std::move(files); +} + +void DocgenPipeline::set_on_progress(std::function cb) { + on_progress_ = std::move(cb); +} + +std::expected DocgenPipeline::run(const std::string& diff_text) { + PipelineResult result; + + if (on_progress_) on_progress_("analyze", "parsing diff"); + + auto changes = analyze_changes(diff_text); + if (!changes) { + return std::unexpected(changes.error()); + } + + if (changes->affected_files.empty()) { + result.success = true; + return result; + } + + for (const auto& target_file : target_files_) { + if (on_progress_) on_progress_("locate", target_file); + + auto outline = extract_outline(target_file); + if (!outline) continue; + + auto content = read_file_content(target_file); + if (content.empty()) continue; + + auto locations = locate_targets(content, *outline, *changes); + if (!locations || locations->locations.empty()) continue; + + for (const auto& loc : locations->locations) { + if (on_progress_) on_progress_("patch", + target_file + ":" + std::to_string(loc.start_line)); + + auto original = read_lines(content, loc.start_line, loc.end_line); + auto patch_result = write_patches(original, changes->intent_summary); + if (!patch_result || patch_result->patches.empty()) continue; + + auto after = original; + for (const auto& p : patch_result->patches) { + if (p.action == PatchAction::Replace || p.action == PatchAction::Delete) { + auto pos = after.find(p.old_text); + if (pos != std::string::npos) { + after.replace(pos, p.old_text.length(), p.new_text); + } + } + } + + auto review = review_patch(original, after); + if (!review || review->verdict == ReviewVerdict::Reject) { + result.patches_rejected += static_cast(patch_result->patches.size()); + continue; + } + + auto entries = to_patch_entries(*patch_result, original); + if (apply_patches(target_file, entries)) { + result.patches_applied += static_cast(entries.size()); + } + } + } + + result.success = true; + return result; +} + +std::expected DocgenPipeline::analyze_changes(const std::string& diff) { + nlohmann::json user_ctx = {{"diff", diff}}; + + auto resp = llm_->call_json(std::string(kChangeAnalystPrompt), user_ctx); + if (!resp) { + return std::unexpected(resp.error()); + } + + try { + return resp->get(); + } catch (const std::exception& e) { + return std::unexpected(std::string("JSON parse error: ") + e.what()); + } +} + +std::expected DocgenPipeline::locate_targets( + const std::string& doc_content, + const DocOutline& outline, + const ChangeAnalystResult& changes +) { + nlohmann::json user_ctx = { + {"doc_outline", outline}, + {"intent_summary", changes.intent_summary} + }; + + auto resp = llm_->call_json(std::string(kContextRouterPrompt), user_ctx); + if (!resp) { + return std::unexpected(resp.error()); + } + + try { + return resp->get(); + } catch (const std::exception& e) { + return std::unexpected(std::string("JSON parse error: ") + e.what()); + } +} + +std::expected DocgenPipeline::write_patches( + const std::string& original_text, + const std::string& intent +) { + std::string numbered; + int line_num = 1; + std::istringstream iss(original_text); + for (std::string line; std::getline(iss, line); ++line_num) { + numbered += std::to_string(line_num) + ": " + line + "\n"; + } + + nlohmann::json user_ctx = { + {"original_text", numbered}, + {"intent_summary", intent} + }; + + auto resp = llm_->call_json(std::string(kPatchWriterPrompt), user_ctx); + if (!resp) { + return std::unexpected(resp.error()); + } + + try { + return resp->get(); + } catch (const std::exception& e) { + return std::unexpected(std::string("JSON parse error: ") + e.what()); + } +} + +std::expected DocgenPipeline::review_patch( + const std::string& before, + const std::string& after +) { + nlohmann::json user_ctx = { + {"before", before}, + {"after", after} + }; + + auto resp = llm_->call_json(std::string(kMicroReviewerPrompt), user_ctx); + if (!resp) { + return std::unexpected(resp.error()); + } + + try { + return resp->get(); + } catch (const std::exception& e) { + return std::unexpected(std::string("JSON parse error: ") + e.what()); + } +} + +std::optional DocgenPipeline::extract_outline(const std::string& file_path) { + auto content = read_file_content(file_path); + if (content.empty()) return std::nullopt; + + auto outline_result = parse_markdown_outline(content); + if (!outline_result) return std::nullopt; + + outline_result->file_path = file_path; + return *outline_result; +} + +std::string DocgenPipeline::read_file_content(const std::string& rel_path) { + fs::path full_path = fs::path(config_.workspace_abs) / rel_path; + + std::ifstream f(full_path, std::ios::binary); + if (!f) return {}; + + std::ostringstream ss; + ss << f.rdbuf(); + return ss.str(); +} + +std::string DocgenPipeline::read_lines(const std::string& content, int start, int end) { + if (start < 1 || end < start) return {}; + + std::istringstream iss(content); + std::string result; + std::string line; + int line_num = 1; + + while (std::getline(iss, line)) { + if (line_num >= start && line_num <= end) { + result += line + "\n"; + } + if (line_num > end) break; + ++line_num; + } + + if (!result.empty() && result.back() == '\n') { + result.pop_back(); + } + + return result; +} + +std::vector DocgenPipeline::to_patch_entries( + const PatchWriterResult& patches, + const std::string& original +) { + std::vector entries; + + for (const auto& p : patches.patches) { + if (p.action == PatchAction::Replace || p.action == PatchAction::Delete) { + entries.push_back({p.old_text, p.new_text}); + } + } + + return entries; +} + +bool DocgenPipeline::apply_patches(const std::string& rel_path, const std::vector& entries) { + if (entries.empty()) return true; + + auto result = apply_patch_batch(config_.workspace_abs, rel_path, entries); + if (!result.ok) { + LOG_ERROR("apply_patch_batch failed: {}", result.err); + } + return result.ok; +} + +std::expected parse_markdown_outline(const std::string& content) { + DocOutline outline; + std::regex heading_re(R"(^(#{1,6})\s+(.+)$)", std::regex::multiline); + + std::istringstream iss(content); + std::string line; + int line_num = 1; + + while (std::getline(iss, line)) { + std::smatch match; + if (std::regex_search(line, match, heading_re)) { + int level = static_cast(match[1].str().length()); + std::string text = match[2].str(); + outline.headings.push_back({level, line_num, text}); + } + ++line_num; + } + + return outline; +} + +} // namespace docgen diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 2131184..2639019 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -17,6 +17,8 @@ set(TEST_SOURCES ../src/agent_tools.cpp ../src/apply_patch.cpp ../src/agent_loop.cpp + ../src/docgen_llm.cpp + ../src/docgen_pipeline.cpp ) # CLI Test @@ -130,5 +132,9 @@ gtest_discover_tests(test_apply_patch) add_executable(test_net_smoke test_net_smoke.cpp ${TEST_SOURCES}) target_link_libraries(test_net_smoke PRIVATE GTest::gtest_main spdlog::spdlog CURL::libcurl nlohmann_json::nlohmann_json) target_include_directories(test_net_smoke PRIVATE ../include) -# Setup net test properties gtest_discover_tests(test_net_smoke PROPERTIES LABELS "net") + +add_executable(test_docgen_types test_docgen_types.cpp ${TEST_SOURCES}) +target_link_libraries(test_docgen_types PRIVATE GTest::gtest_main spdlog::spdlog CURL::libcurl nlohmann_json::nlohmann_json) +target_include_directories(test_docgen_types PRIVATE ../include) +gtest_discover_tests(test_docgen_types) diff --git a/tests/test_docgen_types.cpp b/tests/test_docgen_types.cpp new file mode 100644 index 0000000..83af783 --- /dev/null +++ b/tests/test_docgen_types.cpp @@ -0,0 +1,252 @@ +#include +#include "docgen_types.hpp" +#include + +using json = nlohmann::json; + +class DocgenTypesTest : public ::testing::Test { +protected: + void SetUp() override {} +}; + +TEST_F(DocgenTypesTest, AffectedFile_Serialization) { + docgen::AffectedFile f; + f.path = "src/main.cpp"; + f.change_type = "modify"; + f.public_symbols = {"main", "run"}; + + json j = f; + EXPECT_EQ(j["path"], "src/main.cpp"); + EXPECT_EQ(j["change_type"], "modify"); + EXPECT_TRUE(j.contains("public_symbols")); + EXPECT_EQ(j["public_symbols"].size(), 2); + + auto f2 = j.get(); + EXPECT_EQ(f2.path, f.path); + EXPECT_EQ(f2.change_type, f.change_type); + EXPECT_EQ(f2.public_symbols, f.public_symbols); + EXPECT_FALSE(f2.old_path.has_value()); +} + +TEST_F(DocgenTypesTest, AffectedFile_WithOldPath) { + docgen::AffectedFile f; + f.path = "src/new.cpp"; + f.change_type = "rename"; + f.old_path = "src/old.cpp"; + + json j = f; + EXPECT_TRUE(j.contains("old_path")); + EXPECT_EQ(j["old_path"], "src/old.cpp"); + + auto f2 = j.get(); + EXPECT_TRUE(f2.old_path.has_value()); + EXPECT_EQ(*f2.old_path, "src/old.cpp"); +} + +TEST_F(DocgenTypesTest, ChangeAnalystResult_Serialization) { + docgen::ChangeAnalystResult r; + r.affected_files.push_back({"src/a.cpp", "add", std::nullopt, {}}); + r.affected_files.push_back({"src/b.cpp", "delete", std::nullopt, {"func1"}}); + r.intent_summary = "Add new feature X"; + + json j = r; + EXPECT_EQ(j["affected_files"].size(), 2); + EXPECT_EQ(j["intent_summary"], "Add new feature X"); + + auto r2 = j.get(); + EXPECT_EQ(r2.affected_files.size(), 2); + EXPECT_EQ(r2.intent_summary, r.intent_summary); +} + +TEST_F(DocgenTypesTest, DocLocation_Serialization) { + docgen::DocLocation loc; + loc.target_file = "README.md"; + loc.start_line = 10; + loc.end_line = 20; + loc.section_heading = "Installation"; + + json j = loc; + EXPECT_EQ(j["target_file"], "README.md"); + EXPECT_EQ(j["start_line"], 10); + EXPECT_EQ(j["end_line"], 20); + EXPECT_EQ(j["section_heading"], "Installation"); + + auto loc2 = j.get(); + EXPECT_EQ(loc2.target_file, loc.target_file); + EXPECT_EQ(loc2.start_line, loc.start_line); + EXPECT_EQ(loc2.end_line, loc.end_line); + EXPECT_TRUE(loc2.section_heading.has_value()); +} + +TEST_F(DocgenTypesTest, ContextRouterResult_Serialization) { + docgen::ContextRouterResult r; + r.locations.push_back({"README.md", 1, 10, "Header"}); + r.locations.push_back({"docs/guide.md", 50, 60, std::nullopt}); + + json j = r; + EXPECT_EQ(j["locations"].size(), 2); + + auto r2 = j.get(); + EXPECT_EQ(r2.locations.size(), 2); + EXPECT_EQ(r2.locations[0].section_heading.value(), "Header"); + EXPECT_FALSE(r2.locations[1].section_heading.has_value()); +} + +TEST_F(DocgenTypesTest, PatchAction_Conversion) { + EXPECT_EQ(docgen::patch_action_to_string(docgen::PatchAction::Replace), "replace"); + EXPECT_EQ(docgen::patch_action_to_string(docgen::PatchAction::InsertBefore), "insert_before"); + EXPECT_EQ(docgen::patch_action_to_string(docgen::PatchAction::InsertAfter), "insert_after"); + EXPECT_EQ(docgen::patch_action_to_string(docgen::PatchAction::Delete), "delete"); + + auto a1 = docgen::patch_action_from_string("replace"); + EXPECT_TRUE(a1.has_value()); + EXPECT_EQ(*a1, docgen::PatchAction::Replace); + + auto a2 = docgen::patch_action_from_string("invalid"); + EXPECT_FALSE(a2.has_value()); +} + +TEST_F(DocgenTypesTest, TextPatch_Serialization) { + docgen::TextPatch p; + p.action = docgen::PatchAction::Replace; + p.old_text = "old content"; + p.new_text = "new content"; + p.line_hint = 42; + + json j = p; + EXPECT_EQ(j["action"], "replace"); + EXPECT_EQ(j["old_text"], "old content"); + EXPECT_EQ(j["new_text"], "new content"); + EXPECT_EQ(j["line_hint"], 42); + + auto p2 = j.get(); + EXPECT_EQ(p2.action, p.action); + EXPECT_EQ(p2.old_text, p.old_text); + EXPECT_EQ(p2.new_text, p.new_text); + EXPECT_TRUE(p2.line_hint.has_value()); + EXPECT_EQ(*p2.line_hint, 42); +} + +TEST_F(DocgenTypesTest, TextPatch_DeleteAction) { + docgen::TextPatch p; + p.action = docgen::PatchAction::Delete; + p.old_text = "text to delete"; + p.new_text = ""; + + json j = p; + auto p2 = j.get(); + EXPECT_EQ(p2.action, docgen::PatchAction::Delete); + EXPECT_TRUE(p2.new_text.empty()); +} + +TEST_F(DocgenTypesTest, PatchWriterResult_Serialization) { + docgen::PatchWriterResult r; + r.patches.push_back({docgen::PatchAction::Replace, "old", "new", std::nullopt}); + r.rationale = "Fix typo"; + + json j = r; + EXPECT_EQ(j["patches"].size(), 1); + EXPECT_EQ(j["rationale"], "Fix typo"); + + auto r2 = j.get(); + EXPECT_EQ(r2.patches.size(), 1); + EXPECT_TRUE(r2.rationale.has_value()); + EXPECT_EQ(*r2.rationale, "Fix typo"); +} + +TEST_F(DocgenTypesTest, PatchWriterResult_EmptyPatches) { + docgen::PatchWriterResult r; + + json j = r; + EXPECT_TRUE(j["patches"].empty()); + EXPECT_FALSE(j.contains("rationale")); + + auto r2 = j.get(); + EXPECT_TRUE(r2.patches.empty()); + EXPECT_FALSE(r2.rationale.has_value()); +} + +TEST_F(DocgenTypesTest, MicroReviewResult_Approve) { + docgen::MicroReviewResult r; + r.verdict = docgen::ReviewVerdict::Approve; + r.reason = "Change looks correct"; + + json j = r; + EXPECT_EQ(j["verdict"], "approve"); + EXPECT_EQ(j["reason"], "Change looks correct"); + + auto r2 = j.get(); + EXPECT_EQ(r2.verdict, docgen::ReviewVerdict::Approve); +} + +TEST_F(DocgenTypesTest, MicroReviewResult_Reject) { + docgen::MicroReviewResult r; + r.verdict = docgen::ReviewVerdict::Reject; + r.reason = "Found issues"; + r.issues = {"Broken link", "Missing comma"}; + + json j = r; + EXPECT_EQ(j["verdict"], "reject"); + EXPECT_EQ(j["issues"].size(), 2); + + auto r2 = j.get(); + EXPECT_EQ(r2.verdict, docgen::ReviewVerdict::Reject); + EXPECT_EQ(r2.issues.size(), 2); +} + +TEST_F(DocgenTypesTest, DocOutline_Serialization) { + docgen::DocOutline outline; + outline.file_path = "README.md"; + outline.headings.push_back({1, 1, "Title"}); + outline.headings.push_back({2, 10, "Section 1"}); + outline.headings.push_back({3, 15, "Subsection"}); + + json j = outline; + EXPECT_EQ(j["file_path"], "README.md"); + EXPECT_EQ(j["headings"].size(), 3); + EXPECT_EQ(j["headings"][0]["level"], 1); + EXPECT_EQ(j["headings"][0]["line_number"], 1); + EXPECT_EQ(j["headings"][0]["text"], "Title"); + + auto o2 = j.get(); + EXPECT_EQ(o2.file_path, outline.file_path); + EXPECT_EQ(o2.headings.size(), 3); +} + +TEST_F(DocgenTypesTest, SubagentContext_DefaultValues) { + docgen::SubagentContext ctx; + EXPECT_EQ(ctx.max_input_bytes, 16000); + EXPECT_EQ(ctx.timeout_ms, 60000); + EXPECT_EQ(ctx.max_retries, 2); +} + +TEST_F(DocgenTypesTest, RoundTrip_AllTypes) { + docgen::ChangeAnalystResult analyst; + analyst.affected_files = {{"a.cpp", "modify", std::nullopt, {"f1"}}}; + analyst.intent_summary = "test"; + + json j1 = analyst; + auto a2 = j1.get(); + EXPECT_EQ(a2.intent_summary, "test"); + + docgen::PatchWriterResult writer; + writer.patches = {{docgen::PatchAction::Replace, "a", "b", std::nullopt}}; + writer.rationale = "r"; + + json j2 = writer; + auto w2 = j2.get(); + EXPECT_EQ(w2.patches[0].old_text, "a"); + + docgen::MicroReviewResult review; + review.verdict = docgen::ReviewVerdict::Approve; + review.reason = "ok"; + + json j3 = review; + auto r2 = j3.get(); + EXPECT_EQ(r2.verdict, docgen::ReviewVerdict::Approve); +} + +TEST_F(DocgenTypesTest, InvalidPatchAction_Throws) { + json j = {{"action", "invalid_action"}, {"old_text", "a"}, {"new_text", "b"}}; + EXPECT_THROW(j.get(), std::runtime_error); +} From 095a793b9dc39a03f9ebd716c8856fa9b699dd9d Mon Sep 17 00:00:00 2001 From: leelaugh <2560536821@qq.com> Date: Mon, 30 Mar 2026 11:54:23 +0800 Subject: [PATCH 3/4] fix: repair message compaction and expand subagent delegation - fix(agent_loop): lower compaction threshold from 5 to 3 and fix boundary condition - fix(tests): increase max_context_bytes from 100 to 1000 in compaction tests - feat(agent_loop): implement subagent delegation with strict child limits - feat(agent_tools): add delegate_subagent tool to schema - test(agent_loop): add coverage for compaction and subagent scenarios --- include/agent_tools.hpp | 2 +- src/agent_loop.cpp | 687 ++++++++++++++++++++--- src/agent_tools.cpp | 48 +- tests/test_agent_loop_limits.cpp | 303 +++++++++- tests/test_agent_mock_e2e.cpp | 90 +++ tests/test_schema_and_args_tolerance.cpp | 30 +- 6 files changed, 1081 insertions(+), 79 deletions(-) diff --git a/include/agent_tools.hpp b/include/agent_tools.hpp index 83fbe57..aac7c17 100644 --- a/include/agent_tools.hpp +++ b/include/agent_tools.hpp @@ -12,7 +12,7 @@ std::string execute_tool(const ToolCall& cmd, const AgentConfig& config); // Returns the JSON schema representing the tools the agent is capable of running -nlohmann::json get_agent_tools_schema(); +nlohmann::json get_agent_tools_schema(bool include_delegate_subagent = true); // Returns the default built-in tool registry used by the agent runtime. const ToolRegistry& get_default_tool_registry(); diff --git a/src/agent_loop.cpp b/src/agent_loop.cpp index c92a50b..60c0ac2 100644 --- a/src/agent_loop.cpp +++ b/src/agent_loop.cpp @@ -9,15 +9,20 @@ #include #include #include +#include #include #include #include +#include namespace { constexpr std::size_t kFailedTestsJoinLimit = 3; constexpr int kMaxFailureRetries = 1; constexpr std::size_t kMaxFingerprintArgumentsBytes = 128; +constexpr std::size_t kCompactionRecentNonSystemMessages = 3; +constexpr std::size_t kCompactionSourceMaxBytes = 24 * 1024; +constexpr std::size_t kDelegateErrorPreviewBytes = 256; enum class ToolFailureClass { None, @@ -34,6 +39,26 @@ struct ToolFailureAnalysis { std::string fingerprint; }; +struct DelegateRequest { + std::string role; + std::string task; + std::vector context_files; + std::string context_notes; + std::string expected_output; + int max_turns = 0; +}; + +struct AgentSessionOptions { + bool child_mode = false; +}; + +struct AgentSessionResult { + nlohmann::json messages = nlohmann::json::array(); + bool completed = false; + bool state_contaminated = false; + std::string final_content; +}; + const char* tool_failure_class_to_string(ToolFailureClass classification) { switch (classification) { case ToolFailureClass::None: @@ -324,6 +349,420 @@ ToolFailureAnalysis analyze_build_test_result(const ToolCall& tc, return {}; } +std::string make_recovery_guidance_message(const ToolCall& tc, const ToolFailureAnalysis& analysis) { + return "Tool recovery guidance for " + tc.name + ": classification=" + + tool_failure_class_to_string(analysis.classification) + ". " + analysis.guidance; +} + +std::string make_skipped_tool_output() { + return nlohmann::json{ + {"ok", false}, + {"status", "skipped"}, + {"reason", "not executed due to prior tool failure"}, + }.dump(); +} + +void append_tool_message(nlohmann::json& messages, + const std::string& tool_call_id, + const std::string& name, + const std::string& content) { + messages.push_back({ + {"role", "tool"}, + {"tool_call_id", tool_call_id}, + {"name", name}, + {"content", content}, + }); +} + +std::string preview_text(const std::string& text, std::size_t max_bytes) { + if (text.size() <= max_bytes) { + return text; + } + return text.substr(0, max_bytes) + "..."; +} + +std::size_t messages_size_bytes(const nlohmann::json& messages) { + return messages.dump().size(); +} + +nlohmann::json filter_delegate_subagent_tool(const nlohmann::json& tools_registry) { + if (!tools_registry.is_array()) { + return nlohmann::json::array(); + } + + nlohmann::json filtered = nlohmann::json::array(); + for (const auto& tool : tools_registry) { + if (!tool.is_object() || !tool.contains("function")) { + continue; + } + if (tool["function"].value("name", "") == "delegate_subagent") { + continue; + } + filtered.push_back(tool); + } + return filtered; +} + +std::string build_compaction_source(const nlohmann::json& old_messages) { + std::string source; + source.reserve(std::min(old_messages.dump().size(), kCompactionSourceMaxBytes)); + + for (const auto& message : old_messages) { + std::string block = "role=" + message.value("role", ""); + if (message.value("role", "") == "tool") { + block += " tool=" + message.value("name", ""); + block += " tool_call_id=" + message.value("tool_call_id", ""); + } + block += "\n"; + block += message.value("content", ""); + block += "\n\n"; + + if (source.size() + block.size() > kCompactionSourceMaxBytes) { + const std::size_t keep = kCompactionSourceMaxBytes > source.size() ? + kCompactionSourceMaxBytes - source.size() : 0; + source += block.substr(0, keep); + source += "\n"; + break; + } + source += block; + } + + return source; +} + +void compact_messages_if_needed(const AgentConfig& config, + nlohmann::json& messages, + LLMStreamFunc llm_func) { + if (!messages.is_array() || messages.size() <= 2) { + return; + } + if (config.max_context_bytes == 0 || messages_size_bytes(messages) <= config.max_context_bytes) { + return; + } + + + + std::vector non_system_indices; + non_system_indices.reserve(messages.size()); + for (std::size_t i = 1; i < messages.size(); ++i) { + if (messages[i].value("role", "") != "system") { + non_system_indices.push_back(i); + } + } + + + + if (non_system_indices.size() < kCompactionRecentNonSystemMessages) { + LOG_INFO("Compaction skipped: not enough non-system messages"); + return; + } + + std::size_t recent_start_index; + if (non_system_indices.size() == kCompactionRecentNonSystemMessages) { + // 当非系统消息数量正好等于保留数量时,至少保留一条消息用于压缩 + // 跳过第一条非系统消息,从第二条开始作为最近消息 + recent_start_index = non_system_indices[1]; + LOG_INFO("Compaction boundary case: using recent_start_index={} (second non-system message)", recent_start_index); + } else { + recent_start_index = non_system_indices[non_system_indices.size() - kCompactionRecentNonSystemMessages]; + LOG_INFO("Compaction normal case: recent_start_index={}", recent_start_index); + } + nlohmann::json old_messages = nlohmann::json::array(); + for (std::size_t i = 1; i < recent_start_index; ++i) { + old_messages.push_back(messages[i]); + } + LOG_INFO("Compaction: old_messages size={}", old_messages.size()); + if (old_messages.empty()) { + LOG_INFO("Compaction skipped: old_messages is empty"); + return; + } + + const std::string source = build_compaction_source(old_messages); + LOG_INFO("Compaction: source size={} bytes", source.size()); + if (source.empty()) { + LOG_INFO("Compaction skipped: source is empty"); + return; + } + + nlohmann::json compaction_messages = nlohmann::json::array({ + { + {"role", "system"}, + {"content", + "You are a context compactor for an autonomous coding agent. " + "Return only a concise factual summary. Preserve important file paths, commands, decisions, and errors. " + "Do not call tools."} + }, + { + {"role", "user"}, + {"content", + "Summarize the older conversation history below so the agent can continue working.\n" + "Keep the summary compact but preserve concrete facts, especially paths, commands, decisions, and errors.\n\n" + + source} + } + }); + + nlohmann::json response; + try { + response = llm_func(config, compaction_messages, nlohmann::json::array()); + } catch (const std::exception& e) { + LOG_ERROR("Context compaction failed: {}", e.what()); + return; + } + + const std::string summary = response.value("content", ""); + LOG_INFO("Compaction: received summary, size={} bytes", summary.size()); + if (summary.empty()) { + LOG_INFO("Compaction skipped: summary is empty"); + return; + } + + nlohmann::json compacted = nlohmann::json::array(); + compacted.push_back(messages[0]); + compacted.push_back({ + {"role", "system"}, + {"content", "Compacted earlier context summary:\n" + summary} + }); + for (std::size_t i = recent_start_index; i < messages.size(); ++i) { + compacted.push_back(messages[i]); + } + + size_t original_size = messages_size_bytes(messages); + size_t compacted_size = messages_size_bytes(compacted); + LOG_INFO("Compaction: original_size={}, compacted_size={}", original_size, compacted_size); + if (compacted_size < original_size) { + messages = std::move(compacted); + LOG_INFO("Compaction applied: reduced size by {} bytes", original_size - compacted_size); + } else { + LOG_INFO("Compaction not applied: compacted size not smaller"); + } +} + +bool parse_string_array_argument(const nlohmann::json& arguments, + const char* key, + std::vector* out, + std::string* error) { + out->clear(); + if (!arguments.contains(key)) { + return true; + } + if (!arguments.at(key).is_array()) { + *error = "Argument '" + std::string(key) + "' must be an array of strings."; + return false; + } + for (const auto& item : arguments.at(key)) { + if (!item.is_string()) { + *error = "Argument '" + std::string(key) + "' must be an array of strings."; + return false; + } + out->push_back(item.get()); + } + return true; +} + +bool parse_delegate_request(const ToolCall& tc, DelegateRequest* out, std::string* error) { + const auto& arguments = tc.arguments; + if (!arguments.is_object()) { + *error = "delegate_subagent arguments must be a JSON object."; + return false; + } + + static const std::vector kAllowedKeys{ + "role", "task", "context_files", "context_notes", "expected_output", "max_turns"}; + for (const auto& [key, _] : arguments.items()) { + if (std::find(kAllowedKeys.begin(), kAllowedKeys.end(), key) == kAllowedKeys.end()) { + *error = "Unknown argument '" + key + "' for delegate_subagent."; + return false; + } + } + + if (!arguments.contains("role") || !arguments.at("role").is_string() || + arguments.at("role").get().empty()) { + *error = "Argument 'role' for delegate_subagent must be a non-empty string."; + return false; + } + if (!arguments.contains("task") || !arguments.at("task").is_string() || + arguments.at("task").get().empty()) { + *error = "Argument 'task' for delegate_subagent must be a non-empty string."; + return false; + } + + out->role = arguments.at("role").get(); + out->task = arguments.at("task").get(); + out->context_notes = arguments.value("context_notes", ""); + out->expected_output = arguments.value("expected_output", ""); + out->max_turns = 0; + + if (arguments.contains("context_notes") && !arguments.at("context_notes").is_string()) { + *error = "Argument 'context_notes' must be a string."; + return false; + } + if (arguments.contains("expected_output") && !arguments.at("expected_output").is_string()) { + *error = "Argument 'expected_output' must be a string."; + return false; + } + if (!parse_string_array_argument(arguments, "context_files", &out->context_files, error)) { + return false; + } + + if (arguments.contains("max_turns")) { + const auto& value = arguments.at("max_turns"); + if (!value.is_number_integer() && !value.is_number_unsigned()) { + *error = "Argument 'max_turns' must be a positive integer."; + return false; + } + out->max_turns = value.get(); + if (out->max_turns <= 0) { + *error = "Argument 'max_turns' must be a positive integer."; + return false; + } + } + + return true; +} + +nlohmann::json make_delegate_result_base(const DelegateRequest& request) { + return nlohmann::json{ + {"ok", false}, + {"role", request.role}, + {"task", request.task}, + {"result_summary", ""}, + {"files_touched", nlohmann::json::array()}, + {"key_facts", nlohmann::json::array()}, + {"open_questions", nlohmann::json::array()}, + {"commands_ran", nlohmann::json::array()}, + {"verification_passed", false}, + {"error", ""} + }; +} + +nlohmann::json make_delegate_error_result(const DelegateRequest& request, const std::string& error) { + nlohmann::json result = make_delegate_result_base(request); + result["error"] = error; + return result; +} + +bool normalize_string_array_field(const nlohmann::json& source, + const char* key, + nlohmann::json* destination, + std::string* error) { + if (!source.contains(key)) { + (*destination)[key] = nlohmann::json::array(); + return true; + } + if (!source.at(key).is_array()) { + *error = "Child summary field '" + std::string(key) + "' must be an array of strings."; + return false; + } + nlohmann::json normalized = nlohmann::json::array(); + for (const auto& item : source.at(key)) { + if (!item.is_string()) { + *error = "Child summary field '" + std::string(key) + "' must be an array of strings."; + return false; + } + normalized.push_back(item.get()); + } + (*destination)[key] = std::move(normalized); + return true; +} + +std::string build_child_system_prompt(const DelegateRequest& request) { + return "You are a temporary delegated subagent for the role '" + request.role + "'. " + "You are not the main orchestrator. Use only the provided tools for this narrow task. " + "Do not call delegate_subagent. When you finish, return only a JSON object with these fields: " + "ok, result_summary, files_touched, key_facts, open_questions, commands_ran, verification_passed, error."; +} + +std::string build_child_user_prompt(const DelegateRequest& request) { + std::ostringstream prompt; + prompt << "Task:\n" << request.task << "\n\n"; + if (!request.context_files.empty()) { + prompt << "Inspect these files first if relevant:\n"; + for (const auto& path : request.context_files) { + prompt << "- " << path << "\n"; + } + prompt << "\n"; + } + if (!request.context_notes.empty()) { + prompt << "Context notes:\n" << request.context_notes << "\n\n"; + } + if (!request.expected_output.empty()) { + prompt << "Expected output focus:\n" << request.expected_output << "\n\n"; + } + prompt << "Return only the required JSON object. Keep it concise."; + return prompt.str(); +} + +bool derive_stricter_child_limit(int parent_limit, int requested_limit, int* out_limit, std::string* error) { + if (parent_limit <= 1) { + *error = "Parent limits are too tight to create a strictly smaller child session."; + return false; + } + + int child_limit = std::max(1, parent_limit / 2); + if (child_limit >= parent_limit) { + child_limit = parent_limit - 1; + } + if (requested_limit > 0) { + child_limit = std::min(child_limit, requested_limit); + } + if (child_limit <= 0 || child_limit >= parent_limit) { + *error = "Unable to derive strictly tighter child limits from the current parent configuration."; + return false; + } + + *out_limit = child_limit; + return true; +} + +bool derive_stricter_child_size_limit(std::size_t parent_limit, std::size_t* out_limit, std::string* error) { + if (parent_limit <= 1) { + *error = "Parent byte limits are too tight to create a strictly smaller child session."; + return false; + } + + std::size_t child_limit = std::max(1, parent_limit / 2); + if (child_limit >= parent_limit) { + child_limit = parent_limit - 1; + } + if (child_limit == 0 || child_limit >= parent_limit) { + *error = "Unable to derive strictly tighter child byte limits from the current parent configuration."; + return false; + } + + *out_limit = child_limit; + return true; +} + +bool derive_child_config(const AgentConfig& parent, + const DelegateRequest& request, + AgentConfig* child, + std::string* error) { + *child = parent; + if (!derive_stricter_child_limit(parent.max_turns, request.max_turns, &child->max_turns, error)) { + return false; + } + if (!derive_stricter_child_limit(parent.max_tool_calls_per_turn, 0, &child->max_tool_calls_per_turn, error)) { + return false; + } + if (!derive_stricter_child_limit(parent.max_total_tool_calls, 0, &child->max_total_tool_calls, error)) { + return false; + } + if (child->max_total_tool_calls < child->max_tool_calls_per_turn) { + child->max_total_tool_calls = child->max_tool_calls_per_turn; + if (child->max_total_tool_calls >= parent.max_total_tool_calls) { + *error = "Unable to keep child total tool calls below the parent limit."; + return false; + } + } + if (!derive_stricter_child_size_limit(parent.max_tool_output_bytes, &child->max_tool_output_bytes, error)) { + return false; + } + if (!derive_stricter_child_size_limit(parent.max_context_bytes, &child->max_context_bytes, error)) { + return false; + } + return true; +} + ToolFailureAnalysis analyze_tool_result(const ToolCall& tc, const std::string& raw_output) { nlohmann::json result; try { @@ -362,6 +801,13 @@ ToolFailureAnalysis analyze_tool_result(const ToolCall& tc, const std::string& r return analyze_build_test_result(tc, result, ok, timed_out, status); } + if (tc.name == "delegate_subagent" && !ok) { + return make_failure_analysis( + ToolFailureClass::NeedsInspection, + "Recommended next action: inspect the structured child error summary and decide whether to retry the delegated task or continue locally.", + "delegate_subagent:" + compact_arguments_fingerprint(tc.arguments)); + } + if (!ok || timed_out || status == "failed" || status == "timed_out") { return make_failure_analysis( ToolFailureClass::Fatal, @@ -372,97 +818,167 @@ ToolFailureAnalysis analyze_tool_result(const ToolCall& tc, const std::string& r return {}; } -std::string make_recovery_guidance_message(const ToolCall& tc, const ToolFailureAnalysis& analysis) { - return "Tool recovery guidance for " + tc.name + ": classification=" + - tool_failure_class_to_string(analysis.classification) + ". " + analysis.guidance; -} +AgentSessionResult run_agent_session(const AgentConfig& config, + const std::string& system_prompt, + const std::string& user_prompt, + const nlohmann::json& tools_registry, + LLMStreamFunc llm_func, + const AgentSessionOptions& options); -std::string make_skipped_tool_output() { - return nlohmann::json{ - {"ok", false}, - {"status", "skipped"}, - {"reason", "not executed due to prior tool failure"}, - }.dump(); -} +nlohmann::json run_subagent(const ToolCall& tc, + const AgentConfig& parent_config, + const nlohmann::json& parent_tools_registry, + LLMStreamFunc llm_func) { + DelegateRequest request; + std::string error; + if (!parse_delegate_request(tc, &request, &error)) { + DelegateRequest fallback; + if (tc.arguments.is_object()) { + fallback.role = tc.arguments.value("role", ""); + fallback.task = tc.arguments.value("task", ""); + } + return make_delegate_error_result(fallback, error); + } -void append_tool_message(nlohmann::json& messages, - const std::string& tool_call_id, - const std::string& name, - const std::string& content) { - messages.push_back({ - {"role", "tool"}, - {"tool_call_id", tool_call_id}, - {"name", name}, - {"content", content}, - }); + AgentConfig child_config; + if (!derive_child_config(parent_config, request, &child_config, &error)) { + return make_delegate_error_result(request, error); + } + + const nlohmann::json child_tools = filter_delegate_subagent_tool(parent_tools_registry); + AgentSessionResult child_result = run_agent_session( + child_config, + build_child_system_prompt(request), + build_child_user_prompt(request), + child_tools, + llm_func, + AgentSessionOptions{.child_mode = true}); + + if (!child_result.completed) { + return make_delegate_error_result(request, "Child run ended without a final summary."); + } + + if (child_result.final_content.empty()) { + return make_delegate_error_result(request, "Child run finished without returning a summary payload."); + } + + nlohmann::json parsed_summary; + try { + parsed_summary = nlohmann::json::parse(child_result.final_content); + } catch (const std::exception& e) { + return make_delegate_error_result( + request, + "Child summary was not valid JSON: " + std::string(e.what()) + + ". Raw preview: " + preview_text(child_result.final_content, kDelegateErrorPreviewBytes)); + } + + if (!parsed_summary.is_object()) { + return make_delegate_error_result(request, "Child summary must be a JSON object."); + } + + nlohmann::json normalized = make_delegate_result_base(request); + normalized["ok"] = parsed_summary.value("ok", true); + normalized["result_summary"] = parsed_summary.value("result_summary", ""); + normalized["verification_passed"] = parsed_summary.value("verification_passed", false); + normalized["error"] = parsed_summary.value("error", ""); + + if (!normalized["result_summary"].is_string()) { + return make_delegate_error_result(request, "Child summary field 'result_summary' must be a string."); + } + if (!parsed_summary.contains("verification_passed") || !parsed_summary.at("verification_passed").is_boolean()) { + normalized["verification_passed"] = false; + } + if (!normalized["error"].is_string()) { + return make_delegate_error_result(request, "Child summary field 'error' must be a string."); + } + if (!normalize_string_array_field(parsed_summary, "files_touched", &normalized, &error) || + !normalize_string_array_field(parsed_summary, "key_facts", &normalized, &error) || + !normalize_string_array_field(parsed_summary, "open_questions", &normalized, &error) || + !normalize_string_array_field(parsed_summary, "commands_ran", &normalized, &error)) { + return make_delegate_error_result(request, error); + } + + if (!normalized["ok"].get() && normalized["error"].get().empty()) { + normalized["error"] = "Child reported failure without an error message."; + } + return normalized; } -} // namespace +AgentSessionResult run_agent_session(const AgentConfig& config, + const std::string& system_prompt, + const std::string& user_prompt, + const nlohmann::json& tools_registry, + LLMStreamFunc llm_func, + const AgentSessionOptions& options) { + AgentSessionResult session; -void agent_run(const AgentConfig& config, - const std::string& system_prompt, - const std::string& user_prompt, - const nlohmann::json& tools_registry, - LLMStreamFunc llm_func) { - - nlohmann::json messages = nlohmann::json::array(); - if (!system_prompt.empty()) { - messages.push_back({{"role", "system"}, {"content", system_prompt}}); + session.messages.push_back({{"role", "system"}, {"content", system_prompt}}); } - messages.push_back({{"role", "user"}, {"content", user_prompt}}); - + session.messages.push_back({{"role", "user"}, {"content", user_prompt}}); + int turns = 0; int total_tool_calls = 0; std::unordered_map failure_retry_counts; - + while (true) { turns++; if (turns > config.max_turns) { LOG_ERROR("Broker loop hit max_turns (" + std::to_string(config.max_turns) + "), aborting to prevent infinite loop."); - std::cerr << "[Agent Error] Exceeded max standard turns.\n"; + if (!options.child_mode) { + std::cerr << "[Agent Error] Exceeded max standard turns.\n"; + } break; } - - enforce_context_limits(messages, config.max_context_bytes); - + + compact_messages_if_needed(config, session.messages, llm_func); + enforce_context_limits(session.messages, config.max_context_bytes); + LOG_INFO("Agent Turn " + std::to_string(turns) + " started..."); - - // Execute LLM Step + nlohmann::json response_message; try { - response_message = llm_func(config, messages, tools_registry); + response_message = llm_func(config, session.messages, tools_registry); } catch (const std::exception& e) { LOG_ERROR(std::string("LLM Execution failed: ") + e.what()); - std::cerr << "[Agent Error] LLM request failed: " << e.what() << "\n"; + if (!options.child_mode) { + std::cerr << "[Agent Error] LLM request failed: " << e.what() << "\n"; + } break; } - - messages.push_back(response_message); - + + session.messages.push_back(response_message); + if (!response_message.contains("tool_calls") || response_message["tool_calls"].empty()) { LOG_INFO("No tool calls. Agent loop complete."); - std::cout << "[Agent Complete] " << response_message.value("content", "") << "\n"; + session.completed = true; + session.final_content = response_message.value("content", ""); + if (!options.child_mode) { + std::cout << "[Agent Complete] " << session.final_content << "\n"; + } break; } - + auto tool_calls = response_message["tool_calls"]; if (tool_calls.size() > config.max_tool_calls_per_turn) { LOG_ERROR("Too many tools requested in turn: " + std::to_string(tool_calls.size())); - std::cerr << "[Agent Error] Tool flood detected, limit " << config.max_tool_calls_per_turn << ". Aborting.\n"; + if (!options.child_mode) { + std::cerr << "[Agent Error] Tool flood detected, limit " << config.max_tool_calls_per_turn << ". Aborting.\n"; + } break; } - - total_tool_calls += tool_calls.size(); + + total_tool_calls += static_cast(tool_calls.size()); if (total_tool_calls > config.max_total_tool_calls) { LOG_ERROR("Max total tool calls exceeded: " + std::to_string(total_tool_calls)); - std::cerr << "[Agent Error] Global tool call limit hit, aborting.\n"; + if (!options.child_mode) { + std::cerr << "[Agent Error] Global tool call limit hit, aborting.\n"; + } break; } bool state_contaminated = false; - - // Ensure sequentially execute tools + for (std::size_t tool_index = 0; tool_index < tool_calls.size(); ++tool_index) { const auto& tc_json = tool_calls[tool_index]; ToolCall tc; @@ -474,26 +990,40 @@ void agent_run(const AgentConfig& config, tc.arguments = nlohmann::json::parse(raw_args); } catch (...) { LOG_ERROR("Tool JSON parse failed for " + tc.name); - tc.arguments = nlohmann::json::object(); // Continue to fail fast via dispatch + tc.arguments = nlohmann::json::object(); + } + } + + std::string raw_output; + if (tc.name == "delegate_subagent") { + if (options.child_mode) { + raw_output = nlohmann::json{ + {"ok", false}, + {"status", "failed"}, + {"error", "delegate_subagent is disabled inside child sessions."} + }.dump(); + append_tool_message(session.messages, tc.id, tc.name, raw_output); + state_contaminated = true; + break; } + raw_output = run_subagent(tc, config, tools_registry, llm_func).dump(); + } else { + raw_output = execute_tool(tc, config); } - - const std::string raw_output = execute_tool(tc, config); + const ToolFailureAnalysis analysis = analyze_tool_result(tc, raw_output); - std::string output = raw_output; - - // Output length guard - output = truncate_tool_output(output, config.max_tool_output_bytes); - - append_tool_message(messages, tc.id, tc.name, output); - + std::string output = truncate_tool_output(raw_output, config.max_tool_output_bytes); + append_tool_message(session.messages, tc.id, tc.name, output); + if (!analysis.is_failure) { continue; } if (analysis.classification == ToolFailureClass::Fatal) { LOG_ERROR("Tool failed fatally: " + output); - std::cerr << "[Agent Error] Tool " << tc.name << " failed fatally: " << output << "\n"; + if (!options.child_mode) { + std::cerr << "[Agent Error] Tool " << tc.name << " failed fatally: " << output << "\n"; + } state_contaminated = true; break; } @@ -501,14 +1031,16 @@ void agent_run(const AgentConfig& config, const int seen_count = ++failure_retry_counts[analysis.fingerprint]; if (seen_count > kMaxFailureRetries) { LOG_ERROR("Tool failure repeated beyond retry budget: " + output); - std::cerr << "[Agent Error] Tool " << tc.name - << " repeated the same recoverable failure and exceeded the retry budget.\n"; + if (!options.child_mode) { + std::cerr << "[Agent Error] Tool " << tc.name + << " repeated the same recoverable failure and exceeded the retry budget.\n"; + } state_contaminated = true; break; } LOG_ERROR("Tool failed but is recoverable: " + output); - messages.push_back({ + session.messages.push_back({ {"role", "system"}, {"content", make_recovery_guidance_message(tc, analysis)} }); @@ -519,14 +1051,29 @@ void agent_run(const AgentConfig& config, if (skipped_tc_json.contains("function")) { skipped_name = skipped_tc_json["function"].value("name", ""); } - append_tool_message(messages, skipped_tool_call_id, skipped_name, make_skipped_tool_output()); + append_tool_message(session.messages, skipped_tool_call_id, skipped_name, make_skipped_tool_output()); } break; } - + if (state_contaminated) { - std::cerr << "[Agent Error] Run stopped due to state contamination (tool failure or timeout).\n"; + session.state_contaminated = true; + if (!options.child_mode) { + std::cerr << "[Agent Error] Run stopped due to state contamination (tool failure or timeout).\n"; + } break; } } + + return session; +} + +} // namespace + +void agent_run(const AgentConfig& config, + const std::string& system_prompt, + const std::string& user_prompt, + const nlohmann::json& tools_registry, + LLMStreamFunc llm_func) { + run_agent_session(config, system_prompt, user_prompt, tools_registry, llm_func, AgentSessionOptions{}); } diff --git a/src/agent_tools.cpp b/src/agent_tools.cpp index 8bfeb16..d011596 100644 --- a/src/agent_tools.cpp +++ b/src/agent_tools.cpp @@ -229,6 +229,46 @@ void register_or_throw(ToolRegistry* registry, ToolDescriptor descriptor) { } } +nlohmann::json make_delegate_subagent_schema_entry() { + return { + {"type", "function"}, + {"function", { + {"name", "delegate_subagent"}, + {"description", + "Delegates a bounded, temporary sub-task to a disposable child agent. " + "Use only for narrow side tasks that benefit from isolated context. " + "The child cannot delegate again and returns a compact structured summary."}, + {"parameters", make_parameters_schema({ + {"role", { + {"type", "string"}, + {"description", "Short role label describing the child specialization for this task."} + }}, + {"task", { + {"type", "string"}, + {"description", "The exact delegated task for the child agent."} + }}, + {"context_files", { + {"type", "array"}, + {"items", {{"type", "string"}}}, + {"description", "Optional repository-relative files the child should inspect first."} + }}, + {"context_notes", { + {"type", "string"}, + {"description", "Optional brief context or constraints from the parent."} + }}, + {"expected_output", { + {"type", "string"}, + {"description", "Describe the concise result summary the child should return."} + }}, + {"max_turns", { + {"type", "integer"}, + {"description", "Optional maximum child turns. The runtime may clamp this below the parent limits."} + }} + }, {"role", "task"})} + }} + }; +} + ToolRegistry build_default_tool_registry() { ToolRegistry registry; @@ -860,6 +900,10 @@ std::string execute_tool(const ToolCall& cmd, const AgentConfig& config) { return get_default_tool_registry().execute(cmd, config); } -nlohmann::json get_agent_tools_schema() { - return get_default_tool_registry().to_openai_schema(); +nlohmann::json get_agent_tools_schema(bool include_delegate_subagent) { + nlohmann::json schema = get_default_tool_registry().to_openai_schema(); + if (include_delegate_subagent) { + schema.push_back(make_delegate_subagent_schema_entry()); + } + return schema; } diff --git a/tests/test_agent_loop_limits.cpp b/tests/test_agent_loop_limits.cpp index 009c23f..98d622d 100644 --- a/tests/test_agent_loop_limits.cpp +++ b/tests/test_agent_loop_limits.cpp @@ -3,6 +3,7 @@ #include "agent_utils.hpp" #include "config.hpp" #include "logger.hpp" +#include "agent_tools.hpp" #include #include #include @@ -42,6 +43,22 @@ std::string first_message_content_with_role(const nlohmann::json& messages, cons return ""; } +std::string nth_message_content_with_role(const nlohmann::json& messages, + const std::string& role, + int ordinal) { + int seen = 0; + for (const auto& message : messages) { + if (message.value("role", "") != role) { + continue; + } + if (seen == ordinal) { + return message.value("content", ""); + } + ++seen; + } + return ""; +} + std::string tool_message_content_for_id(const nlohmann::json& messages, const std::string& tool_call_id) { for (const auto& message : messages) { if (message.value("role", "") != "tool") { @@ -1062,14 +1079,24 @@ TEST_F(AgentLoopLimitsTest, CombinedTruncationAndContextLimit) { config.workspace_abs = test_workspace; config.max_turns = 2; // Need a second turn to test context clipping config.max_tool_output_bytes = 50; // Severely truncate individual tools - config.max_context_bytes = 900; // Tightly clamp total context + config.max_context_bytes = 500; // Tightly clamp total context - int call_count = 0; + int loop_call_count = 0; + int compaction_call_count = 0; size_t msgs_size_turn_2 = 0; LLMStreamFunc mock_llm = [&](const AgentConfig& cfg, const nlohmann::json& msgs, const nlohmann::json&) -> nlohmann::json { - call_count++; - if (call_count == 1) { + const std::string first_system = first_message_content_with_role(msgs, "system"); + if (first_system.find("context compactor") != std::string::npos) { + compaction_call_count++; + return nlohmann::json{ + {"role", "assistant"}, + {"content", "Earlier reads covered big_a.txt, big_b.txt, and big_c.txt; outputs were truncated."} + }; + } + + loop_call_count++; + if (loop_call_count == 1) { // First turn: generate lots of tools with heavy output return nlohmann::json{ {"role", "assistant"}, @@ -1089,9 +1116,275 @@ TEST_F(AgentLoopLimitsTest, CombinedTruncationAndContextLimit) { agent_run(config, "Sys", "Req", nlohmann::json::array(), mock_llm); // Expect agent went to turn 2 - EXPECT_EQ(call_count, 2); + EXPECT_EQ(loop_call_count, 2); + EXPECT_EQ(compaction_call_count, 1); // Validate output shrinkage actually cascaded nicely below global max limitation EXPECT_LE(msgs_size_turn_2, config.max_context_bytes); EXPECT_GT(msgs_size_turn_2, 0); } + +TEST_F(AgentLoopLimitsTest, DelegateSubagentChildFiltersToolsAndReturnsStructuredResult) { + write_file("delegate.txt", "delegated content"); + + AgentConfig config; + config.workspace_abs = test_workspace; + config.max_turns = 4; + config.max_tool_calls_per_turn = 4; + config.max_total_tool_calls = 8; + + int parent_turn = 0; + int child_turn = 0; + bool child_saw_delegate = false; + nlohmann::json second_turn_messages = nlohmann::json::array(); + + LLMStreamFunc mock_llm = [&](const AgentConfig&, const nlohmann::json& messages, const nlohmann::json& tools) -> nlohmann::json { + const std::string first_system = first_message_content_with_role(messages, "system"); + if (first_system.find("temporary delegated subagent") != std::string::npos) { + child_turn++; + for (const auto& tool : tools) { + if (tool["function"]["name"] == "delegate_subagent") { + child_saw_delegate = true; + } + } + + if (child_turn == 1) { + return nlohmann::json{ + {"role", "assistant"}, + {"tool_calls", {{ + {"id", "child_read"}, + {"function", { + {"name", "read_file_safe"}, + {"arguments", "{\"path\":\"delegate.txt\"}"} + }} + }}} + }; + } + + return nlohmann::json{ + {"role", "assistant"}, + {"content", + "{\"ok\":true,\"result_summary\":\"Read delegate.txt for the parent.\"," + "\"files_touched\":[\"delegate.txt\"]," + "\"key_facts\":[\"delegate.txt contains delegated content\"]," + "\"open_questions\":[]," + "\"commands_ran\":[]," + "\"verification_passed\":true," + "\"error\":\"\"}"} + }; + } + + parent_turn++; + if (parent_turn == 1) { + return nlohmann::json{ + {"role", "assistant"}, + {"tool_calls", {{ + {"id", "delegate_call"}, + {"function", { + {"name", "delegate_subagent"}, + {"arguments", + "{\"role\":\"researcher\",\"task\":\"Inspect delegate.txt\"," + "\"context_files\":[\"delegate.txt\"],\"expected_output\":\"Return the file finding\"," + "\"max_turns\":2}"} + }} + }}} + }; + } + + second_turn_messages = messages; + return nlohmann::json{{"role", "assistant"}, {"content", "done"}}; + }; + + agent_run(config, "parent system", "delegate the side task", get_agent_tools_schema(), mock_llm); + + ASSERT_EQ(parent_turn, 2); + ASSERT_EQ(child_turn, 2); + EXPECT_FALSE(child_saw_delegate); + + const auto result = nlohmann::json::parse(tool_message_content_for_id(second_turn_messages, "delegate_call")); + EXPECT_TRUE(result["ok"].get()) << result.dump(); + EXPECT_EQ(result["role"], "researcher"); + EXPECT_EQ(result["task"], "Inspect delegate.txt"); + EXPECT_EQ(result["result_summary"], "Read delegate.txt for the parent."); + EXPECT_EQ(result["files_touched"], nlohmann::json::array({"delegate.txt"})); + EXPECT_EQ(result["key_facts"], nlohmann::json::array({"delegate.txt contains delegated content"})); + EXPECT_EQ(result["commands_ran"], nlohmann::json::array()); + EXPECT_TRUE(result["verification_passed"].get()); +} + +TEST_F(AgentLoopLimitsTest, ChildCannotRecursivelyDelegate) { + AgentConfig config; + config.workspace_abs = test_workspace; + config.max_turns = 4; + config.max_tool_calls_per_turn = 4; + config.max_total_tool_calls = 8; + + int parent_turn = 0; + int child_turn = 0; + nlohmann::json second_turn_messages = nlohmann::json::array(); + + LLMStreamFunc mock_llm = [&](const AgentConfig&, const nlohmann::json& messages, const nlohmann::json&) -> nlohmann::json { + const std::string first_system = first_message_content_with_role(messages, "system"); + if (first_system.find("temporary delegated subagent") != std::string::npos) { + child_turn++; + return nlohmann::json{ + {"role", "assistant"}, + {"tool_calls", {{ + {"id", "child_delegate_again"}, + {"function", { + {"name", "delegate_subagent"}, + {"arguments", "{\"role\":\"nested\",\"task\":\"Should fail\"}"} + }} + }}} + }; + } + + parent_turn++; + if (parent_turn == 1) { + return nlohmann::json{ + {"role", "assistant"}, + {"tool_calls", {{ + {"id", "delegate_call"}, + {"function", { + {"name", "delegate_subagent"}, + {"arguments", "{\"role\":\"researcher\",\"task\":\"Attempt recursion\"}"} + }} + }}} + }; + } + + second_turn_messages = messages; + return nlohmann::json{{"role", "assistant"}, {"content", "done"}}; + }; + + agent_run(config, "parent system", "delegate the side task", get_agent_tools_schema(), mock_llm); + + ASSERT_EQ(parent_turn, 2); + ASSERT_EQ(child_turn, 1); + + const auto result = nlohmann::json::parse(tool_message_content_for_id(second_turn_messages, "delegate_call")); + EXPECT_FALSE(result["ok"].get()); + EXPECT_NE(result["error"].get().find("ended without a final summary"), std::string::npos); +} + +TEST_F(AgentLoopLimitsTest, CompactionPreservesSystemPromptAndRecentMessages) { + write_file("recent1.txt", "one" + std::string(100, 'x')); + write_file("recent2.txt", "two" + std::string(100, 'y')); + write_file("recent3.txt", "three" + std::string(100, 'z')); + write_file("recent4.txt", "four" + std::string(100, 'w')); + write_file("recent5.txt", "five" + std::string(100, 'v')); + + AgentConfig config; + config.workspace_abs = test_workspace; + config.max_turns = 2; + config.max_tool_calls_per_turn = 8; + config.max_total_tool_calls = 8; + config.max_context_bytes = 1000; + + int loop_turn = 0; + int compaction_turn = 0; + nlohmann::json second_turn_messages = nlohmann::json::array(); + + LLMStreamFunc mock_llm = [&](const AgentConfig&, const nlohmann::json& messages, const nlohmann::json&) -> nlohmann::json { + const std::string first_system = first_message_content_with_role(messages, "system"); + if (first_system.find("context compactor") != std::string::npos) { + compaction_turn++; + return nlohmann::json{ + {"role", "assistant"}, + {"content", "Summary preserved src/main.cpp, bash -lc ./build.sh test, and compile failed."} + }; + } + + loop_turn++; + if (loop_turn == 1) { + return nlohmann::json{ + {"role", "assistant"}, + {"tool_calls", { + {{"id", "recent1"}, {"function", {{"name", "read_file_safe"}, {"arguments", "{\"path\":\"recent1.txt\"}"}}}}, + {{"id", "recent2"}, {"function", {{"name", "read_file_safe"}, {"arguments", "{\"path\":\"recent2.txt\"}"}}}}, + {{"id", "recent3"}, {"function", {{"name", "read_file_safe"}, {"arguments", "{\"path\":\"recent3.txt\"}"}}}}, + {{"id", "recent4"}, {"function", {{"name", "read_file_safe"}, {"arguments", "{\"path\":\"recent4.txt\"}"}}}}, + {{"id", "recent5"}, {"function", {{"name", "read_file_safe"}, {"arguments", "{\"path\":\"recent5.txt\"}"}}}} + }} + }; + } + + second_turn_messages = messages; + return nlohmann::json{{"role", "assistant"}, {"content", "done"}}; + }; + + agent_run(config, + "PRIMARY SYSTEM PROMPT", + "Earlier decision: inspect src/main.cpp after bash -lc ./build.sh test failed with compile failed.", + get_agent_tools_schema(false), + mock_llm); + + EXPECT_EQ(loop_turn, 2); + EXPECT_EQ(compaction_turn, 1); + EXPECT_EQ(nth_message_content_with_role(second_turn_messages, "system", 0), "PRIMARY SYSTEM PROMPT"); + EXPECT_NE(nth_message_content_with_role(second_turn_messages, "system", 1).find("Compacted earlier context summary"), std::string::npos); + EXPECT_NE(tool_message_content_for_id(second_turn_messages, "recent5").find("five"), std::string::npos); +} + +TEST_F(AgentLoopLimitsTest, CompactionSummaryKeepsImportantFacts) { + write_file("facts1.txt", "fact one" + std::string(100, 'a')); + write_file("facts2.txt", "fact two" + std::string(100, 'b')); + write_file("facts3.txt", "fact three" + std::string(100, 'c')); + write_file("facts4.txt", "fact four" + std::string(100, 'd')); + write_file("facts5.txt", "fact five" + std::string(100, 'e')); + + AgentConfig config; + config.workspace_abs = test_workspace; + config.max_turns = 2; + config.max_tool_calls_per_turn = 8; + config.max_total_tool_calls = 8; + config.max_context_bytes = 1000; + + int loop_turn = 0; + std::string compaction_request; + nlohmann::json second_turn_messages = nlohmann::json::array(); + + LLMStreamFunc mock_llm = [&](const AgentConfig&, const nlohmann::json& messages, const nlohmann::json&) -> nlohmann::json { + const std::string first_system = first_message_content_with_role(messages, "system"); + if (first_system.find("context compactor") != std::string::npos) { + compaction_request = messages[1].value("content", ""); + return nlohmann::json{ + {"role", "assistant"}, + {"content", "Use src/main.cpp, rerun bash -lc ./build.sh test, and note compile failed."} + }; + } + + loop_turn++; + if (loop_turn == 1) { + return nlohmann::json{ + {"role", "assistant"}, + {"tool_calls", { + {{"id", "facts1"}, {"function", {{"name", "read_file_safe"}, {"arguments", "{\"path\":\"facts1.txt\"}"}}}}, + {{"id", "facts2"}, {"function", {{"name", "read_file_safe"}, {"arguments", "{\"path\":\"facts2.txt\"}"}}}}, + {{"id", "facts3"}, {"function", {{"name", "read_file_safe"}, {"arguments", "{\"path\":\"facts3.txt\"}"}}}}, + {{"id", "facts4"}, {"function", {{"name", "read_file_safe"}, {"arguments", "{\"path\":\"facts4.txt\"}"}}}}, + {{"id", "facts5"}, {"function", {{"name", "read_file_safe"}, {"arguments", "{\"path\":\"facts5.txt\"}"}}}} + }} + }; + } + + second_turn_messages = messages; + return nlohmann::json{{"role", "assistant"}, {"content", "done"}}; + }; + + agent_run(config, + "PRIMARY SYSTEM PROMPT", + "Remember src/main.cpp; command bash -lc ./build.sh test; latest error compile failed.", + get_agent_tools_schema(false), + mock_llm); + + ASSERT_EQ(loop_turn, 2); + EXPECT_NE(compaction_request.find("src/main.cpp"), std::string::npos); + EXPECT_NE(compaction_request.find("bash -lc ./build.sh test"), std::string::npos); + EXPECT_NE(compaction_request.find("compile failed"), std::string::npos); + + const std::string summary = nth_message_content_with_role(second_turn_messages, "system", 1); + EXPECT_NE(summary.find("src/main.cpp"), std::string::npos); + EXPECT_NE(summary.find("bash -lc ./build.sh test"), std::string::npos); + EXPECT_NE(summary.find("compile failed"), std::string::npos); +} diff --git a/tests/test_agent_mock_e2e.cpp b/tests/test_agent_mock_e2e.cpp index 5c258c7..c6a497c 100644 --- a/tests/test_agent_mock_e2e.cpp +++ b/tests/test_agent_mock_e2e.cpp @@ -31,6 +31,18 @@ int run_bash(const std::string& command) { return std::system(wrapped.c_str()); } +std::string tool_message_content_for_id(const nlohmann::json& messages, const std::string& tool_call_id) { + for (const auto& message : messages) { + if (message.value("role", "") != "tool") { + continue; + } + if (message.value("tool_call_id", "") == tool_call_id) { + return message.value("content", ""); + } + } + return ""; +} + } // namespace class AgentMockE2ETest : public ::testing::Test { @@ -226,3 +238,81 @@ TEST_F(AgentMockE2ETest, MockReadOnlyRepoObservationChainDoesNotWriteWorkspace) EXPECT_EQ(turn, 4); EXPECT_EQ(snapshot_workspace_files(), before_snapshot); } + +TEST_F(AgentMockE2ETest, ParentDelegatesChildAndReceivesStructuredSummary) { + create_file("delegate.txt", "delegate me"); + + AgentConfig config; + config.workspace_abs = test_workspace; + config.max_turns = 4; + config.max_tool_calls_per_turn = 4; + config.max_total_tool_calls = 8; + config.max_tool_output_bytes = 4096; + config.max_context_bytes = 12000; + + int parent_turn = 0; + int child_turn = 0; + nlohmann::json second_turn_messages = nlohmann::json::array(); + + LLMStreamFunc mock_llm = [&](const AgentConfig&, const nlohmann::json& msgs, const nlohmann::json&) -> nlohmann::json { + const std::string first_system = msgs[0].value("content", ""); + if (first_system.find("temporary delegated subagent") != std::string::npos) { + child_turn++; + if (child_turn == 1) { + return nlohmann::json{ + {"role", "assistant"}, + {"tool_calls", {{ + {"id", "child_read"}, + {"function", { + {"name", "read_file_safe"}, + {"arguments", "{\"path\":\"delegate.txt\"}"} + }} + }}} + }; + } + + return nlohmann::json{ + {"role", "assistant"}, + {"content", + "{\"ok\":true,\"result_summary\":\"Read delegate.txt\",\"files_touched\":[\"delegate.txt\"]," + "\"key_facts\":[\"delegate.txt contains delegate me\"],\"open_questions\":[]," + "\"commands_ran\":[],\"verification_passed\":true,\"error\":\"\"}"} + }; + } + + parent_turn++; + if (parent_turn == 1) { + return nlohmann::json{ + {"role", "assistant"}, + {"tool_calls", {{ + {"id", "delegate_call"}, + {"function", { + {"name", "delegate_subagent"}, + {"arguments", + "{\"role\":\"reviewer\",\"task\":\"Inspect delegate.txt\",\"context_files\":[\"delegate.txt\"]," + "\"expected_output\":\"Return the important fact\",\"max_turns\":2}"} + }} + }}} + }; + } + + second_turn_messages = msgs; + return nlohmann::json{ + {"role", "assistant"}, + {"content", "Parent integrated the child summary"} + }; + }; + + agent_run(config, "parent system", "delegate a side task", get_agent_tools_schema(), mock_llm); + + ASSERT_EQ(parent_turn, 2); + ASSERT_EQ(child_turn, 2); + + const auto result = nlohmann::json::parse(tool_message_content_for_id(second_turn_messages, "delegate_call")); + EXPECT_TRUE(result["ok"].get()) << result.dump(); + EXPECT_EQ(result["role"], "reviewer"); + EXPECT_EQ(result["task"], "Inspect delegate.txt"); + EXPECT_EQ(result["result_summary"], "Read delegate.txt"); + EXPECT_EQ(result["files_touched"], nlohmann::json::array({"delegate.txt"})); + EXPECT_EQ(result["key_facts"], nlohmann::json::array({"delegate.txt contains delegate me"})); +} diff --git a/tests/test_schema_and_args_tolerance.cpp b/tests/test_schema_and_args_tolerance.cpp index 2b92310..39f08dc 100644 --- a/tests/test_schema_and_args_tolerance.cpp +++ b/tests/test_schema_and_args_tolerance.cpp @@ -34,7 +34,7 @@ int run_bash(const std::string& command) { TEST(SchemaAndArgsToleranceTest, GetSchemaMatchesCurrentTools) { json schema = get_agent_tools_schema(); - EXPECT_EQ(schema.size(), 13u); + EXPECT_EQ(schema.size(), 14u); bool has_read = false; bool has_write = false; @@ -49,6 +49,7 @@ TEST(SchemaAndArgsToleranceTest, GetSchemaMatchesCurrentTools) { bool has_git_show = false; bool has_git_add = false; bool has_git_commit = false; + bool has_delegate = false; for (const auto& tool : schema) { std::string name = tool["function"]["name"]; if (name == "read_file_safe") has_read = true; @@ -64,6 +65,7 @@ TEST(SchemaAndArgsToleranceTest, GetSchemaMatchesCurrentTools) { if (name == "git_show") has_git_show = true; if (name == "git_add") has_git_add = true; if (name == "git_commit") has_git_commit = true; + if (name == "delegate_subagent") has_delegate = true; } EXPECT_TRUE(has_read); EXPECT_TRUE(has_write); @@ -78,6 +80,32 @@ TEST(SchemaAndArgsToleranceTest, GetSchemaMatchesCurrentTools) { EXPECT_TRUE(has_git_show); EXPECT_TRUE(has_git_add); EXPECT_TRUE(has_git_commit); + EXPECT_TRUE(has_delegate); +} + +TEST(SchemaAndArgsToleranceTest, DelegateSubagentSchemaExposesExpectedFields) { + json schema = get_agent_tools_schema(); + + for (const auto& tool : schema) { + if (tool["function"]["name"] != "delegate_subagent") { + continue; + } + + const json& parameters = tool["function"]["parameters"]; + ASSERT_TRUE(parameters.is_object()); + ASSERT_TRUE(parameters.contains("properties")); + const json& properties = parameters["properties"]; + EXPECT_TRUE(properties.contains("role")); + EXPECT_TRUE(properties.contains("task")); + EXPECT_TRUE(properties.contains("context_files")); + EXPECT_TRUE(properties.contains("context_notes")); + EXPECT_TRUE(properties.contains("expected_output")); + EXPECT_TRUE(properties.contains("max_turns")); + EXPECT_EQ(parameters["required"], json::array({"role", "task"})); + return; + } + + ADD_FAILURE() << "delegate_subagent not found in tool schema"; } TEST(SchemaAndArgsToleranceTest, BashTimeoutRejectsOutOfRangeInteger) { From c5387a4fa20bb0b1e6e956f4a5b32d34f552e459 Mon Sep 17 00:00:00 2001 From: leelaugh <2560536821@qq.com> Date: Mon, 30 Mar 2026 14:28:43 +0800 Subject: [PATCH 4/4] fix: address code review issues for docgen and agent loop - fix(docgen): improve JSON extraction robustness (match code block, fallback to braces) - fix(docgen): unify patch simulation with real tool behavior (count occurrences, reject multi-matches) - fix(agent): simplify compaction boundary logic, remove special case and Chinese comments - feat(docgen): support InsertBefore/InsertAfter actions in to_patch_entries - test: all relevant tests pass (compaction, apply_patch, docgen_types) --- src/agent_loop.cpp | 17 ++++---- src/docgen_llm.cpp | 40 +++++++------------ src/docgen_pipeline.cpp | 87 +++++++++++++++++++++++++++++++++++++---- 3 files changed, 101 insertions(+), 43 deletions(-) diff --git a/src/agent_loop.cpp b/src/agent_loop.cpp index 60c0ac2..0f97457 100644 --- a/src/agent_loop.cpp +++ b/src/agent_loop.cpp @@ -457,16 +457,13 @@ void compact_messages_if_needed(const AgentConfig& config, return; } - std::size_t recent_start_index; - if (non_system_indices.size() == kCompactionRecentNonSystemMessages) { - // 当非系统消息数量正好等于保留数量时,至少保留一条消息用于压缩 - // 跳过第一条非系统消息,从第二条开始作为最近消息 - recent_start_index = non_system_indices[1]; - LOG_INFO("Compaction boundary case: using recent_start_index={} (second non-system message)", recent_start_index); - } else { - recent_start_index = non_system_indices[non_system_indices.size() - kCompactionRecentNonSystemMessages]; - LOG_INFO("Compaction normal case: recent_start_index={}", recent_start_index); - } + // When non_system_indices.size() == kCompactionRecentNonSystemMessages, + // recent_start_index will point to the first non-system message, + // resulting in empty old_messages and compaction will be skipped. + // This matches the intuitive behavior: no extra messages to compact. + const std::size_t recent_start_index = non_system_indices[non_system_indices.size() - kCompactionRecentNonSystemMessages]; + LOG_INFO("Compaction: recent_start_index={} ({} recent non-system messages retained)", + recent_start_index, kCompactionRecentNonSystemMessages); nlohmann::json old_messages = nlohmann::json::array(); for (std::size_t i = 1; i < recent_start_index; ++i) { old_messages.push_back(messages[i]); diff --git a/src/docgen_llm.cpp b/src/docgen_llm.cpp index 1e199f5..4c6962b 100644 --- a/src/docgen_llm.cpp +++ b/src/docgen_llm.cpp @@ -115,38 +115,26 @@ std::expected LlmClient::call_json( } std::string LlmClient::extract_json_from_response(const std::string& content) { - std::string trimmed; - for (char c : content) { - if (c != ' ' && c != '\n' && c != '\r' && c != '\t') { - trimmed += c; - } - } - - if (!trimmed.empty() && trimmed[0] == '{') { - size_t brace_count = 0; - size_t end_pos = 0; - for (size_t i = 0; i < trimmed.size(); ++i) { - if (trimmed[i] == '{') ++brace_count; - else if (trimmed[i] == '}') { - --brace_count; - if (brace_count == 0) { - end_pos = i + 1; - break; - } - } - } - if (end_pos > 0) { - return trimmed.substr(0, end_pos); - } - } - + // 1. Try to extract JSON from Markdown code block first std::regex json_block(R"(```(?:json)?\s*(\{[\s\S]*?\})\s*```)"); std::smatch match; if (std::regex_search(content, match, json_block)) { return match[1].str(); } - return content; + // 2. Fallback: find the outermost braces from first '{' to last '}' + const size_t first_brace = content.find('{'); + if (first_brace == std::string::npos) { + return content; // No opening brace found + } + + const size_t last_brace = content.rfind('}'); + if (last_brace == std::string::npos || last_brace <= first_brace) { + return content; // No matching closing brace + } + + // Return the substring including both braces + return content.substr(first_brace, last_brace - first_brace + 1); } } // namespace docgen diff --git a/src/docgen_pipeline.cpp b/src/docgen_pipeline.cpp index 222e48d..d5b9875 100644 --- a/src/docgen_pipeline.cpp +++ b/src/docgen_pipeline.cpp @@ -9,6 +9,21 @@ namespace docgen { namespace fs = std::filesystem; +// Helper function matching apply_patch.cpp's behavior +static int count_occurrences_with_overlap(const std::string& haystack, + const std::string& needle) { + if (needle.empty()) { + return 0; + } + int count = 0; + std::string::size_type pos = 0; + while ((pos = haystack.find(needle, pos)) != std::string::npos) { + ++count; + pos += 1; // advance by 1, NOT by needle.size(), to detect overlaps + } + return count; +} + DocgenPipeline::DocgenPipeline(const AgentConfig& cfg, const SubagentContext& ctx) : config_(cfg), ctx_(ctx), llm_(std::make_unique(cfg, ctx)) {} @@ -56,18 +71,61 @@ std::expected DocgenPipeline::run(const std::string if (!patch_result || patch_result->patches.empty()) continue; auto after = original; + int patches_skipped = 0; for (const auto& p : patch_result->patches) { - if (p.action == PatchAction::Replace || p.action == PatchAction::Delete) { - auto pos = after.find(p.old_text); - if (pos != std::string::npos) { + // Only process known patch actions + if (p.action != PatchAction::Replace && p.action != PatchAction::Delete && + p.action != PatchAction::InsertBefore && p.action != PatchAction::InsertAfter) { + patches_skipped++; + continue; + } + + // Check that old_text appears exactly once (as required by apply_patch_single) + const int occurrences = count_occurrences_with_overlap(after, p.old_text); + if (occurrences != 1) { + patches_skipped++; + continue; + } + + const std::string::size_type pos = after.find(p.old_text); + if (pos == std::string::npos) { + // Should not happen given occurrences == 1, but guard anyway + patches_skipped++; + continue; + } + + switch (p.action) { + case PatchAction::Replace: after.replace(pos, p.old_text.length(), p.new_text); - } + break; + case PatchAction::Delete: + after.replace(pos, p.old_text.length(), ""); + break; + case PatchAction::InsertBefore: + after.replace(pos, 0, p.new_text); + break; + case PatchAction::InsertAfter: + after.replace(pos + p.old_text.length(), 0, p.new_text); + break; + default: + patches_skipped++; + break; } } + if (patches_skipped > 0) { + result.patches_rejected += patches_skipped; + } + + // Skip review if all patches were rejected during simulation + if (patches_skipped == static_cast(patch_result->patches.size())) { + continue; + } + auto review = review_patch(original, after); if (!review || review->verdict == ReviewVerdict::Reject) { - result.patches_rejected += static_cast(patch_result->patches.size()); + // Only count patches that passed simulation but failed review + result.patches_rejected += static_cast(patch_result->patches.size()) - patches_skipped; continue; } @@ -220,8 +278,23 @@ std::vector DocgenPipeline::to_patch_entries( std::vector entries; for (const auto& p : patches.patches) { - if (p.action == PatchAction::Replace || p.action == PatchAction::Delete) { - entries.push_back({p.old_text, p.new_text}); + switch (p.action) { + case PatchAction::Replace: + case PatchAction::Delete: + entries.push_back({p.old_text, p.new_text}); + break; + case PatchAction::InsertBefore: + // Insert before old_text: replace old_text with new_text + old_text + entries.push_back({p.old_text, p.new_text + p.old_text}); + break; + case PatchAction::InsertAfter: + // Insert after old_text: replace old_text with old_text + new_text + entries.push_back({p.old_text, p.old_text + p.new_text}); + break; + default: + // Unknown action – ignore with a log warning + spdlog::warn("to_patch_entries: ignoring unknown patch action"); + break; } }