fix(agent): improve ReAct parser and tool schemas for better LLM compatibility

- Fix ACTION_INPUT regex to properly capture multiline JSON responses - Changed from stopping at first newline to capturing all remaining text - Resolves parsing errors when LLM generates formatted JSON with line breaks - Enhance tool schemas with detailed descriptions and parameter specifications - Add comprehensive Message schema for generate_text tool - Clarify distinction between resources/get (file read) and resources/list (directory listing) - Include clear usage guidance in tool descriptions - Set default model to llama3.2:latest instead of invalid "ollama" - Add parse error debugging to help troubleshoot LLM response issues The agent infrastructure now correctly handles multiline tool arguments and provides better guidance to LLMs through improved tool schemas. Remaining errors are due to LLM quality (model making poor tool choices or generating malformed responses), not infrastructure bugs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-09 19:43:07 +02:00
parent 05e90d3e2b
commit 33d11ae223
25 changed files with 1348 additions and 121 deletions
--- a/crates/owlen-core/src/agent.rs
+++ b/crates/owlen-core/src/agent.rs
@@ -0,0 +1,377 @@
+//! High‑level agentic executor implementing the ReAct pattern.
+//!
+//! The executor coordinates three responsibilities:
+//!   1. Build a ReAct prompt from the conversation history and the list of
+//!      available MCP tools.
+//!   2. Send the prompt to an LLM provider (any type implementing
+//!      `owlen_core::Provider`).
+//!   3. Parse the LLM response, optionally invoke a tool via an MCP client,
+//!      and feed the observation back into the conversation.
+//!
+//! The implementation is intentionally minimal – it provides the core loop
+//! required by Phase 4 of the roadmap. Integration with the TUI and additional
+//! safety mechanisms can be added on top of this module.
+
+use std::sync::Arc;
+
+use crate::ui::UiController;
+
+use dirs;
+use regex::Regex;
+use serde_json::json;
+use std::fs::OpenOptions;
+use std::io::Write;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::time::{SystemTime, UNIX_EPOCH};
+use tokio::signal;
+
+use crate::mcp::client::McpClient;
+use crate::mcp::{McpToolCall, McpToolDescriptor, McpToolResponse};
+use crate::{
+    types::{ChatRequest, Message},
+    Error, Provider, Result as CoreResult,
+};
+
+/// Configuration for the agent executor.
+#[derive(Debug, Clone)]
+pub struct AgentConfig {
+    /// Maximum number of ReAct iterations before the executor aborts.
+    pub max_iterations: usize,
+    /// Model name to use for the LLM provider.
+    pub model: String,
+    /// Optional temperature.
+    pub temperature: Option<f32>,
+    /// Optional max_tokens.
+    pub max_tokens: Option<u32>,
+    /// Maximum number of tool calls allowed per execution (budget).
+    pub max_tool_calls: usize,
+}
+
+impl Default for AgentConfig {
+    fn default() -> Self {
+        Self {
+            max_iterations: 10,
+            model: "ollama".into(),
+            temperature: Some(0.7),
+            max_tokens: None,
+            max_tool_calls: 20,
+        }
+    }
+}
+
+/// Enum representing the possible parsed LLM responses in ReAct format.
+#[derive(Debug)]
+pub enum LlmResponse {
+    /// A reasoning step without action.
+    Reasoning { thought: String },
+    /// The model wants to invoke a tool.
+    ToolCall {
+        thought: String,
+        tool_name: String,
+        arguments: serde_json::Value,
+    },
+    /// The model produced a final answer.
+    FinalAnswer { thought: String, answer: String },
+}
+
+/// Error type for the agent executor.
+#[derive(thiserror::Error, Debug)]
+pub enum AgentError {
+    #[error("LLM provider error: {0}")]
+    Provider(Error),
+    #[error("MCP client error: {0}")]
+    Mcp(Error),
+    #[error("Tool execution denied by user")]
+    ToolDenied,
+    #[error("Failed to parse LLM response")]
+    Parse,
+    #[error("Maximum iterations ({0}) reached without final answer")]
+    MaxIterationsReached(usize),
+    #[error("Agent execution cancelled by user (Ctrl+C)")]
+    Cancelled,
+}
+
+/// Core executor handling the ReAct loop.
+pub struct AgentExecutor {
+    llm_client: Arc<dyn Provider + Send + Sync>,
+    tool_client: Arc<dyn McpClient + Send + Sync>,
+    config: AgentConfig,
+    ui_controller: Option<Arc<dyn UiController + Send + Sync>>, // optional UI for confirmations
+}
+
+impl AgentExecutor {
+    /// Construct a new executor.
+    pub fn new(
+        llm_client: Arc<dyn Provider + Send + Sync>,
+        tool_client: Arc<dyn McpClient + Send + Sync>,
+        config: AgentConfig,
+        ui_controller: Option<Arc<dyn UiController + Send + Sync>>, // pass None for headless
+    ) -> Self {
+        Self {
+            llm_client,
+            tool_client,
+            config,
+            ui_controller,
+        }
+    }
+
+    /// Discover tools exposed by the MCP server.
+    async fn discover_tools(&self) -> CoreResult<Vec<McpToolDescriptor>> {
+        self.tool_client.list_tools().await
+    }
+
+    // #[allow(dead_code)]
+    // Build a ReAct prompt from the current message history and discovered tools.
+    /*
+        #[allow(dead_code)]
+        fn build_prompt(
+            &self,
+            history: &[Message],
+            tools: &[McpToolDescriptor],
+        ) -> String {
+            // System prompt describing the format.
+            let system = "You are an intelligent agent following the ReAct pattern. Use the following sections:\nTHOUGHT: your reasoning\nACTION: the tool name you want to call (or "final_answer")\nACTION_INPUT: JSON arguments for the tool.\nIf ACTION is "final_answer", provide the final answer in the next line after the ACTION_INPUT.\n";
+
+            let mut prompt = format!("System: {}\n", system);
+            // Append conversation history.
+            for msg in history {
+                let role = match msg.role {
+                    Role::User => "User",
+                    Role::Assistant => "Assistant",
+                    Role::System => "System",
+                    Role::Tool => "Tool",
+                };
+                prompt.push_str(&format!("{}: {}\n", role, msg.content));
+            }
+            // Append tool descriptions.
+            if !tools.is_empty() {
+                let tools_json = json!(tools);
+                prompt.push_str(&format!("Available tools (JSON schema): {}\n", tools_json));
+            }
+            prompt
+        }
+    */
+
+    // build_prompt removed; not used in current implementation
+
+    /// Parse raw LLM text into a structured `LlmResponse`.
+    pub fn parse_response(&self, text: &str) -> std::result::Result<LlmResponse, AgentError> {
+        // Normalise line endings.
+        let txt = text.trim();
+        // Regex patterns for parsing ReAct format.
+        // THOUGHT and ACTION capture up to the next newline.
+        // ACTION_INPUT captures everything remaining (including multiline JSON).
+        let thought_re = Regex::new(r"(?s)THOUGHT:\s*(?P<thought>.+?)(?:\n|$)").unwrap();
+        let action_re = Regex::new(r"(?s)ACTION:\s*(?P<action>.+?)(?:\n|$)").unwrap();
+        // ACTION_INPUT captures rest of text (multiline-friendly)
+        let input_re = Regex::new(r"(?s)ACTION_INPUT:\s*(?P<input>.+)").unwrap();
+
+        let thought = thought_re
+            .captures(txt)
+            .and_then(|c| c.name("thought"))
+            .map(|m| m.as_str().trim().to_string())
+            .ok_or(AgentError::Parse)?;
+        let action = action_re
+            .captures(txt)
+            .and_then(|c| c.name("action"))
+            .map(|m| m.as_str().trim().to_string())
+            .ok_or(AgentError::Parse)?;
+        let input = input_re
+            .captures(txt)
+            .and_then(|c| c.name("input"))
+            .map(|m| m.as_str().trim().to_string())
+            .ok_or(AgentError::Parse)?;
+
+        if action.eq_ignore_ascii_case("final_answer") {
+            Ok(LlmResponse::FinalAnswer {
+                thought,
+                answer: input,
+            })
+        } else {
+            // Parse arguments as JSON, falling back to a string if invalid.
+            let args = serde_json::from_str(&input).unwrap_or_else(|_| json!(input));
+            Ok(LlmResponse::ToolCall {
+                thought,
+                tool_name: action,
+                arguments: args,
+            })
+        }
+    }
+
+    /// Execute a single tool call via the MCP client.
+    async fn execute_tool(
+        &self,
+        name: &str,
+        arguments: serde_json::Value,
+    ) -> CoreResult<McpToolResponse> {
+        // For potentially unsafe tools (write/delete) ask for UI confirmation
+        // if a controller is available.
+        let dangerous = name.contains("write") || name.contains("delete");
+        if dangerous {
+            if let Some(controller) = &self.ui_controller {
+                let prompt = format!(
+                    "Confirm execution of potentially unsafe tool '{}' with args {}?",
+                    name, arguments
+                );
+                if !controller.confirm(&prompt).await {
+                    return Err(Error::PermissionDenied(format!(
+                        "Tool '{}' denied by user",
+                        name
+                    )));
+                }
+            }
+        }
+        let call = McpToolCall {
+            name: name.to_string(),
+            arguments,
+        };
+        self.tool_client.call_tool(call).await
+    }
+
+    /// Run the full ReAct loop and return the final answer.
+    pub async fn run(&self, query: String) -> std::result::Result<String, AgentError> {
+        let tools = self.discover_tools().await.map_err(AgentError::Mcp)?;
+
+        // Build system prompt with ReAct format instructions
+        let tools_desc = tools
+            .iter()
+            .map(|t| {
+                let schema_str = serde_json::to_string_pretty(&t.input_schema)
+                    .unwrap_or_else(|_| "{}".to_string());
+                format!(
+                    "- {}: {}\n  Input schema: {}",
+                    t.name, t.description, schema_str
+                )
+            })
+            .collect::<Vec<_>>()
+            .join("\n");
+
+        let system_prompt = format!(
+            "You are an AI assistant that uses the ReAct (Reasoning + Acting) pattern to solve tasks.\n\n\
+            You must ALWAYS respond in this exact format:\n\n\
+            THOUGHT: <your reasoning about what to do next>\n\
+            ACTION: <tool_name or \"final_answer\">\n\
+            ACTION_INPUT: <JSON arguments for the tool, or the final answer text>\n\n\
+            Available tools:\n{}\n\n\
+            HOW IT WORKS:\n\
+            1. When you call a tool, you will receive its output in the next message\n\
+            2. After receiving the tool output, analyze it and either:\n\
+               a) Use the information to provide a final answer\n\
+               b) Call another tool if you need more information\n\
+            3. When you have the information needed to answer the user's question, provide a final answer\n\n\
+            To provide a final answer:\n\
+            THOUGHT: <summary of what you learned>\n\
+            ACTION: final_answer\n\
+            ACTION_INPUT: <your complete answer using the information from the tools>\n\n\
+            IMPORTANT: You MUST follow this format exactly. Do not deviate from it.\n\
+            IMPORTANT: Only use the tools listed above. Do not try to use tools that are not listed.\n\
+            IMPORTANT: When providing the final answer, include the actual information you learned, not just the tool arguments.",
+            tools_desc
+        );
+
+        // Initialize conversation with system prompt and user query
+        let mut messages = vec![Message::system(system_prompt.clone()), Message::user(query)];
+
+        // Cancellation flag set when Ctrl+C is received.
+        let cancelled = Arc::new(AtomicBool::new(false));
+        let cancel_flag = cancelled.clone();
+        tokio::spawn(async move {
+            // Wait for Ctrl+C signal.
+            let _ = signal::ctrl_c().await;
+            cancel_flag.store(true, Ordering::SeqCst);
+        });
+
+        let mut tool_calls = 0usize;
+        for _ in 0..self.config.max_iterations {
+            if cancelled.load(Ordering::SeqCst) {
+                return Err(AgentError::Cancelled);
+            }
+            // Build a ChatRequest for the provider.
+            let chat_req = ChatRequest {
+                model: self.config.model.clone(),
+                messages: messages.clone(),
+                parameters: crate::types::ChatParameters {
+                    temperature: self.config.temperature,
+                    max_tokens: self.config.max_tokens,
+                    stream: false,
+                    extra: Default::default(),
+                },
+                tools: Some(tools.clone()),
+            };
+            let raw_resp = self
+                .llm_client
+                .chat(chat_req)
+                .await
+                .map_err(AgentError::Provider)?;
+            let parsed = self
+                .parse_response(&raw_resp.message.content)
+                .map_err(|e| {
+                    eprintln!("\n=== PARSE ERROR ===");
+                    eprintln!("Error: {:?}", e);
+                    eprintln!("LLM Response:\n{}", raw_resp.message.content);
+                    eprintln!("=== END ===\n");
+                    e
+                })?;
+            match parsed {
+                LlmResponse::Reasoning { thought } => {
+                    // Append the reasoning as an assistant message.
+                    messages.push(Message::assistant(thought));
+                }
+                LlmResponse::ToolCall {
+                    thought,
+                    tool_name,
+                    arguments,
+                } => {
+                    // Record the thought.
+                    messages.push(Message::assistant(thought));
+                    // Enforce tool call budget.
+                    tool_calls += 1;
+                    if tool_calls > self.config.max_tool_calls {
+                        return Err(AgentError::MaxIterationsReached(self.config.max_iterations));
+                    }
+                    // Execute tool.
+                    let args_clone = arguments.clone();
+                    let tool_resp = self
+                        .execute_tool(&tool_name, args_clone.clone())
+                        .await
+                        .map_err(AgentError::Mcp)?;
+                    // Convert tool output to a string for the message.
+                    let output_str = tool_resp
+                        .output
+                        .as_str()
+                        .map(|s| s.to_string())
+                        .unwrap_or_else(|| tool_resp.output.to_string());
+                    // Audit log the tool execution.
+                    if let Some(config_dir) = dirs::config_dir() {
+                        let log_path = config_dir.join("owlen/logs/tool_execution.log");
+                        if let Some(parent) = log_path.parent() {
+                            let _ = std::fs::create_dir_all(parent);
+                        }
+                        if let Ok(mut file) =
+                            OpenOptions::new().create(true).append(true).open(&log_path)
+                        {
+                            let ts = SystemTime::now()
+                                .duration_since(UNIX_EPOCH)
+                                .unwrap_or_default()
+                                .as_secs();
+                            let _ = writeln!(
+                                file,
+                                "{} | tool: {} | args: {} | output: {}",
+                                ts, tool_name, args_clone, output_str
+                            );
+                        }
+                    }
+                    messages.push(Message::tool(tool_name, output_str));
+                }
+                LlmResponse::FinalAnswer { thought, answer } => {
+                    // Append final thought and answer, then return.
+                    messages.push(Message::assistant(thought));
+                    // The final answer should be a single assistant message.
+                    messages.push(Message::assistant(answer.clone()));
+                    return Ok(answer);
+                }
+            }
+        }
+        Err(AgentError::MaxIterationsReached(self.config.max_iterations))
+    }
+}