BREAKING CHANGES: - owlen-core no longer depends on ratatui/crossterm - RemoteMcpClient constructors are now async - MCP path validation is stricter (security hardening) This commit resolves three critical issues identified in project analysis: ## P0-1: Extract TUI dependencies from owlen-core Create owlen-ui-common crate to hold UI-agnostic color and theme abstractions, removing architectural boundary violation. Changes: - Create new owlen-ui-common crate with abstract Color enum - Move theme.rs from owlen-core to owlen-ui-common - Define Color with Rgb and Named variants (no ratatui dependency) - Create color conversion layer in owlen-tui (color_convert.rs) - Update 35+ color usages with conversion wrappers - Remove ratatui/crossterm from owlen-core dependencies Benefits: - owlen-core usable in headless/CLI contexts - Enables future GUI frontends - Reduces binary size for core library consumers ## P0-2: Fix blocking WebSocket connections Convert RemoteMcpClient constructors to async, eliminating runtime blocking that froze TUI for 30+ seconds on slow connections. Changes: - Make new_with_runtime(), new_with_config(), new() async - Remove block_in_place wrappers for I/O operations - Add 30-second connection timeout with tokio::time::timeout - Update 15+ call sites across 10 files to await constructors - Convert 4 test functions to #[tokio::test] Benefits: - TUI remains responsive during WebSocket connections - Proper async I/O follows Rust best practices - No more indefinite hangs ## P1-1: Secure path traversal vulnerabilities Implement comprehensive path validation with 7 defense layers to prevent file access outside workspace boundaries. Changes: - Create validate_safe_path() with multi-layer security: * URL decoding (prevents %2E%2E bypasses) * Absolute path rejection * Null byte protection * Windows-specific checks (UNC/device paths) * Lexical path cleaning (removes .. 
components) * Symlink resolution via canonicalization * Boundary verification with starts_with check - Update 4 MCP resource functions (get/list/write/delete) - Add 11 comprehensive security tests Benefits: - Blocks URL-encoded, absolute, UNC path attacks - Prevents null byte injection - Stops symlink escape attempts - Cross-platform security (Windows/Linux/macOS) ## Test Results - owlen-core: 109/109 tests pass (100%) - owlen-tui: 52/53 tests pass (98%, 1 pre-existing failure) - owlen-providers: 2/2 tests pass (100%) - Build: cargo build --all succeeds ## Verification - ✓ cargo tree -p owlen-core shows no TUI dependencies - ✓ No block_in_place calls remain in MCP I/O code - ✓ All 11 security tests pass Fixes: #P0-1, #P0-2, #P1-1 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
276 lines
9.2 KiB
Rust
276 lines
9.2 KiB
Rust
//! Integration tests for the ReAct agent loop functionality.
|
|
//!
|
|
//! These tests verify that the agent executor correctly:
|
|
//! - Parses ReAct formatted responses
|
|
//! - Executes tool calls
|
|
//! - Handles multi-step workflows
|
|
//! - Recovers from errors
|
|
//! - Respects iteration limits
|
|
|
|
use owlen_cli::agent::{AgentConfig, AgentExecutor, LlmResponse};
|
|
use owlen_core::mcp::remote_client::RemoteMcpClient;
|
|
use owlen_core::tools::WEB_SEARCH_TOOL_NAME;
|
|
use std::sync::Arc;
|
|
|
|
#[tokio::test]
|
|
async fn test_react_parsing_tool_call() {
|
|
let executor = create_test_executor().await;
|
|
|
|
// Test parsing a tool call with JSON arguments
|
|
let text = "THOUGHT: I should search for information\nACTION: web_search\nACTION_INPUT: {\"query\": \"rust async programming\"}\n";
|
|
|
|
let result = executor.parse_response(text);
|
|
|
|
match result {
|
|
Ok(LlmResponse::ToolCall {
|
|
thought,
|
|
tool_name,
|
|
arguments,
|
|
}) => {
|
|
assert_eq!(thought, "I should search for information");
|
|
assert_eq!(tool_name.as_str(), WEB_SEARCH_TOOL_NAME);
|
|
assert_eq!(arguments["query"], "rust async programming");
|
|
}
|
|
other => panic!("Expected ToolCall, got: {:?}", other),
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_react_parsing_final_answer() {
|
|
let executor = create_test_executor().await;
|
|
|
|
let text = "THOUGHT: I have enough information now\nFINAL_ANSWER: The answer is 42\n";
|
|
|
|
let result = executor.parse_response(text);
|
|
|
|
match result {
|
|
Ok(LlmResponse::FinalAnswer { thought, answer }) => {
|
|
assert_eq!(thought, "I have enough information now");
|
|
assert_eq!(answer, "The answer is 42");
|
|
}
|
|
other => panic!("Expected FinalAnswer, got: {:?}", other),
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_react_parsing_with_multiline_thought() {
|
|
let executor = create_test_executor().await;
|
|
|
|
let text = "THOUGHT: This is a complex\nmulti-line thought\nACTION: list_files\nACTION_INPUT: {\"path\": \".\"}\n";
|
|
|
|
let result = executor.parse_response(text);
|
|
|
|
// The regex currently only captures until first newline
|
|
// This test documents current behavior
|
|
match result {
|
|
Ok(LlmResponse::ToolCall { thought, .. }) => {
|
|
// Regex pattern stops at first \n after THOUGHT:
|
|
assert!(thought.contains("This is a complex"));
|
|
}
|
|
other => panic!("Expected ToolCall, got: {:?}", other),
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
#[ignore] // Requires MCP LLM server to be running
|
|
async fn test_agent_single_tool_scenario() {
|
|
// This test requires a running MCP LLM server (which wraps Ollama)
|
|
let provider = Arc::new(RemoteMcpClient::new().await.unwrap());
|
|
let mcp_client = Arc::clone(&provider) as Arc<RemoteMcpClient>;
|
|
|
|
let config = AgentConfig {
|
|
max_iterations: 5,
|
|
model: "llama3.2".to_string(),
|
|
temperature: Some(0.7),
|
|
max_tokens: None,
|
|
..AgentConfig::default()
|
|
};
|
|
|
|
let executor = AgentExecutor::new(provider, mcp_client, config);
|
|
|
|
// Simple query that should complete in one tool call
|
|
let result = executor
|
|
.run("List files in the current directory".to_string())
|
|
.await;
|
|
|
|
match result {
|
|
Ok(agent_result) => {
|
|
assert!(
|
|
!agent_result.answer.is_empty(),
|
|
"Answer should not be empty"
|
|
);
|
|
println!("Agent answer: {}", agent_result.answer);
|
|
}
|
|
Err(e) => {
|
|
// It's okay if this fails due to LLM not following format
|
|
println!("Agent test skipped: {}", e);
|
|
}
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
#[ignore] // Requires Ollama to be running
|
|
async fn test_agent_multi_step_workflow() {
|
|
// Test a query that requires multiple tool calls
|
|
let provider = Arc::new(RemoteMcpClient::new().await.unwrap());
|
|
let mcp_client = Arc::clone(&provider) as Arc<RemoteMcpClient>;
|
|
|
|
let config = AgentConfig {
|
|
max_iterations: 10,
|
|
model: "llama3.2".to_string(),
|
|
temperature: Some(0.5), // Lower temperature for more consistent behavior
|
|
max_tokens: None,
|
|
..AgentConfig::default()
|
|
};
|
|
|
|
let executor = AgentExecutor::new(provider, mcp_client, config);
|
|
|
|
// Query requiring multiple steps: list -> read -> analyze
|
|
let result = executor
|
|
.run("Find all Rust files and tell me which one contains 'Agent'".to_string())
|
|
.await;
|
|
|
|
match result {
|
|
Ok(agent_result) => {
|
|
assert!(!agent_result.answer.is_empty());
|
|
println!("Multi-step answer: {:?}", agent_result);
|
|
}
|
|
Err(e) => {
|
|
println!("Multi-step test skipped: {}", e);
|
|
}
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
#[ignore] // Requires Ollama
|
|
async fn test_agent_iteration_limit() {
|
|
let provider = Arc::new(RemoteMcpClient::new().await.unwrap());
|
|
let mcp_client = Arc::clone(&provider) as Arc<RemoteMcpClient>;
|
|
|
|
let config = AgentConfig {
|
|
max_iterations: 2, // Very low limit to test enforcement
|
|
model: "llama3.2".to_string(),
|
|
temperature: Some(0.7),
|
|
max_tokens: None,
|
|
..AgentConfig::default()
|
|
};
|
|
|
|
let executor = AgentExecutor::new(provider, mcp_client, config);
|
|
|
|
// Complex query that would require many iterations
|
|
let result = executor
|
|
.run("Perform an exhaustive analysis of all files".to_string())
|
|
.await;
|
|
|
|
// Should hit the iteration limit (or parse error if LLM doesn't follow format)
|
|
match result {
|
|
Err(e) => {
|
|
let error_str = format!("{}", e);
|
|
// Accept either iteration limit error or parse error (LLM didn't follow ReAct format)
|
|
assert!(
|
|
error_str.contains("Maximum iterations")
|
|
|| error_str.contains("2")
|
|
|| error_str.contains("parse"),
|
|
"Expected iteration limit or parse error, got: {}",
|
|
error_str
|
|
);
|
|
println!("Test passed: agent stopped with error: {}", error_str);
|
|
}
|
|
Ok(_) => {
|
|
// It's possible the LLM completed within 2 iterations
|
|
println!("Agent completed within iteration limit");
|
|
}
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
#[ignore] // Requires Ollama
|
|
async fn test_agent_tool_budget_enforcement() {
|
|
let provider = Arc::new(RemoteMcpClient::new().await.unwrap());
|
|
let mcp_client = Arc::clone(&provider) as Arc<RemoteMcpClient>;
|
|
|
|
let config = AgentConfig {
|
|
max_iterations: 3, // Very low iteration limit to enforce budget
|
|
model: "llama3.2".to_string(),
|
|
temperature: Some(0.7),
|
|
max_tokens: None,
|
|
..AgentConfig::default()
|
|
};
|
|
|
|
let executor = AgentExecutor::new(provider, mcp_client, config);
|
|
|
|
// Query that would require many tool calls
|
|
let result = executor
|
|
.run("Read every file in the project and summarize them all".to_string())
|
|
.await;
|
|
|
|
// Should hit the tool call budget (or parse error if LLM doesn't follow format)
|
|
match result {
|
|
Err(e) => {
|
|
let error_str = format!("{}", e);
|
|
// Accept either budget error or parse error (LLM didn't follow ReAct format)
|
|
assert!(
|
|
error_str.contains("Maximum iterations")
|
|
|| error_str.contains("budget")
|
|
|| error_str.contains("parse"),
|
|
"Expected budget or parse error, got: {}",
|
|
error_str
|
|
);
|
|
println!("Test passed: agent stopped with error: {}", error_str);
|
|
}
|
|
Ok(_) => {
|
|
println!("Agent completed within tool budget");
|
|
}
|
|
}
|
|
}
|
|
|
|
// Helper function to create a test executor
|
|
// For parsing tests, we don't need a real connection
|
|
async fn create_test_executor() -> AgentExecutor {
|
|
// For parsing tests, we can accept the error from RemoteMcpClient::new()
|
|
// since we're only testing parse_response which doesn't use the MCP client
|
|
let provider = match RemoteMcpClient::new().await {
|
|
Ok(client) => Arc::new(client),
|
|
Err(_) => {
|
|
// If MCP server binary doesn't exist, parsing tests can still run
|
|
// by using a dummy client that will never be called
|
|
// This is a workaround for unit tests that only need parse_response
|
|
panic!("MCP server binary not found - build the project first with: cargo build --all");
|
|
}
|
|
};
|
|
|
|
let mcp_client = Arc::clone(&provider) as Arc<RemoteMcpClient>;
|
|
|
|
let config = AgentConfig::default();
|
|
AgentExecutor::new(provider, mcp_client, config)
|
|
}
|
|
|
|
/// Pins the values produced by `AgentConfig::default()` for the iteration
/// limit, model, temperature, system prompt and sub-agent list.
#[test]
fn test_agent_config_defaults() {
    let defaults = AgentConfig::default();

    // Borrow the fields under test; `max_tokens` is intentionally not checked.
    let AgentConfig {
        max_iterations,
        model,
        temperature,
        system_prompt,
        sub_agents,
        ..
    } = &defaults;

    assert_eq!(*max_iterations, 15);
    assert_eq!(model.as_str(), "llama3.2:latest");
    assert_eq!(*temperature, Some(0.7));
    assert_eq!(*system_prompt, None);
    assert!(sub_agents.is_empty());
    // max_tool_calls field removed - agent now tracks iterations instead
}
|
|
|
|
/// Builds an `AgentConfig` with every field set explicitly and checks that
/// each field reads back the value it was given.
#[test]
fn test_agent_config_custom() {
    let config = AgentConfig {
        // Deliberately different from the default of 15 asserted in
        // `test_agent_config_defaults`, so this value cannot be mistaken for
        // the default.
        max_iterations: 7,
        model: "custom-model".to_string(),
        temperature: Some(0.5),
        max_tokens: Some(2000),
        system_prompt: Some("Custom prompt".to_string()),
        sub_agents: Vec::new(),
    };

    assert_eq!(config.max_iterations, 7);
    assert_eq!(config.model, "custom-model");
    assert_eq!(config.temperature, Some(0.5));
    assert_eq!(config.max_tokens, Some(2000));
    // These two fields were set but previously never verified.
    assert_eq!(config.system_prompt.as_deref(), Some("Custom prompt"));
    assert!(config.sub_agents.is_empty());
}
|