Files
owlen/crates/owlen-mcp-code-server/src/sandbox.rs
vikingowl e94df2c48a feat(phases4,7,8): implement Agent/ReAct, Code Execution, and Prompt Server
Completes Phase 4 (Agentic Loop with ReAct), Phase 7 (Code Execution),
and Phase 8 (Prompt Server) as specified in the implementation plan.

**Phase 4: Agentic Loop with ReAct Pattern (agent.rs - 398 lines)**
- Complete AgentExecutor with reasoning loop
- LlmResponse enum: ToolCall, FinalAnswer, Reasoning
- ReAct parser supporting THOUGHT/ACTION/ACTION_INPUT/FINAL_ANSWER
- Tool discovery and execution integration
- AgentResult with iteration tracking and message history
- Integration with owlen-agent CLI binary and TUI

**Phase 7: Code Execution with Docker Sandboxing**

*Sandbox Module (sandbox.rs - 255 lines):*
- Docker-based execution using bollard
- Resource limits: 512 MiB memory, 50% of one CPU core
- Network isolation (no network access)
- Timeout handling (30s default)
- Container auto-cleanup
- Support for Rust, Node.js, Python environments

*Tool Suite (tools.rs - 410 lines):*
- CompileProjectTool: Build projects with auto-detection
- RunTestsTool: Execute test suites with optional filters
- FormatCodeTool: Run formatters (rustfmt/prettier/black)
- LintCodeTool: Run linters (clippy/eslint/pylint)
- All tools support check-only and auto-fix modes

*MCP Server (lib.rs - 183 lines):*
- Full JSON-RPC protocol implementation
- Tool registry with dynamic dispatch
- Initialize/tools/list/tools/call support

**Phase 8: Prompt Server with YAML & Handlebars**

*Prompt Server (lib.rs - 405 lines):*
- YAML-based template storage in ~/.config/owlen/prompts/
- Handlebars 6.0 template engine integration
- PromptTemplate with metadata (name, version, mode, description)
- Four MCP tools:
  - get_prompt: Retrieve template by name
  - render_prompt: Render with Handlebars variables
  - list_prompts: List all available templates
  - reload_prompts: Hot-reload from disk

*Default Templates:*
- chat_mode_system.yaml: ReAct prompt for chat mode
- code_mode_system.yaml: ReAct prompt with code tools

**Configuration & Integration:**
- Added Agent module to owlen-core
- Updated owlen-agent binary to use new AgentExecutor API
- Updated TUI to integrate with agent result structure
- Added error handling for Agent variant

**Dependencies Added:**
- bollard 0.17 (Docker API)
- handlebars 6.0 (templating)
- serde_yaml 0.9 (YAML parsing)
- tempfile 3.0 (temporary directories)
- uuid 1.0 with v4 feature

**Tests:**
- mode_tool_filter.rs: Tool filtering by mode
- prompt_server.rs: Prompt management tests
- Sandbox tests (Docker-dependent, marked #[ignore])

All code compiles successfully and follows project conventions.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-10 20:50:40 +02:00

251 lines
7.6 KiB
Rust

//! Docker-based sandboxing for secure code execution
use anyhow::{Context, Result};
use bollard::container::{
Config, CreateContainerOptions, RemoveContainerOptions, StartContainerOptions,
WaitContainerOptions,
};
use bollard::models::{HostConfig, Mount, MountTypeEnum};
use bollard::Docker;
use std::collections::HashMap;
use std::path::Path;
/// Result of executing code in a sandbox.
///
/// Derives `PartialEq`/`Eq` so callers and tests can compare whole results
/// instead of checking every field by hand.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExecutionResult {
    /// Everything the command wrote to standard output (lossily decoded as UTF-8).
    pub stdout: String,
    /// Everything the command wrote to standard error (lossily decoded as UTF-8).
    pub stderr: String,
    /// Exit status reported for the command; `124` is used when the run timed out.
    pub exit_code: i64,
    /// `true` when the command was killed because it exceeded the sandbox timeout.
    pub timed_out: bool,
}
/// Docker-based sandbox executor.
///
/// Holds the Docker client together with the resource limits that are applied
/// to every container started through it.
pub struct Sandbox {
// Client used for every Docker API call made by the sandbox.
docker: Docker,
// Memory limit handed to the container's host config (512 * 1024 * 1024 by default).
memory_limit: i64,
// CPU quota handed to the container's host config (50000 by default, i.e. 50% of one core).
cpu_quota: i64,
// Maximum time, in seconds, to wait for the container before it is killed (30 by default).
timeout_secs: u64,
}
impl Sandbox {
/// Create a new sandbox with default resource limits
pub fn new() -> Result<Self> {
let docker =
Docker::connect_with_local_defaults().context("Failed to connect to Docker daemon")?;
Ok(Self {
docker,
memory_limit: 512 * 1024 * 1024, // 512MB
cpu_quota: 50000, // 50% of one core
timeout_secs: 30,
})
}
/// Execute a command in a sandboxed container
pub async fn execute(
&self,
image: &str,
cmd: &[&str],
workspace: Option<&Path>,
env: HashMap<String, String>,
) -> Result<ExecutionResult> {
let container_name = format!("owlen-sandbox-{}", uuid::Uuid::new_v4());
// Prepare volume mount if workspace provided
let mounts = if let Some(ws) = workspace {
vec![Mount {
target: Some("/workspace".to_string()),
source: Some(ws.to_string_lossy().to_string()),
typ: Some(MountTypeEnum::BIND),
read_only: Some(false),
..Default::default()
}]
} else {
vec![]
};
// Create container config
let host_config = HostConfig {
memory: Some(self.memory_limit),
cpu_quota: Some(self.cpu_quota),
network_mode: Some("none".to_string()), // No network access
mounts: Some(mounts),
auto_remove: Some(true),
..Default::default()
};
let config = Config {
image: Some(image.to_string()),
cmd: Some(cmd.iter().map(|s| s.to_string()).collect()),
working_dir: Some("/workspace".to_string()),
env: Some(env.iter().map(|(k, v)| format!("{}={}", k, v)).collect()),
host_config: Some(host_config),
attach_stdout: Some(true),
attach_stderr: Some(true),
tty: Some(false),
..Default::default()
};
// Create container
let container = self
.docker
.create_container(
Some(CreateContainerOptions {
name: container_name.clone(),
..Default::default()
}),
config,
)
.await
.context("Failed to create container")?;
// Start container
self.docker
.start_container(&container.id, None::<StartContainerOptions<String>>)
.await
.context("Failed to start container")?;
// Wait for container with timeout
let wait_result =
tokio::time::timeout(std::time::Duration::from_secs(self.timeout_secs), async {
let mut wait_stream = self
.docker
.wait_container(&container.id, None::<WaitContainerOptions<String>>);
use futures::StreamExt;
if let Some(result) = wait_stream.next().await {
result
} else {
Err(bollard::errors::Error::IOError {
err: std::io::Error::other("Container wait stream ended unexpectedly"),
})
}
})
.await;
let (exit_code, timed_out) = match wait_result {
Ok(Ok(result)) => (result.status_code, false),
Ok(Err(e)) => {
eprintln!("Container wait error: {}", e);
(1, false)
}
Err(_) => {
// Timeout - kill the container
let _ = self
.docker
.kill_container(
&container.id,
None::<bollard::container::KillContainerOptions<String>>,
)
.await;
(124, true)
}
};
// Get logs
let logs = self.docker.logs(
&container.id,
Some(bollard::container::LogsOptions::<String> {
stdout: true,
stderr: true,
..Default::default()
}),
);
use futures::StreamExt;
let mut stdout = String::new();
let mut stderr = String::new();
let log_result = tokio::time::timeout(std::time::Duration::from_secs(5), async {
let mut logs = logs;
while let Some(log) = logs.next().await {
match log {
Ok(bollard::container::LogOutput::StdOut { message }) => {
stdout.push_str(&String::from_utf8_lossy(&message));
}
Ok(bollard::container::LogOutput::StdErr { message }) => {
stderr.push_str(&String::from_utf8_lossy(&message));
}
_ => {}
}
}
})
.await;
if log_result.is_err() {
eprintln!("Timeout reading container logs");
}
// Remove container (auto_remove should handle this, but be explicit)
let _ = self
.docker
.remove_container(
&container.id,
Some(RemoveContainerOptions {
force: true,
..Default::default()
}),
)
.await;
Ok(ExecutionResult {
stdout,
stderr,
exit_code,
timed_out,
})
}
/// Execute in a Rust environment
pub async fn execute_rust(&self, workspace: &Path, cmd: &[&str]) -> Result<ExecutionResult> {
self.execute("rust:1.75-slim", cmd, Some(workspace), HashMap::new())
.await
}
/// Execute in a Python environment
pub async fn execute_python(&self, workspace: &Path, cmd: &[&str]) -> Result<ExecutionResult> {
self.execute("python:3.11-slim", cmd, Some(workspace), HashMap::new())
.await
}
/// Execute in a Node.js environment
pub async fn execute_node(&self, workspace: &Path, cmd: &[&str]) -> Result<ExecutionResult> {
self.execute("node:20-slim", cmd, Some(workspace), HashMap::new())
.await
}
}
impl Default for Sandbox {
fn default() -> Self {
Self::new().expect("Failed to create default sandbox")
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Compiles a one-line program inside the Rust sandbox image and expects
    /// a clean, non-timed-out exit.
    #[tokio::test]
    #[ignore] // Requires Docker daemon
    async fn test_sandbox_rust_compile() {
        let sandbox = Sandbox::new().unwrap();

        // Lay out a minimal single-file Rust program in a scratch directory.
        let workspace = TempDir::new().unwrap();
        let source_file = workspace.path().join("main.rs");
        let program = r#"fn main() { println!("Hello from sandbox!"); }"#;
        std::fs::write(source_file, program).unwrap();

        let compile_cmd = ["rustc", "main.rs"];
        let outcome = sandbox
            .execute_rust(workspace.path(), &compile_cmd)
            .await
            .unwrap();

        assert_eq!(outcome.exit_code, 0);
        assert!(!outcome.timed_out);
    }
}