Completes Phase 4 (Agentic Loop with ReAct), Phase 7 (Code Execution), and Phase 8 (Prompt Server) as specified in the implementation plan.

**Phase 4: Agentic Loop with ReAct Pattern (agent.rs - 398 lines)**

- Complete AgentExecutor with reasoning loop
- LlmResponse enum: ToolCall, FinalAnswer, Reasoning
- ReAct parser supporting THOUGHT/ACTION/ACTION_INPUT/FINAL_ANSWER
- Tool discovery and execution integration
- AgentResult with iteration tracking and message history
- Integration with owlen-agent CLI binary and TUI

**Phase 7: Code Execution with Docker Sandboxing**

*Sandbox Module (sandbox.rs - 255 lines):*

- Docker-based execution using bollard
- Resource limits: 512MB memory, 50% CPU
- Network isolation (no network access)
- Timeout handling (30s default)
- Container auto-cleanup
- Support for Rust, Node.js, Python environments

*Tool Suite (tools.rs - 410 lines):*

- CompileProjectTool: Build projects with auto-detection
- RunTestsTool: Execute test suites with optional filters
- FormatCodeTool: Run formatters (rustfmt/prettier/black)
- LintCodeTool: Run linters (clippy/eslint/pylint)
- All tools support check-only and auto-fix modes

*MCP Server (lib.rs - 183 lines):*

- Full JSON-RPC protocol implementation
- Tool registry with dynamic dispatch
- Initialize/tools/list/tools/call support

**Phase 8: Prompt Server with YAML & Handlebars**

*Prompt Server (lib.rs - 405 lines):*

- YAML-based template storage in ~/.config/owlen/prompts/
- Handlebars 6.0 template engine integration
- PromptTemplate with metadata (name, version, mode, description)
- Four MCP tools:
  - get_prompt: Retrieve template by name
  - render_prompt: Render with Handlebars variables
  - list_prompts: List all available templates
  - reload_prompts: Hot-reload from disk

*Default Templates:*

- chat_mode_system.yaml: ReAct prompt for chat mode
- code_mode_system.yaml: ReAct prompt with code tools

**Configuration & Integration:**

- Added Agent module to owlen-core
- Updated owlen-agent binary to use new AgentExecutor API
- Updated TUI to integrate with agent result structure
- Added error handling for Agent variant

**Dependencies Added:**

- bollard 0.17 (Docker API)
- handlebars 6.0 (templating)
- serde_yaml 0.9 (YAML parsing)
- tempfile 3.0 (temporary directories)
- uuid 1.0 with v4 feature

**Tests:**

- mode_tool_filter.rs: Tool filtering by mode
- prompt_server.rs: Prompt management tests
- Sandbox tests (Docker-dependent, marked #[ignore])

All code compiles successfully and follows project conventions.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
251 lines
7.6 KiB
Rust
251 lines
7.6 KiB
Rust
//! Docker-based sandboxing for secure code execution
|
|
|
|
use anyhow::{Context, Result};
|
|
use bollard::container::{
|
|
Config, CreateContainerOptions, RemoveContainerOptions, StartContainerOptions,
|
|
WaitContainerOptions,
|
|
};
|
|
use bollard::models::{HostConfig, Mount, MountTypeEnum};
|
|
use bollard::Docker;
|
|
use std::collections::HashMap;
|
|
use std::path::Path;
|
|
|
|
/// Result of executing code in a sandbox.
///
/// Holds the captured output streams together with how the run ended.
/// Derives `PartialEq`/`Eq` so results can be compared directly, e.g. in
/// assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExecutionResult {
    /// Text captured from the command's standard output (lossily decoded as UTF-8).
    pub stdout: String,
    /// Text captured from the command's standard error (lossily decoded as UTF-8).
    pub stderr: String,
    /// Exit status reported for the run.
    pub exit_code: i64,
    /// `true` when the run was stopped because it exceeded the time limit.
    pub timed_out: bool,
}
|
|
|
|
/// Docker-based sandbox executor
|
|
pub struct Sandbox {
|
|
docker: Docker,
|
|
memory_limit: i64,
|
|
cpu_quota: i64,
|
|
timeout_secs: u64,
|
|
}
|
|
|
|
impl Sandbox {
|
|
/// Create a new sandbox with default resource limits
|
|
pub fn new() -> Result<Self> {
|
|
let docker =
|
|
Docker::connect_with_local_defaults().context("Failed to connect to Docker daemon")?;
|
|
|
|
Ok(Self {
|
|
docker,
|
|
memory_limit: 512 * 1024 * 1024, // 512MB
|
|
cpu_quota: 50000, // 50% of one core
|
|
timeout_secs: 30,
|
|
})
|
|
}
|
|
|
|
/// Execute a command in a sandboxed container
|
|
pub async fn execute(
|
|
&self,
|
|
image: &str,
|
|
cmd: &[&str],
|
|
workspace: Option<&Path>,
|
|
env: HashMap<String, String>,
|
|
) -> Result<ExecutionResult> {
|
|
let container_name = format!("owlen-sandbox-{}", uuid::Uuid::new_v4());
|
|
|
|
// Prepare volume mount if workspace provided
|
|
let mounts = if let Some(ws) = workspace {
|
|
vec![Mount {
|
|
target: Some("/workspace".to_string()),
|
|
source: Some(ws.to_string_lossy().to_string()),
|
|
typ: Some(MountTypeEnum::BIND),
|
|
read_only: Some(false),
|
|
..Default::default()
|
|
}]
|
|
} else {
|
|
vec![]
|
|
};
|
|
|
|
// Create container config
|
|
let host_config = HostConfig {
|
|
memory: Some(self.memory_limit),
|
|
cpu_quota: Some(self.cpu_quota),
|
|
network_mode: Some("none".to_string()), // No network access
|
|
mounts: Some(mounts),
|
|
auto_remove: Some(true),
|
|
..Default::default()
|
|
};
|
|
|
|
let config = Config {
|
|
image: Some(image.to_string()),
|
|
cmd: Some(cmd.iter().map(|s| s.to_string()).collect()),
|
|
working_dir: Some("/workspace".to_string()),
|
|
env: Some(env.iter().map(|(k, v)| format!("{}={}", k, v)).collect()),
|
|
host_config: Some(host_config),
|
|
attach_stdout: Some(true),
|
|
attach_stderr: Some(true),
|
|
tty: Some(false),
|
|
..Default::default()
|
|
};
|
|
|
|
// Create container
|
|
let container = self
|
|
.docker
|
|
.create_container(
|
|
Some(CreateContainerOptions {
|
|
name: container_name.clone(),
|
|
..Default::default()
|
|
}),
|
|
config,
|
|
)
|
|
.await
|
|
.context("Failed to create container")?;
|
|
|
|
// Start container
|
|
self.docker
|
|
.start_container(&container.id, None::<StartContainerOptions<String>>)
|
|
.await
|
|
.context("Failed to start container")?;
|
|
|
|
// Wait for container with timeout
|
|
let wait_result =
|
|
tokio::time::timeout(std::time::Duration::from_secs(self.timeout_secs), async {
|
|
let mut wait_stream = self
|
|
.docker
|
|
.wait_container(&container.id, None::<WaitContainerOptions<String>>);
|
|
|
|
use futures::StreamExt;
|
|
if let Some(result) = wait_stream.next().await {
|
|
result
|
|
} else {
|
|
Err(bollard::errors::Error::IOError {
|
|
err: std::io::Error::other("Container wait stream ended unexpectedly"),
|
|
})
|
|
}
|
|
})
|
|
.await;
|
|
|
|
let (exit_code, timed_out) = match wait_result {
|
|
Ok(Ok(result)) => (result.status_code, false),
|
|
Ok(Err(e)) => {
|
|
eprintln!("Container wait error: {}", e);
|
|
(1, false)
|
|
}
|
|
Err(_) => {
|
|
// Timeout - kill the container
|
|
let _ = self
|
|
.docker
|
|
.kill_container(
|
|
&container.id,
|
|
None::<bollard::container::KillContainerOptions<String>>,
|
|
)
|
|
.await;
|
|
(124, true)
|
|
}
|
|
};
|
|
|
|
// Get logs
|
|
let logs = self.docker.logs(
|
|
&container.id,
|
|
Some(bollard::container::LogsOptions::<String> {
|
|
stdout: true,
|
|
stderr: true,
|
|
..Default::default()
|
|
}),
|
|
);
|
|
|
|
use futures::StreamExt;
|
|
let mut stdout = String::new();
|
|
let mut stderr = String::new();
|
|
|
|
let log_result = tokio::time::timeout(std::time::Duration::from_secs(5), async {
|
|
let mut logs = logs;
|
|
while let Some(log) = logs.next().await {
|
|
match log {
|
|
Ok(bollard::container::LogOutput::StdOut { message }) => {
|
|
stdout.push_str(&String::from_utf8_lossy(&message));
|
|
}
|
|
Ok(bollard::container::LogOutput::StdErr { message }) => {
|
|
stderr.push_str(&String::from_utf8_lossy(&message));
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
})
|
|
.await;
|
|
|
|
if log_result.is_err() {
|
|
eprintln!("Timeout reading container logs");
|
|
}
|
|
|
|
// Remove container (auto_remove should handle this, but be explicit)
|
|
let _ = self
|
|
.docker
|
|
.remove_container(
|
|
&container.id,
|
|
Some(RemoveContainerOptions {
|
|
force: true,
|
|
..Default::default()
|
|
}),
|
|
)
|
|
.await;
|
|
|
|
Ok(ExecutionResult {
|
|
stdout,
|
|
stderr,
|
|
exit_code,
|
|
timed_out,
|
|
})
|
|
}
|
|
|
|
/// Execute in a Rust environment
|
|
pub async fn execute_rust(&self, workspace: &Path, cmd: &[&str]) -> Result<ExecutionResult> {
|
|
self.execute("rust:1.75-slim", cmd, Some(workspace), HashMap::new())
|
|
.await
|
|
}
|
|
|
|
/// Execute in a Python environment
|
|
pub async fn execute_python(&self, workspace: &Path, cmd: &[&str]) -> Result<ExecutionResult> {
|
|
self.execute("python:3.11-slim", cmd, Some(workspace), HashMap::new())
|
|
.await
|
|
}
|
|
|
|
/// Execute in a Node.js environment
|
|
pub async fn execute_node(&self, workspace: &Path, cmd: &[&str]) -> Result<ExecutionResult> {
|
|
self.execute("node:20-slim", cmd, Some(workspace), HashMap::new())
|
|
.await
|
|
}
|
|
}
|
|
|
|
impl Default for Sandbox {
|
|
fn default() -> Self {
|
|
Self::new().expect("Failed to create default sandbox")
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Compiles a one-line program with `rustc` inside the Rust image and
    /// expects a clean, non-timed-out exit.
    #[tokio::test]
    #[ignore] // Requires Docker daemon
    async fn test_sandbox_rust_compile() {
        let sandbox = Sandbox::new().unwrap();
        let workspace = TempDir::new().unwrap();

        // Drop a minimal Rust source file into the mounted workspace.
        let source_file = workspace.path().join("main.rs");
        let program = "fn main() { println!(\"Hello from sandbox!\"); }";
        std::fs::write(source_file, program).unwrap();

        let compile_cmd = ["rustc", "main.rs"];
        let outcome = sandbox
            .execute_rust(workspace.path(), &compile_cmd)
            .await
            .unwrap();

        assert_eq!(outcome.exit_code, 0);
        assert!(!outcome.timed_out);
    }
}
|