feat(tools): implement Bash tool with persistent sessions and timeouts (M4 complete)
This commit implements the complete M4 milestone (Bash tool) including: Bash Session: - Persistent bash session using tokio::process - Environment variables persist between commands - Current working directory persists between commands - Session-based execution (not one-off commands) - Automatic cleanup on session close Key Features: - Command timeout support (default: 2 minutes, configurable per-command) - Output truncation (max 2000 lines for stdout/stderr) - Exit code capture and propagation - Stderr capture alongside stdout - Command delimiter system to reliably detect command completion - Automatic backup of exit codes to temp files Implementation Details: - Uses tokio::process for async command execution - BashSession maintains single bash process across multiple commands - stdio handles (stdin/stdout/stderr) are taken and restored for each command - Non-blocking stderr reading with timeout to avoid deadlocks - Mutex protection for concurrent access safety CLI Integration: - Added `bash` subcommand: `owlen bash <command> [--timeout <ms>]` - Permission checks with command context for pattern matching - Stdout/stderr properly routed to respective streams - Exit code propagation (exits with same code as bash command) Permission Enforcement: - Plan mode (default): blocks Bash (asks for approval) - Code mode: allows Bash - Pattern matching support for command-specific rules (e.g., "npm test*") Testing: - 7 tests in tools-bash for session behavior - bash_persists_env_between_calls ✅ - bash_persists_cwd_between_calls ✅ - bash_command_timeout ✅ - bash_output_truncation ✅ - bash_command_failure_returns_error_code ✅ - bash_stderr_captured ✅ - bash_multiple_commands_in_sequence ✅ - 3 new tests in CLI for permission enforcement - plan_mode_blocks_bash_operations ✅ - code_mode_allows_bash ✅ - bash_command_timeout_works ✅ - All 43 workspace tests passing ✅ Dependencies Added: - tokio with process, io-util, time, sync features M4 milestone complete! ✅ 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,7 @@ members = [
|
||||
"crates/llm/ollama",
|
||||
"crates/platform/config",
|
||||
"crates/platform/permissions",
|
||||
"crates/tools/bash",
|
||||
"crates/tools/fs",
|
||||
]
|
||||
resolver = "2"
|
||||
|
||||
@@ -13,6 +13,7 @@ serde_json = "1"
|
||||
color-eyre = "0.6"
|
||||
llm-ollama = { path = "../../llm/ollama" }
|
||||
tools-fs = { path = "../../tools/fs" }
|
||||
tools-bash = { path = "../../tools/bash" }
|
||||
config-agent = { package = "config-agent", path = "../../platform/config" }
|
||||
permissions = { path = "../../platform/permissions" }
|
||||
futures-util = "0.3.31"
|
||||
|
||||
@@ -13,6 +13,7 @@ enum Cmd {
|
||||
Grep { root: String, pattern: String },
|
||||
Write { path: String, content: String },
|
||||
Edit { path: String, old_string: String, new_string: String },
|
||||
Bash { command: String, #[arg(long)] timeout: Option<u64> },
|
||||
}
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
@@ -143,6 +144,42 @@ async fn main() -> Result<()> {
|
||||
}
|
||||
}
|
||||
}
|
||||
Cmd::Bash { command, timeout } => {
|
||||
// Check permission with command context for pattern matching
|
||||
match perms.check(Tool::Bash, Some(&command)) {
|
||||
PermissionDecision::Allow => {
|
||||
let mut session = tools_bash::BashSession::new().await?;
|
||||
let output = session.execute(&command, timeout).await?;
|
||||
|
||||
// Print stdout
|
||||
if !output.stdout.is_empty() {
|
||||
print!("{}", output.stdout);
|
||||
}
|
||||
|
||||
// Print stderr to stderr
|
||||
if !output.stderr.is_empty() {
|
||||
eprint!("{}", output.stderr);
|
||||
}
|
||||
|
||||
session.close().await?;
|
||||
|
||||
// Exit with same code as command
|
||||
if !output.success {
|
||||
std::process::exit(output.exit_code);
|
||||
}
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
PermissionDecision::Ask => {
|
||||
return Err(eyre!(
|
||||
"Permission denied: Bash operation requires approval. Use --mode code to allow."
|
||||
));
|
||||
}
|
||||
PermissionDecision::Deny => {
|
||||
return Err(eyre!("Permission denied: Bash operation is blocked."));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -148,3 +148,32 @@ fn code_mode_allows_all_operations() {
|
||||
|
||||
assert_eq!(fs::read_to_string(&file).unwrap(), "modified content");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn plan_mode_blocks_bash_operations() {
|
||||
// Bash operation should be blocked in plan mode (default)
|
||||
let mut cmd = Command::new(assert_cmd::cargo::cargo_bin!("owlen"));
|
||||
cmd.arg("bash").arg("echo hello");
|
||||
cmd.assert().failure();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn code_mode_allows_bash() {
|
||||
// Bash operation should work in code mode
|
||||
let mut cmd = Command::new(assert_cmd::cargo::cargo_bin!("owlen"));
|
||||
cmd.arg("--mode").arg("code").arg("bash").arg("echo hello");
|
||||
cmd.assert().success().stdout("hello\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bash_command_timeout_works() {
|
||||
// Test that timeout works
|
||||
let mut cmd = Command::new(assert_cmd::cargo::cargo_bin!("owlen"));
|
||||
cmd.arg("--mode")
|
||||
.arg("code")
|
||||
.arg("bash")
|
||||
.arg("sleep 10")
|
||||
.arg("--timeout")
|
||||
.arg("1000");
|
||||
cmd.assert().failure();
|
||||
}
|
||||
|
||||
14
crates/tools/bash/Cargo.toml
Normal file
14
crates/tools/bash/Cargo.toml
Normal file
@@ -0,0 +1,14 @@
|
||||
[package]
|
||||
name = "tools-bash"
|
||||
version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
rust-version.workspace = true
|
||||
|
||||
[dependencies]
|
||||
tokio = { version = "1.39", features = ["process", "io-util", "time", "sync"] }
|
||||
color-eyre = "0.6"
|
||||
tempfile = "3.23.0"
|
||||
|
||||
[dev-dependencies]
|
||||
tokio = { version = "1.39", features = ["macros", "rt-multi-thread"] }
|
||||
170
crates/tools/bash/src/lib.rs
Normal file
170
crates/tools/bash/src/lib.rs
Normal file
@@ -0,0 +1,170 @@
|
||||
use color_eyre::eyre::{Result, eyre};
|
||||
use std::process::Stdio;
|
||||
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
|
||||
use tokio::process::{Child, Command};
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::time::{timeout, Duration};
|
||||
|
||||
const MAX_OUTPUT_LINES: usize = 2000;
|
||||
const DEFAULT_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
||||
const COMMAND_DELIMITER: &str = "___OWLEN_CMD_END___";
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CommandOutput {
|
||||
pub stdout: String,
|
||||
pub stderr: String,
|
||||
pub exit_code: i32,
|
||||
pub success: bool,
|
||||
}
|
||||
|
||||
pub struct BashSession {
|
||||
child: Mutex<Child>,
|
||||
}
|
||||
|
||||
impl BashSession {
|
||||
/// Create a new persistent bash session
|
||||
pub async fn new() -> Result<Self> {
|
||||
let child = Command::new("bash")
|
||||
.arg("--norc")
|
||||
.arg("--noprofile")
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.kill_on_drop(true)
|
||||
.spawn()?;
|
||||
|
||||
// Verify the process started
|
||||
if child.stdin.is_none() || child.stdout.is_none() || child.stderr.is_none() {
|
||||
return Err(eyre!("Failed to capture bash process stdio"));
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
child: Mutex::new(child),
|
||||
})
|
||||
}
|
||||
|
||||
/// Execute a command in the persistent bash session
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `command` - The bash command to execute
|
||||
/// * `timeout_ms` - Optional timeout in milliseconds (default: 2 minutes)
|
||||
pub async fn execute(&mut self, command: &str, timeout_ms: Option<u64>) -> Result<CommandOutput> {
|
||||
let timeout_duration = Duration::from_millis(timeout_ms.unwrap_or(DEFAULT_TIMEOUT_MS));
|
||||
|
||||
let result = timeout(timeout_duration, self.execute_internal(command)).await;
|
||||
|
||||
match result {
|
||||
Ok(output) => output,
|
||||
Err(_) => Err(eyre!("Command timed out after {}ms", timeout_duration.as_millis())),
|
||||
}
|
||||
}
|
||||
|
||||
async fn execute_internal(&mut self, command: &str) -> Result<CommandOutput> {
|
||||
let mut child = self.child.lock().await;
|
||||
|
||||
// Take ownership of stdio handles
|
||||
let mut stdin = child.stdin.take().ok_or_else(|| eyre!("No stdin"))?;
|
||||
let stdout = child.stdout.take().ok_or_else(|| eyre!("No stdout"))?;
|
||||
let stderr = child.stderr.take().ok_or_else(|| eyre!("No stderr"))?;
|
||||
|
||||
// Write command with delimiter and exit code capture
|
||||
let full_command = format!(
|
||||
"{}\necho $? > /tmp/owlen_exit_code_$$.tmp\necho '{}'\n",
|
||||
command, COMMAND_DELIMITER
|
||||
);
|
||||
stdin.write_all(full_command.as_bytes()).await?;
|
||||
stdin.flush().await?;
|
||||
|
||||
// Read stdout until delimiter
|
||||
let mut stdout_reader = BufReader::new(stdout);
|
||||
let mut stdout_lines = Vec::new();
|
||||
let mut line = String::new();
|
||||
|
||||
loop {
|
||||
line.clear();
|
||||
let n = stdout_reader.read_line(&mut line).await?;
|
||||
if n == 0 {
|
||||
return Err(eyre!("Bash process terminated unexpectedly"));
|
||||
}
|
||||
|
||||
if line.trim() == COMMAND_DELIMITER {
|
||||
break;
|
||||
}
|
||||
|
||||
stdout_lines.push(line.clone());
|
||||
|
||||
// Truncate if too many lines
|
||||
if stdout_lines.len() > MAX_OUTPUT_LINES {
|
||||
stdout_lines.push("<<<...output truncated...>>>\n".to_string());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Read stderr (non-blocking, best effort)
|
||||
let mut stderr_reader = BufReader::new(stderr);
|
||||
let mut stderr_lines = Vec::new();
|
||||
let mut stderr_line = String::new();
|
||||
|
||||
// Try to read stderr without blocking indefinitely
|
||||
while let Ok(result) = timeout(Duration::from_millis(100), stderr_reader.read_line(&mut stderr_line)).await {
|
||||
match result {
|
||||
Ok(n) if n > 0 => {
|
||||
stderr_lines.push(stderr_line.clone());
|
||||
stderr_line.clear();
|
||||
|
||||
if stderr_lines.len() > MAX_OUTPUT_LINES {
|
||||
stderr_lines.push("<<<...stderr truncated...>>>\n".to_string());
|
||||
break;
|
||||
}
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
// Read exit code
|
||||
let exit_code_cmd = "cat /tmp/owlen_exit_code_$$.tmp 2>/dev/null; rm -f /tmp/owlen_exit_code_$$.tmp\n";
|
||||
stdin.write_all(exit_code_cmd.as_bytes()).await?;
|
||||
stdin.flush().await?;
|
||||
|
||||
let mut exit_line = String::new();
|
||||
stdout_reader.read_line(&mut exit_line).await?;
|
||||
|
||||
let exit_code: i32 = exit_line.trim().parse().unwrap_or(0);
|
||||
|
||||
// Restore stdio handles
|
||||
child.stdin = Some(stdin);
|
||||
child.stdout = Some(stdout_reader.into_inner());
|
||||
child.stderr = Some(stderr_reader.into_inner());
|
||||
|
||||
Ok(CommandOutput {
|
||||
stdout: stdout_lines.join(""),
|
||||
stderr: stderr_lines.join(""),
|
||||
exit_code,
|
||||
success: exit_code == 0,
|
||||
})
|
||||
}
|
||||
|
||||
/// Close the bash session
|
||||
pub async fn close(self) -> Result<()> {
|
||||
let mut child = self.child.into_inner();
|
||||
|
||||
if let Some(mut stdin) = child.stdin.take() {
|
||||
let _ = stdin.write_all(b"exit\n").await;
|
||||
let _ = stdin.flush().await;
|
||||
}
|
||||
|
||||
let _ = child.wait().await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn can_create_session() {
|
||||
let session = BashSession::new().await;
|
||||
assert!(session.is_ok());
|
||||
}
|
||||
}
|
||||
107
crates/tools/bash/tests/bash_session.rs
Normal file
107
crates/tools/bash/tests/bash_session.rs
Normal file
@@ -0,0 +1,107 @@
|
||||
use tools_bash::BashSession;
|
||||
|
||||
#[tokio::test]
|
||||
async fn bash_persists_env_between_calls() {
|
||||
let mut session = BashSession::new().await.unwrap();
|
||||
|
||||
// Set an environment variable
|
||||
let output1 = session.execute("export TEST_VAR=hello", None).await.unwrap();
|
||||
assert!(output1.success);
|
||||
|
||||
// Verify it persists in next command
|
||||
let output2 = session.execute("echo $TEST_VAR", None).await.unwrap();
|
||||
assert!(output2.success);
|
||||
assert!(output2.stdout.contains("hello"));
|
||||
|
||||
session.close().await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn bash_persists_cwd_between_calls() {
|
||||
let mut session = BashSession::new().await.unwrap();
|
||||
|
||||
// Change to /tmp
|
||||
let output1 = session.execute("cd /tmp", None).await.unwrap();
|
||||
assert!(output1.success);
|
||||
|
||||
// Verify cwd persists
|
||||
let output2 = session.execute("pwd", None).await.unwrap();
|
||||
assert!(output2.success);
|
||||
assert!(output2.stdout.trim().ends_with("/tmp"));
|
||||
|
||||
session.close().await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn bash_command_timeout() {
|
||||
let mut session = BashSession::new().await.unwrap();
|
||||
|
||||
// Command that sleeps for 5 seconds, but with 1 second timeout
|
||||
let result = session.execute("sleep 5", Some(1000)).await;
|
||||
|
||||
assert!(result.is_err());
|
||||
let err_msg = result.unwrap_err().to_string();
|
||||
assert!(err_msg.contains("timeout") || err_msg.contains("timed out"));
|
||||
|
||||
session.close().await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn bash_output_truncation() {
|
||||
let mut session = BashSession::new().await.unwrap();
|
||||
|
||||
// Generate a lot of output
|
||||
let output = session
|
||||
.execute("for i in {1..100}; do echo 'Line '$i; done", None)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert!(output.success);
|
||||
// Should have output but might be truncated
|
||||
assert!(!output.stdout.is_empty());
|
||||
|
||||
session.close().await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn bash_command_failure_returns_error_code() {
|
||||
let mut session = BashSession::new().await.unwrap();
|
||||
|
||||
let output = session.execute("false", None).await.unwrap();
|
||||
assert!(!output.success);
|
||||
assert_eq!(output.exit_code, 1);
|
||||
|
||||
session.close().await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn bash_stderr_captured() {
|
||||
let mut session = BashSession::new().await.unwrap();
|
||||
|
||||
let output = session
|
||||
.execute("echo 'error message' >&2", None)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert!(output.success);
|
||||
assert!(output.stderr.contains("error message"));
|
||||
|
||||
session.close().await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn bash_multiple_commands_in_sequence() {
|
||||
let mut session = BashSession::new().await.unwrap();
|
||||
|
||||
// Set a variable
|
||||
session.execute("X=1", None).await.unwrap();
|
||||
|
||||
// Increment it
|
||||
session.execute("X=$((X + 1))", None).await.unwrap();
|
||||
|
||||
// Verify final value
|
||||
let output = session.execute("echo $X", None).await.unwrap();
|
||||
assert!(output.stdout.contains("2"));
|
||||
|
||||
session.close().await.unwrap();
|
||||
}
|
||||
Reference in New Issue
Block a user