From 3c436fda54e028c359bf644a96ad0390b96b5a08 Mon Sep 17 00:00:00 2001 From: vikingowl Date: Sat, 1 Nov 2025 20:33:28 +0100 Subject: [PATCH] feat(tools): implement M10 Jupyter notebook support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tools-notebook crate with full Jupyter notebook (.ipynb) support: - Core data structures: Notebook, Cell, NotebookMetadata, Output - Read/write operations with metadata preservation - Edit operations: EditCell, AddCell, DeleteCell - Helper functions: new_code_cell, new_markdown_cell, cell_source_as_string - Comprehensive test suite: 9 tests covering round-trip, editing, and error handling - Permission integration: NotebookRead (plan mode), NotebookEdit (acceptedits mode) Implements M10 from AGENTS.md for LLM-driven notebook editing. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- Cargo.toml | 1 + crates/tools/notebook/Cargo.toml | 14 + crates/tools/notebook/src/lib.rs | 175 +++++++++++ crates/tools/notebook/tests/notebook_tests.rs | 280 ++++++++++++++++++ 4 files changed, 470 insertions(+) create mode 100644 crates/tools/notebook/Cargo.toml create mode 100644 crates/tools/notebook/src/lib.rs create mode 100644 crates/tools/notebook/tests/notebook_tests.rs diff --git a/Cargo.toml b/Cargo.toml index 7c0d03c..5315bcf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ members = [ "crates/platform/permissions", "crates/tools/bash", "crates/tools/fs", + "crates/tools/notebook", "crates/tools/slash", "crates/tools/web", "crates/integration/mcp-client", diff --git a/crates/tools/notebook/Cargo.toml b/crates/tools/notebook/Cargo.toml new file mode 100644 index 0000000..0233621 --- /dev/null +++ b/crates/tools/notebook/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "tools-notebook" +version = "0.1.0" +edition.workspace = true +license.workspace = true +rust-version.workspace = true + +[dependencies] +serde = { version = "1", features = ["derive"] 
} +serde_json = "1" +color-eyre = "0.6" + +[dev-dependencies] +tempfile = "3.23.0" diff --git a/crates/tools/notebook/src/lib.rs b/crates/tools/notebook/src/lib.rs new file mode 100644 index 0000000..4efcfe4 --- /dev/null +++ b/crates/tools/notebook/src/lib.rs @@ -0,0 +1,175 @@ +use color_eyre::eyre::{Result, eyre}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::HashMap; +use std::fs; +use std::path::Path; + +/// Jupyter notebook structure +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Notebook { + pub cells: Vec<Cell>, + pub metadata: NotebookMetadata, + pub nbformat: i32, + pub nbformat_minor: i32, +} + +/// Notebook cell +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Cell { + pub cell_type: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub execution_count: Option<i32>, + pub metadata: HashMap<String, Value>, + pub source: Vec<String>, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub outputs: Vec<Output>, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub id: Option<String>, +} + +/// Cell output +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Output { + pub output_type: String, + #[serde(flatten)] + pub data: HashMap<String, Value>, +} + +/// Notebook metadata +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct NotebookMetadata { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub kernelspec: Option<KernelSpec>, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub language_info: Option<LanguageInfo>, + #[serde(flatten)] + pub extra: HashMap<String, Value>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct KernelSpec { + pub display_name: String, + pub language: String, + pub name: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LanguageInfo { + pub name: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub version: Option<String>, + #[serde(flatten)] + pub extra: HashMap<String, Value>, +} + +/// Read a Jupyter notebook from a file +pub fn
read_notebook<P: AsRef<Path>>(path: P) -> Result<Notebook> { + let content = fs::read_to_string(path)?; + let notebook: Notebook = serde_json::from_str(&content)?; + Ok(notebook) +} + +/// Write a Jupyter notebook to a file +pub fn write_notebook<P: AsRef<Path>>(path: P, notebook: &Notebook) -> Result<()> { + let content = serde_json::to_string_pretty(notebook)?; + fs::write(path, content)?; + Ok(()) +} + +/// Edit operations for notebooks +pub enum NotebookEdit { + /// Replace cell at index with new source + EditCell { index: usize, source: Vec<String> }, + /// Add a new cell at index + AddCell { index: usize, cell: Cell }, + /// Delete cell at index + DeleteCell { index: usize }, +} + +/// Apply an edit to a notebook +pub fn edit_notebook(notebook: &mut Notebook, edit: NotebookEdit) -> Result<()> { + match edit { + NotebookEdit::EditCell { index, source } => { + if index >= notebook.cells.len() { + return Err(eyre!("Cell index {} out of bounds (notebook has {} cells)", index, notebook.cells.len())); + } + notebook.cells[index].source = source; + } + NotebookEdit::AddCell { index, cell } => { + if index > notebook.cells.len() { + return Err(eyre!("Cell index {} out of bounds (notebook has {} cells)", index, notebook.cells.len())); + } + notebook.cells.insert(index, cell); + } + NotebookEdit::DeleteCell { index } => { + if index >= notebook.cells.len() { + return Err(eyre!("Cell index {} out of bounds (notebook has {} cells)", index, notebook.cells.len())); + } + notebook.cells.remove(index); + } + } + Ok(()) +} + +/// Create a new code cell +pub fn new_code_cell(source: Vec<String>) -> Cell { + Cell { + cell_type: "code".to_string(), + execution_count: None, + metadata: HashMap::new(), + source, + outputs: Vec::new(), + id: None, + } +} + +/// Create a new markdown cell +pub fn new_markdown_cell(source: Vec<String>) -> Cell { + Cell { + cell_type: "markdown".to_string(), + execution_count: None, + metadata: HashMap::new(), + source, + outputs: Vec::new(), + id: None, + } +} + +/// Get cell source as a single string +pub fn
cell_source_as_string(cell: &Cell) -> String { + cell.source.join("") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn cell_source_concatenation() { + let cell = Cell { + cell_type: "code".to_string(), + execution_count: None, + metadata: HashMap::new(), + source: vec!["import pandas as pd\n".to_string(), "df = pd.DataFrame()\n".to_string()], + outputs: Vec::new(), + id: None, + }; + + let source = cell_source_as_string(&cell); + assert_eq!(source, "import pandas as pd\ndf = pd.DataFrame()\n"); + } + + #[test] + fn new_code_cell_creation() { + let cell = new_code_cell(vec!["print('hello')\n".to_string()]); + assert_eq!(cell.cell_type, "code"); + assert!(cell.outputs.is_empty()); + } + + #[test] + fn new_markdown_cell_creation() { + let cell = new_markdown_cell(vec!["# Title\n".to_string()]); + assert_eq!(cell.cell_type, "markdown"); + } +} diff --git a/crates/tools/notebook/tests/notebook_tests.rs b/crates/tools/notebook/tests/notebook_tests.rs new file mode 100644 index 0000000..2e732c7 --- /dev/null +++ b/crates/tools/notebook/tests/notebook_tests.rs @@ -0,0 +1,280 @@ +use tools_notebook::*; +use std::fs; +use tempfile::tempdir; + +#[test] +fn notebook_round_trip_preserves_metadata() { + let dir = tempdir().unwrap(); + let notebook_path = dir.path().join("test.ipynb"); + + // Create a sample notebook with metadata + let notebook_json = r##"{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "source": ["print('hello world')"], + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": ["# Test Notebook", "This is a test."] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.9.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}"##; + + fs::write(&notebook_path, notebook_json).unwrap(); + + // Read the notebook + let notebook = read_notebook(&notebook_path).unwrap(); + + //
Verify structure + assert_eq!(notebook.cells.len(), 2); + assert_eq!(notebook.nbformat, 4); + assert_eq!(notebook.nbformat_minor, 5); + + // Verify metadata + assert!(notebook.metadata.kernelspec.is_some()); + let kernelspec = notebook.metadata.kernelspec.as_ref().unwrap(); + assert_eq!(kernelspec.language, "python"); + assert_eq!(kernelspec.name, "python3"); + + assert!(notebook.metadata.language_info.is_some()); + let lang_info = notebook.metadata.language_info.as_ref().unwrap(); + assert_eq!(lang_info.name, "python"); + assert_eq!(lang_info.version, Some("3.9.0".to_string())); + + // Write it back + let output_path = dir.path().join("output.ipynb"); + write_notebook(&output_path, &notebook).unwrap(); + + // Read it again + let notebook2 = read_notebook(&output_path).unwrap(); + + // Verify metadata is preserved + assert_eq!(notebook2.nbformat, 4); + assert_eq!(notebook2.nbformat_minor, 5); + assert!(notebook2.metadata.kernelspec.is_some()); + assert_eq!( + notebook2.metadata.kernelspec.as_ref().unwrap().language, + "python" + ); +} + +#[test] +fn notebook_edit_cell_content() { + let dir = tempdir().unwrap(); + let notebook_path = dir.path().join("test.ipynb"); + + let notebook_json = r##"{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "source": ["x = 1"], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "source": ["y = 2"], + "outputs": [] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +}"##; + + fs::write(&notebook_path, notebook_json).unwrap(); + + let mut notebook = read_notebook(&notebook_path).unwrap(); + + // Edit the first cell + edit_notebook( + &mut notebook, + NotebookEdit::EditCell { + index: 0, + source: vec!["x = 10\n".to_string(), "print(x)\n".to_string()], + }, + ) + .unwrap(); + + // Verify the edit + assert_eq!(notebook.cells[0].source.len(), 2); + assert_eq!(notebook.cells[0].source[0], "x = 10\n"); + assert_eq!(notebook.cells[0].source[1],
"print(x)\n"); + + // Second cell should be unchanged + assert_eq!(notebook.cells[1].source[0], "y = 2"); +} + +#[test] +fn notebook_add_delete_cells() { + let dir = tempdir().unwrap(); + let notebook_path = dir.path().join("test.ipynb"); + + let notebook_json = r##"{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "source": ["x = 1"], + "outputs": [] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +}"##; + + fs::write(&notebook_path, notebook_json).unwrap(); + + let mut notebook = read_notebook(&notebook_path).unwrap(); + assert_eq!(notebook.cells.len(), 1); + + // Add a cell at the end + let new_cell = new_code_cell(vec!["y = 2\n".to_string()]); + edit_notebook( + &mut notebook, + NotebookEdit::AddCell { + index: 1, + cell: new_cell, + }, + ) + .unwrap(); + + assert_eq!(notebook.cells.len(), 2); + assert_eq!(notebook.cells[1].source[0], "y = 2\n"); + + // Add a cell at the beginning + let first_cell = new_markdown_cell(vec!["# Header\n".to_string()]); + edit_notebook( + &mut notebook, + NotebookEdit::AddCell { + index: 0, + cell: first_cell, + }, + ) + .unwrap(); + + assert_eq!(notebook.cells.len(), 3); + assert_eq!(notebook.cells[0].cell_type, "markdown"); + assert_eq!(notebook.cells[0].source[0], "# Header\n"); + assert_eq!(notebook.cells[1].source[0], "x = 1"); // Original first cell is now second + + // Delete the middle cell + edit_notebook(&mut notebook, NotebookEdit::DeleteCell { index: 1 }).unwrap(); + + assert_eq!(notebook.cells.len(), 2); + assert_eq!(notebook.cells[0].cell_type, "markdown"); + assert_eq!(notebook.cells[1].source[0], "y = 2\n"); +} + +#[test] +fn notebook_edit_out_of_bounds() { + let dir = tempdir().unwrap(); + let notebook_path = dir.path().join("test.ipynb"); + + let notebook_json = r##"{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "source": ["x = 1\n"], + "outputs": [] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +}"##;
+ + fs::write(&notebook_path, notebook_json).unwrap(); + + let mut notebook = read_notebook(&notebook_path).unwrap(); + + // Try to edit non-existent cell + let result = edit_notebook( + &mut notebook, + NotebookEdit::EditCell { + index: 5, + source: vec!["bad\n".to_string()], + }, + ); + + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("out of bounds")); +} + +#[test] +fn notebook_with_outputs_preserved() { + let dir = tempdir().unwrap(); + let notebook_path = dir.path().join("test.ipynb"); + + let notebook_json = r##"{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "source": ["print('hello')\n"], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": ["hello\n"] + } + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +}"##; + + fs::write(&notebook_path, notebook_json).unwrap(); + + let notebook = read_notebook(&notebook_path).unwrap(); + + assert_eq!(notebook.cells[0].outputs.len(), 1); + assert_eq!(notebook.cells[0].outputs[0].output_type, "stream"); + + // Write and read back + let output_path = dir.path().join("output.ipynb"); + write_notebook(&output_path, &notebook).unwrap(); + + let notebook2 = read_notebook(&output_path).unwrap(); + assert_eq!(notebook2.cells[0].outputs.len(), 1); + assert_eq!(notebook2.cells[0].outputs[0].output_type, "stream"); +} + +#[test] +fn cell_source_as_string_concatenates() { + let cell = new_code_cell(vec![ + "import numpy as np\n".to_string(), + "arr = np.array([1, 2, 3])\n".to_string(), + ]); + + let source = cell_source_as_string(&cell); + assert_eq!(source, "import numpy as np\narr = np.array([1, 2, 3])\n"); +}