Add word wrapping and cursor mapping utilities to core library; integrate advanced text input support in TUI. Update dependencies accordingly.

2025-09-28 01:47:50 +02:00
parent 6ddc66d864
commit ccf9349f99
11 changed files with 754 additions and 96 deletions
--- a/crates/owlen-core/Cargo.toml
+++ b/crates/owlen-core/Cargo.toml
@@ -5,20 +5,20 @@ edition = "2021"
 description = "Core traits and types for OWLEN LLM client"

 [dependencies]
-serde = { workspace = true }
-serde_json = { workspace = true }
-uuid = { workspace = true }
-anyhow = { workspace = true }
-thiserror = { workspace = true }
-tokio = { workspace = true }
-futures = { workspace = true }
-tokio-stream = { workspace = true }
-async-trait = "0.1"
-textwrap = { workspace = true }
-toml = { workspace = true }
-shellexpand = { workspace = true }
-regex = "1"
-once_cell = "1.21.3"
+anyhow = "1.0.75"
+log = "0.4.20"
+serde = { version = "1.0.188", features = ["derive"] }
+serde_json = "1.0.105"
+thiserror = "1.0.48"
+tokio = { version = "1.32.0", features = ["full"] }
+unicode-segmentation = "1.11"
+unicode-width = "0.1"
+uuid = { version = "1.4.1", features = ["v4", "serde"] }
+textwrap = "0.16.0"
+futures = "0.3.28"
+async-trait = "0.1.73"
+toml = "0.8.0"
+shellexpand = "3.1.0"

 [dev-dependencies]
 tokio-test = { workspace = true }
--- a/crates/owlen-core/src/formatting.rs
+++ b/crates/owlen-core/src/formatting.rs
@@ -49,11 +49,11 @@ impl MessageFormatter {
        // 2) Collapse: remove whitespace-only lines; keep exactly one '\n' between content lines
        let mut content = normalized
            .split('\n')
-            .map(|l| l.trim_end())               // trim trailing spaces per line
-            .filter(|l| !l.trim().is_empty())    // drop blank/whitespace-only lines
+            .map(|l| l.trim_end()) // trim trailing spaces per line
+            .filter(|l| !l.trim().is_empty()) // drop blank/whitespace-only lines
            .collect::<Vec<_>>()
            .join("\n")
-            .trim()                               // trim leading/trailing whitespace
+            .trim() // trim leading/trailing whitespace
            .to_string();

        if content.is_empty() && self.preserve_empty_lines {
@@ -73,8 +73,12 @@ impl MessageFormatter {
            .collect();

        // 5) Belt & suspenders: remove leading/trailing blanks if any survived
-        while lines.first().map_or(false, |s| s.trim().is_empty()) { lines.remove(0); }
-        while lines.last().map_or(false,  |s| s.trim().is_empty()) { lines.pop(); }
+        while lines.first().map_or(false, |s| s.trim().is_empty()) {
+            lines.remove(0);
+        }
+        while lines.last().map_or(false, |s| s.trim().is_empty()) {
+            lines.pop();
+        }

        lines
    }
--- a/crates/owlen-core/src/lib.rs
+++ b/crates/owlen-core/src/lib.rs
@@ -21,7 +21,7 @@ pub use model::*;
 pub use provider::*;
 pub use router::*;
 pub use session::*;
-pub use types::*;
+pub mod wrap_cursor;

 /// Result type used throughout the OWLEN ecosystem
 pub type Result<T> = std::result::Result<T, Error>;
--- a/crates/owlen-core/src/wrap_cursor.rs
+++ b/crates/owlen-core/src/wrap_cursor.rs
@@ -0,0 +1,92 @@
+#![allow(clippy::cast_possible_truncation)]
+
+use unicode_segmentation::UnicodeSegmentation;
+use unicode_width::UnicodeWidthStr;
+
+/// A cursor location on screen, expressed as a zero-based row and column.
+///
+/// `build_cursor_map` produces one of these for every byte offset of its
+/// input text.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct ScreenPos {
+    /// Row index; `build_cursor_map` advances it on every hard line break or wrap.
+    pub row: u16,
+    /// Column within the row, accumulated from the display width of each grapheme.
+    pub col: u16,
+}
+
+/// Builds a lookup table from byte offsets in `text` to wrapped screen positions.
+///
+/// The returned vector has `text.len() + 1` entries. Entry `0` is the origin;
+/// every offset inside or at the end of a grapheme cluster maps to the position
+/// reached after that cluster has been laid out, so any byte offset up to
+/// `text.len()` can be looked up.
+///
+/// Layout rules:
+/// - A line break (`"\n"`, or `"\r\n"`, which is a single grapheme cluster)
+///   starts a new row at column 0.
+/// - Whitespace that would overflow `width` moves to the next row and leaves
+///   the column at 1.
+/// - A non-whitespace grapheme that would overflow wraps to a new row when it
+///   is the first grapheme of a word, or when no word boundary has been seen
+///   on the current line (hard break). Otherwise the word is allowed to run
+///   past `width`.
+///
+/// Rows and columns saturate at `u16::MAX` instead of wrapping around.
+///
+/// # Panics
+///
+/// Panics if `width` is zero.
+pub fn build_cursor_map(text: &str, width: u16) -> Vec<ScreenPos> {
+    assert!(width > 0, "wrap width must be greater than zero");
+    let width = usize::from(width);
+    let mut pos_map = vec![ScreenPos { row: 0, col: 0 }; text.len() + 1];
+    let mut row: usize = 0;
+    let mut col: usize = 0;
+
+    // Byte offset and column at which the current word began.
+    let mut word_start_idx = 0;
+    let mut word_start_col = 0;
+
+    for (byte_offset, grapheme) in text.grapheme_indices(true) {
+        let grapheme_end = byte_offset + grapheme.len();
+
+        // CR LF is never split by extended grapheme segmentation, so it has
+        // to be matched explicitly or it would be laid out as plain whitespace.
+        if grapheme == "\n" || grapheme == "\r\n" {
+            row += 1;
+            col = 0;
+            word_start_col = 0;
+            word_start_idx = grapheme_end;
+        } else {
+            let grapheme_width = UnicodeWidthStr::width(grapheme);
+            let overflows = col + grapheme_width > width;
+
+            if grapheme.chars().all(char::is_whitespace) {
+                if overflows {
+                    // The wrapping space occupies the first cell of the new row.
+                    row += 1;
+                    col = 1;
+                } else {
+                    col += grapheme_width;
+                }
+                // The next word, if any, starts right after this whitespace.
+                word_start_col = col;
+                word_start_idx = grapheme_end;
+            } else if overflows && (word_start_col == 0 || byte_offset == word_start_idx) {
+                // Either a hard break (no word boundary on this line) or the
+                // first grapheme of a word that does not fit: move it down.
+                row += 1;
+                col = grapheme_width;
+            } else {
+                // Fits, or belongs to a word already on this line, which is
+                // allowed to extend beyond `width`.
+                col += grapheme_width;
+            }
+        }
+
+        // Every byte after the cluster's first one, up to and including its
+        // end offset, reports the position reached after the cluster.
+        let end_pos = ScreenPos {
+            row: saturating_u16(row),
+            col: saturating_u16(col),
+        };
+        pos_map[byte_offset + 1..=grapheme_end].fill(end_pos);
+    }
+
+    pos_map
+}
+
+/// Converts a layout counter to `u16`, clamping at `u16::MAX` on overflow.
+fn saturating_u16(value: usize) -> u16 {
+    u16::try_from(value).unwrap_or(u16::MAX)
+}
+
+/// Returns the wrapped screen position for a single byte offset of `text`.
+///
+/// Offsets past the end of `text` are clamped to `text.len()`. The whole
+/// cursor map is rebuilt on every call, so callers that need many lookups
+/// should call `build_cursor_map` once and index the result themselves.
+pub fn byte_to_screen_pos(text: &str, byte_idx: usize, width: u16) -> ScreenPos {
+    let clamped_idx = byte_idx.min(text.len());
+    build_cursor_map(text, width)[clamped_idx]
+}
--- a/crates/owlen-core/tests/long_word_debug.rs
+++ b/crates/owlen-core/tests/long_word_debug.rs
@@ -0,0 +1,115 @@
+use owlen_core::wrap_cursor::build_cursor_map;
+
+/// Reproduces a reported case where a very long first word was not broken up,
+/// printing the cursor map and the derived lines for inspection.
+#[test]
+fn debug_long_word_wrapping() {
+    // Test the exact scenario from the user's issue
+    let text = "asdnklasdnaklsdnkalsdnaskldaskldnaskldnaskldnaskldnaskldnaskldnaskld asdnklska dnskadl dasnksdl asdn";
+    let width = 50; // Approximate width from the user's example
+
+    println!("Testing long word text with width {}", width);
+    println!("Text: '{}'", text);
+
+    // Check what the cursor map shows
+    let cursor_map = build_cursor_map(text, width);
+
+    println!("\nCursor map for key positions:");
+    let long_word_end = text.find(' ').unwrap_or(text.len());
+    for i in [
+        0,
+        10,
+        20,
+        30,
+        40,
+        50,
+        60,
+        70,
+        long_word_end,
+        long_word_end + 1,
+        text.len(),
+    ] {
+        if i <= text.len() {
+            let pos = cursor_map[i];
+            let char_at = if i < text.len() {
+                // `i` is a byte offset, so look the character up by slicing at
+                // that offset instead of counting `i` chars from the start.
+                let ch = text
+                    .get(i..)
+                    .and_then(|rest| rest.chars().next())
+                    .unwrap_or('?');
+                format!("'{}'", ch)
+            } else {
+                "END".to_string()
+            };
+            println!(
+                "  Byte {}: {} -> row {}, col {}",
+                i, char_at, pos.row, pos.col
+            );
+        }
+    }
+
+    // Derive display lines from the cursor map
+    let lines = format_text_with_word_wrap_debug(text, width);
+
+    println!("\nFormatted lines:");
+    for (i, line) in lines.iter().enumerate() {
+        println!("  Line {}: '{}' (length: {})", i, line, line.len());
+    }
+
+    // The long word should be broken up, not kept on one line
+    assert!(
+        lines[0].len() <= width as usize + 5,
+        "First line is too long: {} chars",
+        lines[0].len()
+    );
+}
+
+/// Splits `text` into display lines by following the row numbers reported by
+/// `build_cursor_map`, printing a trace line for every character processed.
+///
+/// A whitespace character that triggers a row change is dropped. Empty lines
+/// are never emitted, except that the result always holds at least one entry.
+fn format_text_with_word_wrap_debug(text: &str, width: u16) -> Vec<String> {
+    if text.is_empty() {
+        return vec![String::new()];
+    }
+
+    // The cursor map decides where each row ends.
+    let positions = build_cursor_map(text, width);
+
+    let mut finished: Vec<String> = Vec::new();
+    let mut pending = String::new();
+    let mut row_in_progress = 0;
+
+    for (start, ch) in text.char_indices() {
+        let before = positions[start];
+        let after = positions[start + ch.len_utf8()];
+
+        println!(
+            "Processing '{}' at byte {}: before=({},{}) after=({},{})",
+            ch, start, before.row, before.col, after.row, after.col
+        );
+
+        // A higher row after this character means the current line is done.
+        if after.row > row_in_progress {
+            println!(
+                "  Row changed from {} to {}! Finishing line: '{}'",
+                row_in_progress, after.row, pending
+            );
+            if !pending.is_empty() {
+                finished.push(std::mem::take(&mut pending));
+            }
+            row_in_progress = after.row;
+
+            // Whitespace that itself caused the wrap is not carried over.
+            if ch.is_whitespace() && before.row < after.row {
+                println!("  Skipping wrapping space");
+                continue;
+            }
+        }
+
+        pending.push(ch);
+    }
+
+    // Flush the trailing line; guarantee at least one (possibly empty) entry.
+    if !pending.is_empty() || finished.is_empty() {
+        finished.push(pending);
+    }
+
+    finished
+}
--- a/crates/owlen-core/tests/wrap_cursor_tests.rs
+++ b/crates/owlen-core/tests/wrap_cursor_tests.rs
@@ -0,0 +1,96 @@
+#![allow(non_snake_case)]
+
+use owlen_core::wrap_cursor::{build_cursor_map, ScreenPos};
+
+/// Asserts that the cursor map entry at `byte_idx` equals `expected`.
+///
+/// Marked `#[track_caller]` so that a failure is reported at the line of the
+/// calling test rather than inside this helper.
+#[track_caller]
+fn assert_cursor_pos(map: &[ScreenPos], byte_idx: usize, expected: ScreenPos) {
+    assert_eq!(map[byte_idx], expected, "Mismatch at byte {}", byte_idx);
+}
+
+/// A space that does not fit at the end of a row moves to the next row, and
+/// the following word continues from there.
+#[test]
+fn test_basic_wrap_at_spaces() {
+    let map = build_cursor_map("hello world", 5);
+
+    let expectations = [
+        (0, ScreenPos { row: 0, col: 0 }),
+        (5, ScreenPos { row: 0, col: 5 }),  // after "hello"
+        (6, ScreenPos { row: 1, col: 1 }),  // after "hello "
+        (11, ScreenPos { row: 1, col: 6 }), // after "world"
+    ];
+    for (byte_idx, expected) in expectations {
+        assert_cursor_pos(&map, byte_idx, expected);
+    }
+}
+
+/// An explicit newline starts a new row at column 0.
+#[test]
+fn test_hard_line_break() {
+    let map = build_cursor_map("a\nb", 10);
+
+    let expectations = [
+        (0, ScreenPos { row: 0, col: 0 }),
+        (1, ScreenPos { row: 0, col: 1 }), // after "a"
+        (2, ScreenPos { row: 1, col: 0 }), // after "\n"
+        (3, ScreenPos { row: 1, col: 1 }), // after "b"
+    ];
+    for (byte_idx, expected) in expectations {
+        assert_cursor_pos(&map, byte_idx, expected);
+    }
+}
+
+/// A word with no preceding word boundary is hard-broken every `width` cells.
+#[test]
+fn test_long_word_split() {
+    let map = build_cursor_map("abcdefgh", 3);
+
+    // Expected (row, col) for byte offsets 0 through 8, in order.
+    let expected_by_offset = [
+        (0, 0),
+        (0, 1),
+        (0, 2),
+        (0, 3),
+        (1, 1),
+        (1, 2),
+        (1, 3),
+        (2, 1),
+        (2, 2),
+    ];
+    for (byte_idx, (row, col)) in expected_by_offset.into_iter().enumerate() {
+        assert_cursor_pos(&map, byte_idx, ScreenPos { row, col });
+    }
+}
+
+/// Consecutive spaces each take a cell; the one that overflows wraps to the
+/// next row instead of being discarded.
+#[test]
+fn test_trailing_spaces_preserved() {
+    let map = build_cursor_map("x  y", 2);
+
+    let expectations = [
+        (0, ScreenPos { row: 0, col: 0 }),
+        (1, ScreenPos { row: 0, col: 1 }), // after "x"
+        (2, ScreenPos { row: 0, col: 2 }), // after "x "
+        (3, ScreenPos { row: 1, col: 1 }), // after "x  "
+        (4, ScreenPos { row: 1, col: 2 }), // after "y"
+    ];
+    for (byte_idx, expected) in expectations {
+        assert_cursor_pos(&map, byte_idx, expected);
+    }
+}
+
+/// Double-width emoji advance the column by two cells and wrap as a unit.
+#[test]
+fn test_graphemes_emoji() {
+    let map = build_cursor_map("🙂🙂a", 3);
+
+    let expectations = [
+        (0, ScreenPos { row: 0, col: 0 }),
+        (4, ScreenPos { row: 0, col: 2 }), // after first emoji
+        (8, ScreenPos { row: 1, col: 2 }), // after second emoji
+        (9, ScreenPos { row: 1, col: 3 }), // after "a"
+    ];
+    for (byte_idx, expected) in expectations {
+        assert_cursor_pos(&map, byte_idx, expected);
+    }
+}
+
+/// A base letter plus combining mark is one grapheme occupying a single cell;
+/// offsets inside the cluster report the position after it.
+#[test]
+fn test_graphemes_combining() {
+    let map = build_cursor_map("e\u{0301}", 10);
+
+    let expectations = [
+        (0, ScreenPos { row: 0, col: 0 }),
+        (1, ScreenPos { row: 0, col: 1 }), // after "e"
+        (3, ScreenPos { row: 0, col: 1 }), // after combining mark
+    ];
+    for (byte_idx, expected) in expectations {
+        assert_cursor_pos(&map, byte_idx, expected);
+    }
+}
+
+/// A word that exactly fills the row stays on it; the following space wraps.
+#[test]
+fn test_exact_edge() {
+    let map = build_cursor_map("abc def", 3);
+
+    let expectations = [
+        (0, ScreenPos { row: 0, col: 0 }),
+        (3, ScreenPos { row: 0, col: 3 }), // after "abc"
+        (4, ScreenPos { row: 1, col: 1 }), // after " "
+        (7, ScreenPos { row: 1, col: 4 }), // after "def"
+    ];
+    for (byte_idx, expected) in expectations {
+        assert_cursor_pos(&map, byte_idx, expected);
+    }
+}