Add word wrapping and cursor mapping utilities to core library; integrate advanced text input support in TUI. Update dependencies accordingly.

This commit is contained in:
2025-09-28 01:47:50 +02:00
parent 6ddc66d864
commit ccf9349f99
11 changed files with 754 additions and 96 deletions

View File

@@ -5,20 +5,20 @@ edition = "2021"
description = "Core traits and types for OWLEN LLM client"
[dependencies]
serde = { workspace = true }
serde_json = { workspace = true }
uuid = { workspace = true }
anyhow = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
futures = { workspace = true }
tokio-stream = { workspace = true }
async-trait = "0.1"
textwrap = { workspace = true }
toml = { workspace = true }
shellexpand = { workspace = true }
regex = "1"
once_cell = "1.21.3"
anyhow = "1.0.75"
log = "0.4.20"
serde = { version = "1.0.188", features = ["derive"] }
serde_json = "1.0.105"
thiserror = "1.0.48"
tokio = { version = "1.32.0", features = ["full"] }
unicode-segmentation = "1.11"
unicode-width = "0.1"
uuid = { version = "1.4.1", features = ["v4", "serde"] }
textwrap = "0.16.0"
futures = "0.3.28"
async-trait = "0.1.73"
toml = "0.8.0"
shellexpand = "3.1.0"
[dev-dependencies]
tokio-test = { workspace = true }

View File

@@ -49,11 +49,11 @@ impl MessageFormatter {
// 2) Collapse: remove whitespace-only lines; keep exactly one '\n' between content lines
let mut content = normalized
.split('\n')
.map(|l| l.trim_end()) // trim trailing spaces per line
.filter(|l| !l.trim().is_empty()) // drop blank/whitespace-only lines
.map(|l| l.trim_end()) // trim trailing spaces per line
.filter(|l| !l.trim().is_empty()) // drop blank/whitespace-only lines
.collect::<Vec<_>>()
.join("\n")
.trim() // trim leading/trailing whitespace
.trim() // trim leading/trailing whitespace
.to_string();
if content.is_empty() && self.preserve_empty_lines {
@@ -73,8 +73,12 @@ impl MessageFormatter {
.collect();
// 5) Belt & suspenders: remove leading/trailing blanks if any survived
while lines.first().map_or(false, |s| s.trim().is_empty()) { lines.remove(0); }
while lines.last().map_or(false, |s| s.trim().is_empty()) { lines.pop(); }
while lines.first().map_or(false, |s| s.trim().is_empty()) {
lines.remove(0);
}
while lines.last().map_or(false, |s| s.trim().is_empty()) {
lines.pop();
}
lines
}

View File

@@ -21,7 +21,7 @@ pub use model::*;
pub use provider::*;
pub use router::*;
pub use session::*;
pub use types::*;
pub mod wrap_cursor;
/// Result type used throughout the OWLEN ecosystem
pub type Result<T> = std::result::Result<T, Error>;

View File

@@ -0,0 +1,92 @@
#![allow(clippy::cast_possible_truncation)]
use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr;
/// A cursor location on the wrapped text grid.
///
/// Both coordinates are zero-based: `row` counts wrapped display lines and
/// `col` counts terminal cells from the left edge of that line.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ScreenPos {
    /// Zero-based display row (increments on every soft or hard line break).
    pub row: u16,
    /// Zero-based display column, measured in terminal cells.
    pub col: u16,
}
pub fn build_cursor_map(text: &str, width: u16) -> Vec<ScreenPos> {
assert!(width > 0);
let width = width as usize;
let mut pos_map = vec![ScreenPos { row: 0, col: 0 }; text.len() + 1];
let mut row = 0;
let mut col = 0;
let mut word_start_idx = 0;
let mut word_start_col = 0;
for (byte_offset, grapheme) in text.grapheme_indices(true) {
let grapheme_width = UnicodeWidthStr::width(grapheme);
if grapheme == "\n" {
row += 1;
col = 0;
word_start_col = 0;
word_start_idx = byte_offset + grapheme.len();
// Set position for the end of this grapheme and any intermediate bytes
let end_pos = ScreenPos {
row: row as u16,
col: col as u16,
};
for i in 1..=grapheme.len() {
if byte_offset + i < pos_map.len() {
pos_map[byte_offset + i] = end_pos;
}
}
continue;
}
if grapheme.chars().all(char::is_whitespace) {
if col + grapheme_width > width {
// Whitespace causes wrap
row += 1;
col = 1; // Position after wrapping space
word_start_col = 1;
word_start_idx = byte_offset + grapheme.len();
} else {
col += grapheme_width;
word_start_col = col;
word_start_idx = byte_offset + grapheme.len();
}
} else {
if col + grapheme_width > width {
if word_start_col > 0 && byte_offset == word_start_idx {
// This is the first character of a new word that won't fit, wrap it
row += 1;
col = grapheme_width;
} else if word_start_col == 0 {
// No previous word boundary, hard break
row += 1;
col = grapheme_width;
} else {
// This is part of a word already on the line, let it extend beyond width
col += grapheme_width;
}
} else {
col += grapheme_width;
}
}
// Set position for the end of this grapheme and any intermediate bytes
let end_pos = ScreenPos {
row: row as u16,
col: col as u16,
};
for i in 1..=grapheme.len() {
if byte_offset + i < pos_map.len() {
pos_map[byte_offset + i] = end_pos;
}
}
}
pos_map
}
/// Returns the wrapped screen position for a single byte offset in `text`.
///
/// `byte_idx` values past the end of `text` are clamped to `text.len()`.
/// The full cursor map is rebuilt on every call, so callers that need many
/// lookups for the same text should call [`build_cursor_map`] once instead.
///
/// # Panics
///
/// Panics if `width` is zero (see [`build_cursor_map`]).
pub fn byte_to_screen_pos(text: &str, byte_idx: usize, width: u16) -> ScreenPos {
    let clamped_idx = std::cmp::min(byte_idx, text.len());
    build_cursor_map(text, width)[clamped_idx]
}

View File

@@ -0,0 +1,115 @@
use owlen_core::wrap_cursor::build_cursor_map;
/// Reproduces a reported wrapping problem: a single word longer than the
/// line width followed by several short words.
///
/// Prints the cursor map at a handful of byte offsets and the lines produced
/// by `format_text_with_word_wrap_debug`, then checks that the first line is
/// not much longer than the wrap width (i.e. the long word was broken up).
#[test]
fn debug_long_word_wrapping() {
    // Test the exact scenario from the user's issue
    let text = "asdnklasdnaklsdnkalsdnaskldaskldnaskldnaskldnaskldnaskldnaskldnaskld asdnklska dnskadl dasnksdl asdn";
    let width = 50; // Approximate width from the user's example

    println!("Testing long word text with width {}", width);
    println!("Text: '{}'", text);

    // Check what the cursor map shows
    let cursor_map = build_cursor_map(text, width);
    println!("\nCursor map for key positions:");

    let long_word_end = text.find(' ').unwrap_or(text.len());
    for i in [
        0,
        10,
        20,
        30,
        40,
        50,
        60,
        70,
        long_word_end,
        long_word_end + 1,
        text.len(),
    ] {
        if i <= text.len() {
            let pos = cursor_map[i];
            let char_at = if i < text.len() {
                // `i` is a byte offset, so find the character by slicing at
                // that byte rather than counting `i` characters from the start.
                let ch = text
                    .get(i..)
                    .and_then(|rest| rest.chars().next())
                    .unwrap_or('?');
                format!("'{}'", ch)
            } else {
                "END".to_string()
            };
            println!(
                " Byte {}: {} -> row {}, col {}",
                i, char_at, pos.row, pos.col
            );
        }
    }

    // Test what the formatting helper produces
    let lines = format_text_with_word_wrap_debug(text, width);
    println!("\nFormatted lines:");
    for (i, line) in lines.iter().enumerate() {
        println!(" Line {}: '{}' (length: {})", i, line, line.len());
    }

    // The long word should be broken up, not kept on one line
    assert!(
        lines[0].len() <= width as usize + 5,
        "First line is too long: {} chars",
        lines[0].len()
    );
}
/// Splits `text` into display lines by following the rows reported by
/// `build_cursor_map`, printing a trace of every decision.
///
/// A new line is started whenever the position after a character lies on a
/// later row than the one currently being filled. A whitespace character that
/// itself triggered the row change is dropped instead of starting the new
/// line. Empty input yields a single empty line.
fn format_text_with_word_wrap_debug(text: &str, width: u16) -> Vec<String> {
    if text.is_empty() {
        return vec![String::new()];
    }

    // The cursor map decides where the line breaks fall.
    let cursor_map = build_cursor_map(text, width);
    let mut lines: Vec<String> = Vec::new();
    let mut pending = String::new();
    let mut current_row = 0;

    for (byte_idx, ch) in text.char_indices() {
        let pos_before = cursor_map[byte_idx];
        let pos_after = cursor_map[byte_idx + ch.len_utf8()];
        println!(
            "Processing '{}' at byte {}: before=({},{}) after=({},{})",
            ch, byte_idx, pos_before.row, pos_before.col, pos_after.row, pos_after.col
        );

        let row_advanced = pos_after.row > current_row;
        if row_advanced {
            println!(
                " Row changed from {} to {}! Finishing line: '{}'",
                current_row, pos_after.row, pending
            );
            // Flush whatever has been collected for the previous row.
            if !pending.is_empty() {
                lines.push(std::mem::take(&mut pending));
            }
            current_row = pos_after.row;

            // A space that caused the wrap is not carried onto the new line.
            let is_wrapping_space = ch.is_whitespace() && pos_before.row < pos_after.row;
            if is_wrapping_space {
                println!(" Skipping wrapping space");
                continue;
            }
        }

        pending.push(ch);
    }

    // Emit the trailing line; guarantee at least one (possibly empty) line.
    if !pending.is_empty() {
        lines.push(pending);
    } else if lines.is_empty() {
        lines.push(String::new());
    }
    lines
}

View File

@@ -0,0 +1,96 @@
#![allow(non_snake_case)]
use owlen_core::wrap_cursor::{build_cursor_map, ScreenPos};
/// Asserts that the cursor map entry at `byte_idx` equals `expected`,
/// naming the offending byte offset in the failure message.
///
/// Panics (index out of bounds) if `byte_idx` is not a valid index into `map`.
fn assert_cursor_pos(map: &[ScreenPos], byte_idx: usize, expected: ScreenPos) {
    let actual = map[byte_idx];
    assert_eq!(actual, expected, "Mismatch at byte {}", byte_idx);
}
/// A space that no longer fits on the line wraps to the next row, and the
/// following word continues after it.
#[test]
fn test_basic_wrap_at_spaces() {
    let map = build_cursor_map("hello world", 5);

    // (byte offset, expected cursor position)
    let expectations = [
        (0, ScreenPos { row: 0, col: 0 }),
        (5, ScreenPos { row: 0, col: 5 }),  // after "hello"
        (6, ScreenPos { row: 1, col: 1 }),  // after "hello "
        (11, ScreenPos { row: 1, col: 6 }), // after "world"
    ];
    for (byte_idx, expected) in expectations {
        assert_cursor_pos(&map, byte_idx, expected);
    }
}
/// An explicit newline moves the cursor to column 0 of the next row.
#[test]
fn test_hard_line_break() {
    let map = build_cursor_map("a\nb", 10);

    // (byte offset, expected cursor position)
    let expectations = [
        (0, ScreenPos { row: 0, col: 0 }),
        (1, ScreenPos { row: 0, col: 1 }), // after "a"
        (2, ScreenPos { row: 1, col: 0 }), // after "\n"
        (3, ScreenPos { row: 1, col: 1 }), // after "b"
    ];
    for (byte_idx, expected) in expectations {
        assert_cursor_pos(&map, byte_idx, expected);
    }
}
/// A word longer than the line is hard-broken every `width` columns.
#[test]
fn test_long_word_split() {
    let map = build_cursor_map("abcdefgh", 3);

    // Expected (row, col) after each byte offset 0..=8.
    let expected_cells: [(u16, u16); 9] = [
        (0, 0),
        (0, 1),
        (0, 2),
        (0, 3),
        (1, 1),
        (1, 2),
        (1, 3),
        (2, 1),
        (2, 2),
    ];
    for (byte_idx, (row, col)) in expected_cells.into_iter().enumerate() {
        assert_cursor_pos(&map, byte_idx, ScreenPos { row, col });
    }
}
/// Spaces after a word keep their own cells: the first space still fits on
/// the line, the second one wraps to the next row.
#[test]
fn test_trailing_spaces_preserved() {
    // "x", two spaces, "y". The spaces are written as escapes so the count
    // cannot be lost to whitespace normalisation; with only one space the
    // text would be 3 bytes long and the lookup at byte 4 below would panic.
    let text = "x\u{20}\u{20}y";
    let width = 2;
    let map = build_cursor_map(text, width);

    assert_cursor_pos(&map, 0, ScreenPos { row: 0, col: 0 });
    assert_cursor_pos(&map, 1, ScreenPos { row: 0, col: 1 }); // after "x"
    assert_cursor_pos(&map, 2, ScreenPos { row: 0, col: 2 }); // after "x" + first space
    assert_cursor_pos(&map, 3, ScreenPos { row: 1, col: 1 }); // after the second (wrapping) space
    assert_cursor_pos(&map, 4, ScreenPos { row: 1, col: 2 }); // after "y"
}
/// Double-width, multi-byte graphemes advance the column by their display
/// width and wrap as a unit.
#[test]
fn test_graphemes_emoji() {
    let map = build_cursor_map("🙂🙂a", 3);

    // (byte offset, expected cursor position); each emoji is 4 bytes long.
    let expectations = [
        (0, ScreenPos { row: 0, col: 0 }),
        (4, ScreenPos { row: 0, col: 2 }), // after first emoji
        (8, ScreenPos { row: 1, col: 2 }), // after second emoji
        (9, ScreenPos { row: 1, col: 3 }), // after "a"
    ];
    for (byte_idx, expected) in expectations {
        assert_cursor_pos(&map, byte_idx, expected);
    }
}
/// A base letter plus combining mark is one grapheme occupying one column;
/// offsets inside the cluster report the position after the whole cluster.
#[test]
fn test_graphemes_combining() {
    let map = build_cursor_map("e\u{0301}", 10);

    // (byte offset, expected cursor position)
    let expectations = [
        (0, ScreenPos { row: 0, col: 0 }),
        (1, ScreenPos { row: 0, col: 1 }), // after "e"
        (3, ScreenPos { row: 0, col: 1 }), // after combining mark
    ];
    for (byte_idx, expected) in expectations {
        assert_cursor_pos(&map, byte_idx, expected);
    }
}
/// A word that exactly fills the line stays on it; the following space wraps
/// and the next word continues on the new row after that space.
#[test]
fn test_exact_edge() {
    let map = build_cursor_map("abc def", 3);

    // (byte offset, expected cursor position)
    let expectations = [
        (0, ScreenPos { row: 0, col: 0 }),
        (3, ScreenPos { row: 0, col: 3 }), // after "abc"
        (4, ScreenPos { row: 1, col: 1 }), // after " "
        (7, ScreenPos { row: 1, col: 4 }), // after "def"
    ];
    for (byte_idx, expected) in expectations {
        assert_cursor_pos(&map, byte_idx, expected);
    }
}