Add word wrapping and cursor mapping utilities to core library; integrate advanced text input support in TUI. Update dependencies accordingly.
This commit is contained in:
@@ -5,20 +5,20 @@ edition = "2021"
|
||||
description = "Core traits and types for OWLEN LLM client"
|
||||
|
||||
[dependencies]
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
anyhow = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
tokio-stream = { workspace = true }
|
||||
async-trait = "0.1"
|
||||
textwrap = { workspace = true }
|
||||
toml = { workspace = true }
|
||||
shellexpand = { workspace = true }
|
||||
regex = "1"
|
||||
once_cell = "1.21.3"
|
||||
anyhow = "1.0.75"
|
||||
log = "0.4.20"
|
||||
serde = { version = "1.0.188", features = ["derive"] }
|
||||
serde_json = "1.0.105"
|
||||
thiserror = "1.0.48"
|
||||
tokio = { version = "1.32.0", features = ["full"] }
|
||||
unicode-segmentation = "1.11"
|
||||
unicode-width = "0.1"
|
||||
uuid = { version = "1.4.1", features = ["v4", "serde"] }
|
||||
textwrap = "0.16.0"
|
||||
futures = "0.3.28"
|
||||
async-trait = "0.1.73"
|
||||
toml = "0.8.0"
|
||||
shellexpand = "3.1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
tokio-test = { workspace = true }
|
||||
|
||||
@@ -49,11 +49,11 @@ impl MessageFormatter {
|
||||
// 2) Collapse: remove whitespace-only lines; keep exactly one '\n' between content lines
|
||||
let mut content = normalized
|
||||
.split('\n')
|
||||
.map(|l| l.trim_end()) // trim trailing spaces per line
|
||||
.filter(|l| !l.trim().is_empty()) // drop blank/whitespace-only lines
|
||||
.map(|l| l.trim_end()) // trim trailing spaces per line
|
||||
.filter(|l| !l.trim().is_empty()) // drop blank/whitespace-only lines
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
.trim() // trim leading/trailing whitespace
|
||||
.trim() // trim leading/trailing whitespace
|
||||
.to_string();
|
||||
|
||||
if content.is_empty() && self.preserve_empty_lines {
|
||||
@@ -73,8 +73,12 @@ impl MessageFormatter {
|
||||
.collect();
|
||||
|
||||
// 5) Belt & suspenders: remove leading/trailing blanks if any survived
|
||||
while lines.first().map_or(false, |s| s.trim().is_empty()) { lines.remove(0); }
|
||||
while lines.last().map_or(false, |s| s.trim().is_empty()) { lines.pop(); }
|
||||
while lines.first().map_or(false, |s| s.trim().is_empty()) {
|
||||
lines.remove(0);
|
||||
}
|
||||
while lines.last().map_or(false, |s| s.trim().is_empty()) {
|
||||
lines.pop();
|
||||
}
|
||||
|
||||
lines
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ pub use model::*;
|
||||
pub use provider::*;
|
||||
pub use router::*;
|
||||
pub use session::*;
|
||||
pub use types::*;
|
||||
pub mod wrap_cursor;
|
||||
|
||||
/// Result type used throughout the OWLEN ecosystem
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
92
crates/owlen-core/src/wrap_cursor.rs
Normal file
92
crates/owlen-core/src/wrap_cursor.rs
Normal file
@@ -0,0 +1,92 @@
|
||||
#![allow(clippy::cast_possible_truncation)]
|
||||
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
use unicode_width::UnicodeWidthStr;
|
||||
|
||||
/// A cursor location on screen, expressed as a row and a column.
///
/// Values of this type are produced by `build_cursor_map`, which starts
/// counting at row 0, column 0.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ScreenPos {
    /// Row index of the position (the first row is 0).
    pub row: u16,
    /// Column index of the position within its row (the first column is 0).
    pub col: u16,
}
|
||||
|
||||
pub fn build_cursor_map(text: &str, width: u16) -> Vec<ScreenPos> {
|
||||
assert!(width > 0);
|
||||
let width = width as usize;
|
||||
let mut pos_map = vec![ScreenPos { row: 0, col: 0 }; text.len() + 1];
|
||||
let mut row = 0;
|
||||
let mut col = 0;
|
||||
|
||||
let mut word_start_idx = 0;
|
||||
let mut word_start_col = 0;
|
||||
|
||||
for (byte_offset, grapheme) in text.grapheme_indices(true) {
|
||||
let grapheme_width = UnicodeWidthStr::width(grapheme);
|
||||
|
||||
if grapheme == "\n" {
|
||||
row += 1;
|
||||
col = 0;
|
||||
word_start_col = 0;
|
||||
word_start_idx = byte_offset + grapheme.len();
|
||||
// Set position for the end of this grapheme and any intermediate bytes
|
||||
let end_pos = ScreenPos {
|
||||
row: row as u16,
|
||||
col: col as u16,
|
||||
};
|
||||
for i in 1..=grapheme.len() {
|
||||
if byte_offset + i < pos_map.len() {
|
||||
pos_map[byte_offset + i] = end_pos;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if grapheme.chars().all(char::is_whitespace) {
|
||||
if col + grapheme_width > width {
|
||||
// Whitespace causes wrap
|
||||
row += 1;
|
||||
col = 1; // Position after wrapping space
|
||||
word_start_col = 1;
|
||||
word_start_idx = byte_offset + grapheme.len();
|
||||
} else {
|
||||
col += grapheme_width;
|
||||
word_start_col = col;
|
||||
word_start_idx = byte_offset + grapheme.len();
|
||||
}
|
||||
} else {
|
||||
if col + grapheme_width > width {
|
||||
if word_start_col > 0 && byte_offset == word_start_idx {
|
||||
// This is the first character of a new word that won't fit, wrap it
|
||||
row += 1;
|
||||
col = grapheme_width;
|
||||
} else if word_start_col == 0 {
|
||||
// No previous word boundary, hard break
|
||||
row += 1;
|
||||
col = grapheme_width;
|
||||
} else {
|
||||
// This is part of a word already on the line, let it extend beyond width
|
||||
col += grapheme_width;
|
||||
}
|
||||
} else {
|
||||
col += grapheme_width;
|
||||
}
|
||||
}
|
||||
|
||||
// Set position for the end of this grapheme and any intermediate bytes
|
||||
let end_pos = ScreenPos {
|
||||
row: row as u16,
|
||||
col: col as u16,
|
||||
};
|
||||
for i in 1..=grapheme.len() {
|
||||
if byte_offset + i < pos_map.len() {
|
||||
pos_map[byte_offset + i] = end_pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pos_map
|
||||
}
|
||||
|
||||
pub fn byte_to_screen_pos(text: &str, byte_idx: usize, width: u16) -> ScreenPos {
|
||||
let pos_map = build_cursor_map(text, width);
|
||||
pos_map[byte_idx.min(text.len())]
|
||||
}
|
||||
115
crates/owlen-core/tests/long_word_debug.rs
Normal file
115
crates/owlen-core/tests/long_word_debug.rs
Normal file
@@ -0,0 +1,115 @@
|
||||
use owlen_core::wrap_cursor::build_cursor_map;
|
||||
|
||||
/// Prints the cursor map and the derived line split for a text that starts
/// with a word longer than the available width, then checks that the first
/// line was actually broken up.
#[test]
fn debug_long_word_wrapping() {
    // The exact scenario from the user's issue.
    let text = "asdnklasdnaklsdnkalsdnaskldaskldnaskldnaskldnaskldnaskldnaskldnaskld asdnklska dnskadl dasnksdl asdn";
    let width = 50; // Approximate width from the user's example

    println!("Testing long word text with width {}", width);
    println!("Text: '{}'", text);

    // Dump the cursor map at a handful of interesting byte offsets.
    let cursor_map = build_cursor_map(text, width);

    println!("\nCursor map for key positions:");
    let long_word_end = text.find(' ').unwrap_or(text.len());
    let probes = [
        0,
        10,
        20,
        30,
        40,
        50,
        60,
        70,
        long_word_end,
        long_word_end + 1,
        text.len(),
    ];
    for i in probes.into_iter().filter(|&i| i <= text.len()) {
        let pos = cursor_map[i];
        let char_at = if i == text.len() {
            "END".to_string()
        } else {
            format!("'{}'", text.chars().nth(i).unwrap_or('?'))
        };
        println!(
            " Byte {}: {} -> row {}, col {}",
            i, char_at, pos.row, pos.col
        );
    }

    // Show the lines the debug formatter derives from the cursor map.
    let lines = format_text_with_word_wrap_debug(text, width);

    println!("\nFormatted lines:");
    for (i, line) in lines.iter().enumerate() {
        println!(" Line {}: '{}' (length: {})", i, line, line.len());
    }

    // The long word should be broken up, not kept on one line.
    let first_len = lines[0].len();
    assert!(
        first_len <= usize::from(width) + 5,
        "First line is too long: {} chars",
        first_len
    );
}
|
||||
|
||||
fn format_text_with_word_wrap_debug(text: &str, width: u16) -> Vec<String> {
|
||||
if text.is_empty() {
|
||||
return vec!["".to_string()];
|
||||
}
|
||||
|
||||
// Use the cursor map to determine where line breaks should occur
|
||||
let cursor_map = build_cursor_map(text, width);
|
||||
|
||||
let mut lines = Vec::new();
|
||||
let mut current_line = String::new();
|
||||
let mut current_row = 0;
|
||||
|
||||
for (byte_idx, ch) in text.char_indices() {
|
||||
let pos_before = if byte_idx > 0 {
|
||||
cursor_map[byte_idx]
|
||||
} else {
|
||||
cursor_map[0]
|
||||
};
|
||||
let pos_after = cursor_map[byte_idx + ch.len_utf8()];
|
||||
|
||||
println!(
|
||||
"Processing '{}' at byte {}: before=({},{}) after=({},{})",
|
||||
ch, byte_idx, pos_before.row, pos_before.col, pos_after.row, pos_after.col
|
||||
);
|
||||
|
||||
// If the row changed, we need to start a new line
|
||||
if pos_after.row > current_row {
|
||||
println!(
|
||||
" Row changed from {} to {}! Finishing line: '{}'",
|
||||
current_row, pos_after.row, current_line
|
||||
);
|
||||
if !current_line.is_empty() {
|
||||
lines.push(current_line.clone());
|
||||
current_line.clear();
|
||||
}
|
||||
current_row = pos_after.row;
|
||||
|
||||
// If this character is a space that caused the wrap, don't include it
|
||||
if ch.is_whitespace() && pos_before.row < pos_after.row {
|
||||
println!(" Skipping wrapping space");
|
||||
continue; // Skip the wrapping space
|
||||
}
|
||||
}
|
||||
|
||||
current_line.push(ch);
|
||||
}
|
||||
|
||||
// Add the final line
|
||||
if !current_line.is_empty() {
|
||||
lines.push(current_line);
|
||||
} else if lines.is_empty() {
|
||||
lines.push("".to_string());
|
||||
}
|
||||
|
||||
lines
|
||||
}
|
||||
96
crates/owlen-core/tests/wrap_cursor_tests.rs
Normal file
96
crates/owlen-core/tests/wrap_cursor_tests.rs
Normal file
@@ -0,0 +1,96 @@
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use owlen_core::wrap_cursor::{build_cursor_map, ScreenPos};
|
||||
|
||||
/// Asserts that the cursor map entry at `byte_idx` equals `expected`,
/// naming the offending byte offset in the failure message.
fn assert_cursor_pos(map: &[ScreenPos], byte_idx: usize, expected: ScreenPos) {
    let actual = map[byte_idx];
    assert_eq!(actual, expected, "Mismatch at byte {}", byte_idx);
}
|
||||
|
||||
/// A space that does not fit on the row moves the cursor to the next row.
#[test]
fn test_basic_wrap_at_spaces() {
    let map = build_cursor_map("hello world", 5);

    let expectations = [
        (0, ScreenPos { row: 0, col: 0 }),
        (5, ScreenPos { row: 0, col: 5 }),  // after "hello"
        (6, ScreenPos { row: 1, col: 1 }),  // after "hello "
        (11, ScreenPos { row: 1, col: 6 }), // after "world"
    ];
    for (byte_idx, expected) in expectations {
        assert_cursor_pos(&map, byte_idx, expected);
    }
}
|
||||
|
||||
/// An explicit newline starts a new row at column 0.
#[test]
fn test_hard_line_break() {
    let map = build_cursor_map("a\nb", 10);

    let expectations = [
        (0, ScreenPos { row: 0, col: 0 }),
        (1, ScreenPos { row: 0, col: 1 }), // after "a"
        (2, ScreenPos { row: 1, col: 0 }), // after "\n"
        (3, ScreenPos { row: 1, col: 1 }), // after "b"
    ];
    for (byte_idx, expected) in expectations {
        assert_cursor_pos(&map, byte_idx, expected);
    }
}
|
||||
|
||||
/// A word with no whitespace before it is hard-broken every `width` columns.
#[test]
fn test_long_word_split() {
    let map = build_cursor_map("abcdefgh", 3);

    let expectations = [
        (0, ScreenPos { row: 0, col: 0 }),
        (1, ScreenPos { row: 0, col: 1 }),
        (2, ScreenPos { row: 0, col: 2 }),
        (3, ScreenPos { row: 0, col: 3 }),
        (4, ScreenPos { row: 1, col: 1 }),
        (5, ScreenPos { row: 1, col: 2 }),
        (6, ScreenPos { row: 1, col: 3 }),
        (7, ScreenPos { row: 2, col: 1 }),
        (8, ScreenPos { row: 2, col: 2 }),
    ];
    for (byte_idx, expected) in expectations {
        assert_cursor_pos(&map, byte_idx, expected);
    }
}
|
||||
|
||||
/// Consecutive spaces each occupy a column; the one that no longer fits
/// wraps the cursor onto the next row.
#[test]
fn test_trailing_spaces_preserved() {
    // Two spaces between "x" and "y": the text must be 4 bytes long so that
    // the map has an entry for byte offset 4.
    let text = "x  y";
    let width = 2;
    let map = build_cursor_map(text, width);

    assert_cursor_pos(&map, 0, ScreenPos { row: 0, col: 0 });
    assert_cursor_pos(&map, 1, ScreenPos { row: 0, col: 1 }); // after "x"
    assert_cursor_pos(&map, 2, ScreenPos { row: 0, col: 2 }); // after "x "
    assert_cursor_pos(&map, 3, ScreenPos { row: 1, col: 1 }); // after "x  " (second space wraps)
    assert_cursor_pos(&map, 4, ScreenPos { row: 1, col: 2 }); // after "y"
}
|
||||
|
||||
/// Double-width emoji advance the column by two and wrap as a unit.
#[test]
fn test_graphemes_emoji() {
    let map = build_cursor_map("🙂🙂a", 3);

    let expectations = [
        (0, ScreenPos { row: 0, col: 0 }),
        (4, ScreenPos { row: 0, col: 2 }), // after first emoji
        (8, ScreenPos { row: 1, col: 2 }), // after second emoji
        (9, ScreenPos { row: 1, col: 3 }), // after "a"
    ];
    for (byte_idx, expected) in expectations {
        assert_cursor_pos(&map, byte_idx, expected);
    }
}
|
||||
|
||||
/// A base letter plus combining mark forms one cluster occupying one column.
#[test]
fn test_graphemes_combining() {
    let map = build_cursor_map("e\u{0301}", 10);

    let expectations = [
        (0, ScreenPos { row: 0, col: 0 }),
        (1, ScreenPos { row: 0, col: 1 }), // after "e"
        (3, ScreenPos { row: 0, col: 1 }), // after combining mark
    ];
    for (byte_idx, expected) in expectations {
        assert_cursor_pos(&map, byte_idx, expected);
    }
}
|
||||
|
||||
/// A word that exactly fills the row stays on it; the following space wraps.
#[test]
fn test_exact_edge() {
    let map = build_cursor_map("abc def", 3);

    let expectations = [
        (0, ScreenPos { row: 0, col: 0 }),
        (3, ScreenPos { row: 0, col: 3 }), // after "abc"
        (4, ScreenPos { row: 1, col: 1 }), // after " "
        (7, ScreenPos { row: 1, col: 4 }), // after "def"
    ];
    for (byte_idx, expected) in expectations {
        assert_cursor_pos(&map, byte_idx, expected);
    }
}
|
||||
Reference in New Issue
Block a user