Move rope grapheme iterators from core to stdx

This commit is contained in:
Michael Davis 2025-01-26 20:58:27 -05:00
parent 51832b02c9
commit 20151a5594
No known key found for this signature in database
8 changed files with 210 additions and 178 deletions

View file

@ -19,10 +19,12 @@ mod test;
use unicode_segmentation::{Graphemes, UnicodeSegmentation};
use helix_stdx::rope::{RopeGraphemes, RopeSliceExt};
use crate::graphemes::{Grapheme, GraphemeStr};
use crate::syntax::Highlight;
use crate::text_annotations::TextAnnotations;
use crate::{Position, RopeGraphemes, RopeSlice};
use crate::{Position, RopeSlice};
/// TODO make Highlight a u32 to reduce the size of this enum to a single word.
#[derive(Debug, Clone, Copy)]
@ -219,7 +221,7 @@ impl<'t> DocumentFormatter<'t> {
text_fmt,
annotations,
visual_pos: Position { row: 0, col: 0 },
graphemes: RopeGraphemes::new(text.slice(block_char_idx..)),
graphemes: text.slice(block_char_idx..).graphemes(),
char_pos: block_char_idx,
exhausted: false,
indent_level: None,

View file

@ -1,7 +1,7 @@
//! Utility functions to traverse the unicode graphemes of a `Rope`'s text contents.
//!
//! Based on <https://github.com/cessen/led/blob/c4fa72405f510b7fd16052f90a598c429b3104a6/src/graphemes.rs>
use ropey::{iter::Chunks, str_utils::byte_to_char_idx, RopeSlice};
use ropey::{str_utils::byte_to_char_idx, RopeSlice};
use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
use unicode_width::UnicodeWidthStr;
@ -270,162 +270,6 @@ pub fn is_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> bool {
}
}
/// An iterator over the graphemes of a `RopeSlice`.
#[derive(Clone)]
pub struct RopeGraphemes<'a> {
text: RopeSlice<'a>,
chunks: Chunks<'a>,
cur_chunk: &'a str,
cur_chunk_start: usize,
cursor: GraphemeCursor,
}
impl fmt::Debug for RopeGraphemes<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("RopeGraphemes")
.field("text", &self.text)
.field("chunks", &self.chunks)
.field("cur_chunk", &self.cur_chunk)
.field("cur_chunk_start", &self.cur_chunk_start)
// .field("cursor", &self.cursor)
.finish()
}
}
impl RopeGraphemes<'_> {
#[must_use]
pub fn new(slice: RopeSlice) -> RopeGraphemes {
let mut chunks = slice.chunks();
let first_chunk = chunks.next().unwrap_or("");
RopeGraphemes {
text: slice,
chunks,
cur_chunk: first_chunk,
cur_chunk_start: 0,
cursor: GraphemeCursor::new(0, slice.len_bytes(), true),
}
}
}
impl<'a> Iterator for RopeGraphemes<'a> {
type Item = RopeSlice<'a>;
fn next(&mut self) -> Option<RopeSlice<'a>> {
let a = self.cursor.cur_cursor();
let b;
loop {
match self
.cursor
.next_boundary(self.cur_chunk, self.cur_chunk_start)
{
Ok(None) => {
return None;
}
Ok(Some(n)) => {
b = n;
break;
}
Err(GraphemeIncomplete::NextChunk) => {
self.cur_chunk_start += self.cur_chunk.len();
self.cur_chunk = self.chunks.next().unwrap_or("");
}
Err(GraphemeIncomplete::PreContext(idx)) => {
let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
self.cursor.provide_context(chunk, byte_idx);
}
_ => unreachable!(),
}
}
if a < self.cur_chunk_start {
Some(self.text.byte_slice(a..b))
} else {
let a2 = a - self.cur_chunk_start;
let b2 = b - self.cur_chunk_start;
Some((&self.cur_chunk[a2..b2]).into())
}
}
}
/// An iterator over the graphemes of a `RopeSlice` in reverse.
#[derive(Clone)]
pub struct RevRopeGraphemes<'a> {
text: RopeSlice<'a>,
chunks: Chunks<'a>,
cur_chunk: &'a str,
cur_chunk_start: usize,
cursor: GraphemeCursor,
}
impl fmt::Debug for RevRopeGraphemes<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("RevRopeGraphemes")
.field("text", &self.text)
.field("chunks", &self.chunks)
.field("cur_chunk", &self.cur_chunk)
.field("cur_chunk_start", &self.cur_chunk_start)
// .field("cursor", &self.cursor)
.finish()
}
}
impl RevRopeGraphemes<'_> {
#[must_use]
pub fn new(slice: RopeSlice) -> RevRopeGraphemes {
let (mut chunks, mut cur_chunk_start, _, _) = slice.chunks_at_byte(slice.len_bytes());
chunks.reverse();
let first_chunk = chunks.next().unwrap_or("");
cur_chunk_start -= first_chunk.len();
RevRopeGraphemes {
text: slice,
chunks,
cur_chunk: first_chunk,
cur_chunk_start,
cursor: GraphemeCursor::new(slice.len_bytes(), slice.len_bytes(), true),
}
}
}
impl<'a> Iterator for RevRopeGraphemes<'a> {
type Item = RopeSlice<'a>;
fn next(&mut self) -> Option<RopeSlice<'a>> {
let a = self.cursor.cur_cursor();
let b;
loop {
match self
.cursor
.prev_boundary(self.cur_chunk, self.cur_chunk_start)
{
Ok(None) => {
return None;
}
Ok(Some(n)) => {
b = n;
break;
}
Err(GraphemeIncomplete::PrevChunk) => {
self.cur_chunk = self.chunks.next().unwrap_or("");
self.cur_chunk_start -= self.cur_chunk.len();
}
Err(GraphemeIncomplete::PreContext(idx)) => {
let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
self.cursor.provide_context(chunk, byte_idx);
}
_ => unreachable!(),
}
}
if a >= self.cur_chunk_start + self.cur_chunk.len() {
Some(self.text.byte_slice(b..a))
} else {
let a2 = a - self.cur_chunk_start;
let b2 = b - self.cur_chunk_start;
Some((&self.cur_chunk[b2..a2]).into())
}
}
}
/// A highly compressed Cow<'a, str> that holds
/// atmost u31::MAX bytes and is readonly
pub struct GraphemeStr<'a> {

View file

@ -8,7 +8,7 @@ use crate::{
graphemes::{grapheme_width, tab_width_at},
syntax::{IndentationHeuristic, LanguageConfiguration, RopeProvider, Syntax},
tree_sitter::Node,
Position, Rope, RopeGraphemes, RopeSlice, Tendril,
Position, Rope, RopeSlice, Tendril,
};
/// Enum representing indentation style.
@ -200,7 +200,7 @@ pub fn indent_level_for_line(line: RopeSlice, tab_width: usize, indent_width: us
/// Create a string of tabs & spaces that has the same visual width as the given RopeSlice (independent of the tab width).
fn whitespace_with_same_width(text: RopeSlice) -> String {
let mut s = String::new();
for grapheme in RopeGraphemes::new(text) {
for grapheme in text.graphemes() {
if grapheme == "\t" {
s.push('\t');
} else {

View file

@ -54,7 +54,6 @@ pub type Tendril = SmartString<smartstring::LazyCompact>;
#[doc(inline)]
pub use {regex, tree_sitter};
pub use graphemes::RopeGraphemes;
pub use position::{
char_idx_at_visual_offset, coords_at_pos, pos_at_coords, softwrapped_dimensions,
visual_offset_from_anchor, visual_offset_from_block, Position, VisualOffsetError,

View file

@ -4,10 +4,12 @@ use std::{
ops::{Add, AddAssign, Sub, SubAssign},
};
use helix_stdx::rope::RopeSliceExt;
use crate::{
chars::char_is_line_ending,
doc_formatter::{DocumentFormatter, TextFormat},
graphemes::{ensure_grapheme_boundary_prev, grapheme_width, RopeGraphemes},
graphemes::{ensure_grapheme_boundary_prev, grapheme_width},
line_ending::line_end_char_index,
text_annotations::TextAnnotations,
RopeSlice,
@ -101,7 +103,7 @@ pub fn coords_at_pos(text: RopeSlice, pos: usize) -> Position {
let line_start = text.line_to_char(line);
let pos = ensure_grapheme_boundary_prev(text, pos);
let col = RopeGraphemes::new(text.slice(line_start..pos)).count();
let col = text.slice(line_start..pos).graphemes().count();
Position::new(line, col)
}
@ -126,7 +128,7 @@ pub fn visual_coords_at_pos(text: RopeSlice, pos: usize, tab_width: usize) -> Po
let mut col = 0;
for grapheme in RopeGraphemes::new(text.slice(line_start..pos)) {
for grapheme in text.slice(line_start..pos).graphemes() {
if grapheme == "\t" {
col += tab_width - (col % tab_width);
} else {
@ -275,7 +277,7 @@ pub fn pos_at_coords(text: RopeSlice, coords: Position, limit_before_line_ending
};
let mut col_char_offset = 0;
for (i, g) in RopeGraphemes::new(text.slice(line_start..line_end)).enumerate() {
for (i, g) in text.slice(line_start..line_end).graphemes().enumerate() {
if i == col {
break;
}
@ -306,7 +308,7 @@ pub fn pos_at_visual_coords(text: RopeSlice, coords: Position, tab_width: usize)
let mut col_char_offset = 0;
let mut cols_remaining = col;
for grapheme in RopeGraphemes::new(text.slice(line_start..line_end)) {
for grapheme in text.slice(line_start..line_end).graphemes() {
let grapheme_width = if grapheme == "\t" {
tab_width - ((col - cols_remaining) % tab_width)
} else {

View file

@ -9,7 +9,7 @@ use crate::{
},
line_ending::get_line_ending,
movement::Direction,
Assoc, ChangeSet, RopeGraphemes, RopeSlice,
Assoc, ChangeSet, RopeSlice,
};
use helix_stdx::range::is_subset;
use helix_stdx::rope::{self, RopeSliceExt};
@ -379,7 +379,7 @@ impl Range {
/// Returns true if this Range covers a single grapheme in the given text
pub fn is_single_grapheme(&self, doc: RopeSlice) -> bool {
let mut graphemes = RopeGraphemes::new(doc.slice(self.from()..self.to()));
let mut graphemes = doc.slice(self.from()..self.to()).graphemes();
let first = graphemes.next();
let second = graphemes.next();
first.is_some() && second.is_none()