move syntax highlighting to separate crate

This commit is contained in:
Pascal Kuthe 2024-03-15 03:47:07 +01:00
parent 08ee8b9443
commit c4b7b08809
No known key found for this signature in database
GPG key ID: D715E8655AE166A6
18 changed files with 2245 additions and 1871 deletions

View file

@ -0,0 +1,439 @@
use std::borrow::Cow;
use std::cell::RefCell;
use std::sync::atomic::{self, AtomicUsize};
use std::{fmt, iter, mem, ops};
use ropey::RopeSlice;
use tree_sitter::{QueryCaptures, QueryCursor, Tree};
use crate::ropey::RopeProvider;
use crate::{
byte_range_to_str, Error, HighlightConfiguration, Syntax, PARSER, TREE_SITTER_MATCH_LIMIT,
};
const CANCELLATION_CHECK_INTERVAL: usize = 100;
/// Indicates which highlight should be applied to a region of source code.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Highlight(pub usize);
/// Represents a single step in rendering a syntax-highlighted document.
#[derive(Copy, Clone, Debug)]
pub enum HighlightEvent {
Source { start: usize, end: usize },
HighlightStart(Highlight),
HighlightEnd,
}
#[derive(Debug)]
struct LocalDef<'a> {
name: Cow<'a, str>,
value_range: ops::Range<usize>,
highlight: Option<Highlight>,
}
#[derive(Debug)]
struct LocalScope<'a> {
inherits: bool,
range: ops::Range<usize>,
local_defs: Vec<LocalDef<'a>>,
}
#[derive(Debug)]
struct HighlightIter<'a> {
source: RopeSlice<'a>,
byte_offset: usize,
cancellation_flag: Option<&'a AtomicUsize>,
layers: Vec<HighlightIterLayer<'a>>,
iter_count: usize,
next_event: Option<HighlightEvent>,
last_highlight_range: Option<(usize, usize, u32)>,
}
struct HighlightIterLayer<'a> {
_tree: Option<Tree>,
cursor: QueryCursor,
captures: RefCell<iter::Peekable<QueryCaptures<'a, 'a, RopeProvider<'a>, &'a [u8]>>>,
config: &'a HighlightConfiguration,
highlight_end_stack: Vec<usize>,
scope_stack: Vec<LocalScope<'a>>,
depth: u32,
}
impl<'a> fmt::Debug for HighlightIterLayer<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("HighlightIterLayer").finish()
}
}
impl<'a> HighlightIterLayer<'a> {
// First, sort scope boundaries by their byte offset in the document. At a
// given position, emit scope endings before scope beginnings. Finally, emit
// scope boundaries from deeper layers first.
fn sort_key(&self) -> Option<(usize, bool, isize)> {
let depth = -(self.depth as isize);
let next_start = self
.captures
.borrow_mut()
.peek()
.map(|(m, i)| m.captures[*i].node.start_byte());
let next_end = self.highlight_end_stack.last().cloned();
match (next_start, next_end) {
(Some(start), Some(end)) => {
if start < end {
Some((start, true, depth))
} else {
Some((end, false, depth))
}
}
(Some(i), None) => Some((i, true, depth)),
(None, Some(j)) => Some((j, false, depth)),
_ => None,
}
}
}
impl<'a> HighlightIter<'a> {
fn emit_event(
&mut self,
offset: usize,
event: Option<HighlightEvent>,
) -> Option<Result<HighlightEvent, Error>> {
let result;
if self.byte_offset < offset {
result = Some(Ok(HighlightEvent::Source {
start: self.byte_offset,
end: offset,
}));
self.byte_offset = offset;
self.next_event = event;
} else {
result = event.map(Ok);
}
self.sort_layers();
result
}
fn sort_layers(&mut self) {
while !self.layers.is_empty() {
if let Some(sort_key) = self.layers[0].sort_key() {
let mut i = 0;
while i + 1 < self.layers.len() {
if let Some(next_offset) = self.layers[i + 1].sort_key() {
if next_offset < sort_key {
i += 1;
continue;
}
} else {
let layer = self.layers.remove(i + 1);
PARSER.with(|ts_parser| {
let highlighter = &mut ts_parser.borrow_mut();
highlighter.cursors.push(layer.cursor);
});
}
break;
}
if i > 0 {
self.layers[0..(i + 1)].rotate_left(1);
}
break;
} else {
let layer = self.layers.remove(0);
PARSER.with(|ts_parser| {
let highlighter = &mut ts_parser.borrow_mut();
highlighter.cursors.push(layer.cursor);
});
}
}
}
}
impl<'a> Iterator for HighlightIter<'a> {
type Item = Result<HighlightEvent, Error>;
fn next(&mut self) -> Option<Self::Item> {
'main: loop {
// If we've already determined the next highlight boundary, just return it.
if let Some(e) = self.next_event.take() {
return Some(Ok(e));
}
// Periodically check for cancellation, returning `Cancelled` error if the
// cancellation flag was flipped.
if let Some(cancellation_flag) = self.cancellation_flag {
self.iter_count += 1;
if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
self.iter_count = 0;
if cancellation_flag.load(atomic::Ordering::Relaxed) != 0 {
return Some(Err(Error::Cancelled));
}
}
}
// If none of the layers have any more highlight boundaries, terminate.
if self.layers.is_empty() {
let len = self.source.len_bytes();
return if self.byte_offset < len {
let result = Some(Ok(HighlightEvent::Source {
start: self.byte_offset,
end: len,
}));
self.byte_offset = len;
result
} else {
None
};
}
// Get the next capture from whichever layer has the earliest highlight boundary.
let range;
let layer = &mut self.layers[0];
let captures = layer.captures.get_mut();
if let Some((next_match, capture_index)) = captures.peek() {
let next_capture = next_match.captures[*capture_index];
range = next_capture.node.byte_range();
// If any previous highlight ends before this node starts, then before
// processing this capture, emit the source code up until the end of the
// previous highlight, and an end event for that highlight.
if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
if end_byte <= range.start {
layer.highlight_end_stack.pop();
return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
}
}
}
// If there are no more captures, then emit any remaining highlight end events.
// And if there are none of those, then just advance to the end of the document.
else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
layer.highlight_end_stack.pop();
return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
} else {
return self.emit_event(self.source.len_bytes(), None);
};
let (mut match_, capture_index) = captures.next().unwrap();
let mut capture = match_.captures[capture_index];
// Remove from the local scope stack any local scopes that have already ended.
while range.start > layer.scope_stack.last().unwrap().range.end {
layer.scope_stack.pop();
}
// If this capture is for tracking local variables, then process the
// local variable info.
let mut reference_highlight = None;
let mut definition_highlight = None;
while match_.pattern_index < layer.config.highlights_pattern_index {
// If the node represents a local scope, push a new local scope onto
// the scope stack.
if Some(capture.index) == layer.config.local_scope_capture_index {
definition_highlight = None;
let mut scope = LocalScope {
inherits: true,
range: range.clone(),
local_defs: Vec::new(),
};
for prop in layer.config.query.property_settings(match_.pattern_index) {
if let "local.scope-inherits" = prop.key.as_ref() {
scope.inherits =
prop.value.as_ref().map_or(true, |r| r.as_ref() == "true");
}
}
layer.scope_stack.push(scope);
}
// If the node represents a definition, add a new definition to the
// local scope at the top of the scope stack.
else if Some(capture.index) == layer.config.local_def_capture_index {
reference_highlight = None;
let scope = layer.scope_stack.last_mut().unwrap();
let mut value_range = 0..0;
for capture in match_.captures {
if Some(capture.index) == layer.config.local_def_value_capture_index {
value_range = capture.node.byte_range();
}
}
let name = byte_range_to_str(range.clone(), self.source);
scope.local_defs.push(LocalDef {
name,
value_range,
highlight: None,
});
definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight);
}
// If the node represents a reference, then try to find the corresponding
// definition in the scope stack.
else if Some(capture.index) == layer.config.local_ref_capture_index
&& definition_highlight.is_none()
{
definition_highlight = None;
let name = byte_range_to_str(range.clone(), self.source);
for scope in layer.scope_stack.iter().rev() {
if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
if def.name == name && range.start >= def.value_range.end {
Some(def.highlight)
} else {
None
}
}) {
reference_highlight = highlight;
break;
}
if !scope.inherits {
break;
}
}
}
// Continue processing any additional matches for the same node.
if let Some((next_match, next_capture_index)) = captures.peek() {
let next_capture = next_match.captures[*next_capture_index];
if next_capture.node == capture.node {
capture = next_capture;
match_ = captures.next().unwrap().0;
continue;
}
}
self.sort_layers();
continue 'main;
}
// Otherwise, this capture must represent a highlight.
// If this exact range has already been highlighted by an earlier pattern, or by
// a different layer, then skip over this one.
if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
if range.start == last_start && range.end == last_end && layer.depth < last_depth {
self.sort_layers();
continue 'main;
}
}
// If the current node was found to be a local variable, then skip over any
// highlighting patterns that are disabled for local variables.
if definition_highlight.is_some() || reference_highlight.is_some() {
while layer.config.non_local_variable_patterns[match_.pattern_index] {
match_.remove();
if let Some((next_match, next_capture_index)) = captures.peek() {
let next_capture = next_match.captures[*next_capture_index];
if next_capture.node == capture.node {
capture = next_capture;
match_ = captures.next().unwrap().0;
continue;
}
}
self.sort_layers();
continue 'main;
}
}
// Once a highlighting pattern is found for the current node, skip over
// any later highlighting patterns that also match this node. Captures
// for a given node are ordered by pattern index, so these subsequent
// captures are guaranteed to be for highlighting, not injections or
// local variables.
while let Some((next_match, next_capture_index)) = captures.peek() {
let next_capture = next_match.captures[*next_capture_index];
if next_capture.node == capture.node {
captures.next();
} else {
break;
}
}
let current_highlight = layer.config.highlight_indices.load()[capture.index as usize];
// If this node represents a local definition, then store the current
// highlight value on the local scope entry representing this node.
if let Some(definition_highlight) = definition_highlight {
*definition_highlight = current_highlight;
}
// Emit a scope start event and push the node's end position to the stack.
if let Some(highlight) = reference_highlight.or(current_highlight) {
self.last_highlight_range = Some((range.start, range.end, layer.depth));
layer.highlight_end_stack.push(range.end);
return self
.emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
}
self.sort_layers();
}
}
}
impl Syntax {
/// Iterate over the highlighted regions for a given slice of source code.
pub fn highlight_iter<'a>(
&'a self,
source: RopeSlice<'a>,
range: Option<std::ops::Range<usize>>,
cancellation_flag: Option<&'a AtomicUsize>,
) -> impl Iterator<Item = Result<HighlightEvent, Error>> + 'a {
let mut layers = self
.layers
.iter()
.filter_map(|(_, layer)| {
// TODO: if range doesn't overlap layer range, skip it
// Reuse a cursor from the pool if available.
let mut cursor = PARSER.with(|ts_parser| {
let highlighter = &mut ts_parser.borrow_mut();
highlighter.cursors.pop().unwrap_or_else(QueryCursor::new)
});
// The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
// prevents them from being moved. But both of these values are really just
// pointers, so it's actually ok to move them.
let cursor_ref =
unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
// if reusing cursors & no range this resets to whole range
cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX));
cursor_ref.set_match_limit(TREE_SITTER_MATCH_LIMIT);
let mut captures = cursor_ref
.captures(
&layer.config.query,
layer.tree().root_node(),
RopeProvider(source),
)
.peekable();
// If there's no captures, skip the layer
captures.peek()?;
Some(HighlightIterLayer {
highlight_end_stack: Vec::new(),
scope_stack: vec![LocalScope {
inherits: false,
range: 0..usize::MAX,
local_defs: Vec::new(),
}],
cursor,
_tree: None,
captures: RefCell::new(captures),
config: layer.config.as_ref(), // TODO: just reuse `layer`
depth: layer.depth, // TODO: just reuse `layer`
})
})
.collect::<Vec<_>>();
layers.sort_unstable_by_key(|layer| layer.sort_key());
let mut result = HighlightIter {
source,
byte_offset: range.map_or(0, |r| r.start),
cancellation_flag,
iter_count: 0,
layers,
next_event: None,
last_highlight_range: None,
};
result.sort_layers();
result
}
}