mirror of
https://github.com/helix-editor/helix.git
synced 2025-04-04 19:37:54 +03:00
getting somewhere
This commit is contained in:
parent
29ffc20290
commit
f7686d7af2
12 changed files with 731 additions and 173 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -1433,6 +1433,7 @@ dependencies = [
|
||||||
"ahash",
|
"ahash",
|
||||||
"arc-swap",
|
"arc-swap",
|
||||||
"bitflags 2.6.0",
|
"bitflags 2.6.0",
|
||||||
|
"cc",
|
||||||
"hashbrown 0.14.5",
|
"hashbrown 0.14.5",
|
||||||
"helix-stdx",
|
"helix-stdx",
|
||||||
"libloading",
|
"libloading",
|
||||||
|
|
|
@ -1,15 +1,21 @@
|
||||||
use core::slice;
|
use core::slice;
|
||||||
|
use std::cell::RefCell;
|
||||||
use std::iter::Peekable;
|
use std::iter::Peekable;
|
||||||
|
use std::mem::replace;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use hashbrown::HashMap;
|
use hashbrown::HashMap;
|
||||||
|
use ropey::RopeSlice;
|
||||||
use slotmap::{new_key_type, SlotMap};
|
use slotmap::{new_key_type, SlotMap};
|
||||||
|
|
||||||
use crate::parse::LayerUpdateFlags;
|
use crate::parse::LayerUpdateFlags;
|
||||||
use crate::tree_sitter::SyntaxTree;
|
use crate::tree_sitter::{
|
||||||
use crate::{HighlightConfiguration, RopeProvider};
|
self, Capture, InactiveQueryCursor, Parser, Query, QueryCursor, RopeTsInput, SyntaxTree,
|
||||||
|
SyntaxTreeNode,
|
||||||
|
};
|
||||||
|
use crate::HighlightConfiguration;
|
||||||
|
|
||||||
// TODO(perf): replace std::ops::Range with helix_core::Range once added
|
// TODO(perf): replace std::ops::Range<usize> with helix_stdx::Range<u32> once added
|
||||||
type Range = std::ops::Range<usize>;
|
type Range = std::ops::Range<usize>;
|
||||||
|
|
||||||
new_key_type! {
|
new_key_type! {
|
||||||
|
@ -23,15 +29,16 @@ pub struct LanguageLayer {
|
||||||
pub(crate) parse_tree: Option<SyntaxTree>,
|
pub(crate) parse_tree: Option<SyntaxTree>,
|
||||||
/// internal flags used during parsing to track incremental invalidation
|
/// internal flags used during parsing to track incremental invalidation
|
||||||
pub(crate) flags: LayerUpdateFlags,
|
pub(crate) flags: LayerUpdateFlags,
|
||||||
|
ranges: Vec<tree_sitter::Range>,
|
||||||
pub(crate) parent: Option<LayerId>,
|
pub(crate) parent: Option<LayerId>,
|
||||||
/// a list of **sorted** non-overlapping injection ranges note that
|
/// a list of **sorted** non-overlapping injection ranges. Note that
|
||||||
/// injection ranges are not relative to the start of this layer but the
|
/// injection ranges are not relative to the start of this layer but the
|
||||||
/// start of the root layer
|
/// start of the root layer
|
||||||
pub(crate) injection_ranges: Box<[InjectionRange]>,
|
pub(crate) injections: Box<[Injection]>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Clone)]
|
||||||
pub(crate) struct InjectionRange {
|
pub(crate) struct Injection {
|
||||||
pub byte_range: Range,
|
pub byte_range: Range,
|
||||||
pub layer: LayerId,
|
pub layer: LayerId,
|
||||||
}
|
}
|
||||||
|
@ -39,11 +46,11 @@ pub(crate) struct InjectionRange {
|
||||||
impl LanguageLayer {
|
impl LanguageLayer {
|
||||||
/// Returns the injection range **within this layers** that contains `idx`.
|
/// Returns the injection range **within this layers** that contains `idx`.
|
||||||
/// This function will not descend into nested injections
|
/// This function will not descend into nested injections
|
||||||
pub(crate) fn injection_at_byte_idx(&self, idx: usize) -> Option<&InjectionRange> {
|
pub(crate) fn injection_at_byte_idx(&self, idx: usize) -> Option<&Injection> {
|
||||||
let i = self
|
let i = self
|
||||||
.injection_ranges
|
.injections
|
||||||
.partition_point(|range| range.byte_range.start <= idx);
|
.partition_point(|range| range.byte_range.start <= idx);
|
||||||
self.injection_ranges
|
self.injections
|
||||||
.get(i)
|
.get(i)
|
||||||
.filter(|injection| injection.byte_range.end > idx)
|
.filter(|injection| injection.byte_range.end > idx)
|
||||||
}
|
}
|
||||||
|
@ -75,20 +82,187 @@ impl InjectionTree {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ActiveInjection<'a> {
|
#[derive(Clone)]
|
||||||
injections: Peekable<slice::Iter<'a, InjectionTree>>,
|
pub struct MatchedNode {
|
||||||
range: InjectionRange,
|
pub capture: Capture,
|
||||||
|
pub byte_range: Range,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ActiveLayer<'a, State> {
|
struct LayerQueryIter<'a> {
|
||||||
state: State,
|
cursor: QueryCursor<'a, 'a, RopeTsInput<'a>>,
|
||||||
/// the query captures just for this layer
|
peeked: Option<MatchedNode>,
|
||||||
layer_captures: Peekable<LayerQueryCaptures<'a>>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type LayerQueryCaptures<'a> = tree_sitter::QueryCaptures<'a, 'a, RopeProvider<'a>, &'a [u8]>;
|
impl<'a> LayerQueryIter<'a> {
|
||||||
|
fn peek(&mut self) -> Option<&MatchedNode> {
|
||||||
|
if self.peeked.is_none() {
|
||||||
|
let (query_match, node_idx) = self.cursor.next_matched_node()?;
|
||||||
|
let matched_node = query_match.matched_node(node_idx);
|
||||||
|
self.peeked = Some(MatchedNode {
|
||||||
|
capture: matched_node.capture,
|
||||||
|
byte_range: matched_node.syntax_node.byte_range(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
self.peeked.as_ref()
|
||||||
|
}
|
||||||
|
|
||||||
pub struct QueryCaptures<'a> {
|
fn consume(&mut self) -> MatchedNode {
|
||||||
active_layers: HashMap<LayerId, ActiveLayer<'a, ()>>,
|
self.peeked.take().unwrap()
|
||||||
active_injections: Vec<ActiveInjection<'a>>,
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ActiveLayer<'a> {
|
||||||
|
query_iter: LayerQueryIter<'a>,
|
||||||
|
injections: Peekable<slice::Iter<'a, Injection>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct QueryBuilder<'a, 'tree> {
|
||||||
|
query: &'a Query,
|
||||||
|
node: &'a SyntaxTreeNode<'tree>,
|
||||||
|
src: RopeSlice<'a>,
|
||||||
|
injection_tree: &'a InjectionTree,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct QueryIter<'a, 'tree> {
|
||||||
|
query_builder: Box<QueryBuilder<'a, 'tree>>,
|
||||||
|
active_layers: HashMap<LayerId, ActiveLayer<'a>>,
|
||||||
|
active_injections: Vec<Injection>,
|
||||||
|
current_injection: Injection,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> QueryIter<'a, '_> {
|
||||||
|
fn enter_injection(&mut self, injection: Injection) -> bool {
|
||||||
|
self.active_layers
|
||||||
|
.entry(injection.layer)
|
||||||
|
.or_insert_with(|| {
|
||||||
|
let layer = &self.query_builder.injection_tree.layers[injection.layer];
|
||||||
|
let injection_start = layer
|
||||||
|
.injections
|
||||||
|
.partition_point(|child| child.byte_range.start < injection.byte_range.start);
|
||||||
|
let cursor = get_cursor().execute_query(
|
||||||
|
self.query_builder.query,
|
||||||
|
self.query_builder.node,
|
||||||
|
RopeTsInput::new(self.query_builder.src),
|
||||||
|
);
|
||||||
|
ActiveLayer {
|
||||||
|
query_iter: LayerQueryIter {
|
||||||
|
cursor,
|
||||||
|
peeked: None,
|
||||||
|
},
|
||||||
|
injections: layer.injections[injection_start..].iter().peekable(),
|
||||||
|
}
|
||||||
|
});
|
||||||
|
let old_injection = replace(&mut self.current_injection, injection);
|
||||||
|
self.active_injections.push(old_injection);
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
fn exit_injection(&mut self) -> Option<Injection> {
|
||||||
|
let injection = replace(&mut self.current_injection, self.active_injections.pop()?);
|
||||||
|
let finished_layer = self.active_layers[&injection.layer]
|
||||||
|
.query_iter
|
||||||
|
.peeked
|
||||||
|
.is_none();
|
||||||
|
if finished_layer {
|
||||||
|
let layer = self.active_layers.remove(&injection.layer).unwrap();
|
||||||
|
reuse_cursor(layer.query_iter.cursor.reuse());
|
||||||
|
}
|
||||||
|
Some(injection)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub enum QueryIterEvent {
|
||||||
|
EnterInjection(Injection),
|
||||||
|
Match(MatchedNode),
|
||||||
|
ExitInjection(Injection),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Iterator for QueryIter<'a, '_> {
|
||||||
|
type Item = QueryIterEvent;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<QueryIterEvent> {
|
||||||
|
loop {
|
||||||
|
let active_layer = self
|
||||||
|
.active_layers
|
||||||
|
.get_mut(&self.current_injection.layer)
|
||||||
|
.unwrap();
|
||||||
|
let next_injection = active_layer.injections.peek().filter(|injection| {
|
||||||
|
injection.byte_range.start < self.current_injection.byte_range.end
|
||||||
|
});
|
||||||
|
let next_match = active_layer.query_iter.peek().filter(|matched_node| {
|
||||||
|
matched_node.byte_range.start < self.current_injection.byte_range.end
|
||||||
|
});
|
||||||
|
|
||||||
|
match (next_match, next_injection) {
|
||||||
|
(None, None) => {
|
||||||
|
return self.exit_injection().map(QueryIterEvent::ExitInjection);
|
||||||
|
}
|
||||||
|
(Some(_), None) => {
|
||||||
|
// consume match
|
||||||
|
let matched_node = active_layer.query_iter.consume();
|
||||||
|
return Some(QueryIterEvent::Match(matched_node));
|
||||||
|
}
|
||||||
|
(Some(matched_node), Some(injection))
|
||||||
|
if matched_node.byte_range.start <= injection.byte_range.end =>
|
||||||
|
{
|
||||||
|
// consume match
|
||||||
|
let matched_node = active_layer.query_iter.consume();
|
||||||
|
// ignore nodes that are overlapped by the injection
|
||||||
|
if matched_node.byte_range.start <= injection.byte_range.start {
|
||||||
|
return Some(QueryIterEvent::Match(matched_node));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(Some(_), Some(_)) | (None, Some(_)) => {
|
||||||
|
// consume injection
|
||||||
|
let injection = active_layer.injections.next().unwrap();
|
||||||
|
if self.enter_injection(injection.clone()) {
|
||||||
|
return Some(QueryIterEvent::EnterInjection(injection.clone()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct TsParser {
|
||||||
|
parser: crate::tree_sitter::Parser,
|
||||||
|
pub cursors: Vec<crate::tree_sitter::InactiveQueryCursor>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// could also just use a pool, or a single instance?
|
||||||
|
thread_local! {
|
||||||
|
static PARSER: RefCell<TsParser> = RefCell::new(TsParser {
|
||||||
|
parser: Parser::new(),
|
||||||
|
cursors: Vec::new(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_cursor<T>(f: impl FnOnce(&mut InactiveQueryCursor) -> T) -> T {
|
||||||
|
PARSER.with(|parser| {
|
||||||
|
let mut parser = parser.borrow_mut();
|
||||||
|
let mut cursor = parser
|
||||||
|
.cursors
|
||||||
|
.pop()
|
||||||
|
.unwrap_or_else(InactiveQueryCursor::new);
|
||||||
|
let res = f(&mut cursor);
|
||||||
|
parser.cursors.push(cursor);
|
||||||
|
res
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_cursor() -> InactiveQueryCursor {
|
||||||
|
PARSER.with(|parser| {
|
||||||
|
let mut parser = parser.borrow_mut();
|
||||||
|
parser
|
||||||
|
.cursors
|
||||||
|
.pop()
|
||||||
|
.unwrap_or_else(InactiveQueryCursor::new)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn reuse_cursor(cursor: InactiveQueryCursor) {
|
||||||
|
PARSER.with(|parser| {
|
||||||
|
let mut parser = parser.borrow_mut();
|
||||||
|
parser.cursors.push(cursor)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -337,7 +337,7 @@ thread_local! {
|
||||||
pub fn with_cursor<T>(f: impl FnOnce(&mut QueryCursor) -> T) -> T {
|
pub fn with_cursor<T>(f: impl FnOnce(&mut QueryCursor) -> T) -> T {
|
||||||
PARSER.with(|parser| {
|
PARSER.with(|parser| {
|
||||||
let mut parser = parser.borrow_mut();
|
let mut parser = parser.borrow_mut();
|
||||||
let mut cursor = parser.cursors.pop().unwrap_or_else(QueryCursor::new);
|
let mut cursor = parser.cursors.pop().unwrap_or_default();
|
||||||
let res = f(&mut cursor);
|
let res = f(&mut cursor);
|
||||||
parser.cursors.push(cursor);
|
parser.cursors.push(cursor);
|
||||||
res
|
res
|
||||||
|
|
|
@ -1,13 +1,19 @@
|
||||||
mod grammar;
|
mod grammar;
|
||||||
mod parser;
|
mod parser;
|
||||||
mod query;
|
mod query;
|
||||||
mod query_captures;
|
mod query_cursor;
|
||||||
|
mod query_match;
|
||||||
mod ropey;
|
mod ropey;
|
||||||
mod syntax_tree;
|
mod syntax_tree;
|
||||||
mod syntax_tree_node;
|
mod syntax_tree_node;
|
||||||
|
|
||||||
|
use std::ops;
|
||||||
|
|
||||||
pub use grammar::Grammar;
|
pub use grammar::Grammar;
|
||||||
pub use parser::{Parser, ParserInputRaw};
|
pub use parser::{Parser, ParserInputRaw};
|
||||||
|
pub use query::{Capture, ParserErrorLocation, Pattern, Query, QueryStr};
|
||||||
|
pub use query_cursor::{InactiveQueryCursor, MatchedNode, MatchedNodeIdx, QueryCursor, QueryMatch};
|
||||||
|
pub use ropey::RopeTsInput;
|
||||||
pub use syntax_tree::{InputEdit, SyntaxTree};
|
pub use syntax_tree::{InputEdit, SyntaxTree};
|
||||||
pub use syntax_tree_node::SyntaxTreeNode;
|
pub use syntax_tree_node::SyntaxTreeNode;
|
||||||
|
|
||||||
|
@ -26,3 +32,14 @@ pub struct Range {
|
||||||
pub start_byte: u32,
|
pub start_byte: u32,
|
||||||
pub end_byte: u32,
|
pub end_byte: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub trait TsInput {
|
||||||
|
type Cursor: regex_cursor::Cursor;
|
||||||
|
fn cursor_at(&mut self, offset: usize) -> &mut Self::Cursor;
|
||||||
|
fn eq(&mut self, range1: ops::Range<usize>, range2: ops::Range<usize>) -> bool;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait IntoTsInput {
|
||||||
|
type TsInput: TsInput;
|
||||||
|
fn into_ts_input(self) -> Self::TsInput;
|
||||||
|
}
|
||||||
|
|
|
@ -1,10 +1,12 @@
|
||||||
use std::os::raw::c_void;
|
use std::os::raw::c_void;
|
||||||
use std::panic::catch_unwind;
|
use std::panic::{catch_unwind, AssertUnwindSafe};
|
||||||
use std::ptr::NonNull;
|
use std::ptr::NonNull;
|
||||||
use std::{fmt, ptr};
|
use std::{fmt, ptr};
|
||||||
|
|
||||||
|
use regex_cursor::Cursor;
|
||||||
|
|
||||||
use crate::tree_sitter::syntax_tree::{SyntaxTree, SyntaxTreeData};
|
use crate::tree_sitter::syntax_tree::{SyntaxTree, SyntaxTreeData};
|
||||||
use crate::tree_sitter::{Grammar, Point, Range};
|
use crate::tree_sitter::{Grammar, IntoTsInput, Point, Range, TsInput};
|
||||||
|
|
||||||
// opaque data
|
// opaque data
|
||||||
enum ParserData {}
|
enum ParserData {}
|
||||||
|
@ -51,25 +53,28 @@ impl Parser {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn parse<I: ParserInput>(
|
pub fn parse<I: TsInput>(
|
||||||
&mut self,
|
&mut self,
|
||||||
input: impl IntoParserInput<ParserInput = I>,
|
input: impl IntoTsInput<TsInput = I>,
|
||||||
old_tree: Option<&SyntaxTree>,
|
old_tree: Option<&SyntaxTree>,
|
||||||
) -> Option<SyntaxTree> {
|
) -> Option<SyntaxTree> {
|
||||||
let mut input = input.into_parser_input();
|
let mut input = input.into_ts_input();
|
||||||
unsafe extern "C" fn read<C: ParserInput>(
|
unsafe extern "C" fn read<C: TsInput>(
|
||||||
payload: NonNull<c_void>,
|
payload: NonNull<c_void>,
|
||||||
byte_index: u32,
|
byte_index: u32,
|
||||||
_position: Point,
|
_position: Point,
|
||||||
bytes_read: &mut u32,
|
bytes_read: *mut u32,
|
||||||
) -> *const u8 {
|
) -> *const u8 {
|
||||||
match catch_unwind(|| {
|
let cursor = catch_unwind(AssertUnwindSafe(move || {
|
||||||
let cursor: &mut C = payload.cast().as_mut();
|
let input: &mut C = payload.cast().as_mut();
|
||||||
cursor.read(byte_index as usize)
|
let cursor = input.cursor_at(byte_index as usize);
|
||||||
}) {
|
let slice = cursor.chunk();
|
||||||
Ok(slice) => {
|
(slice.as_ptr(), slice.len().try_into().unwrap())
|
||||||
*bytes_read = slice.len() as u32;
|
}));
|
||||||
slice.as_ptr()
|
match cursor {
|
||||||
|
Ok((ptr, len)) => {
|
||||||
|
*bytes_read = len;
|
||||||
|
ptr
|
||||||
}
|
}
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
*bytes_read = 0;
|
*bytes_read = 0;
|
||||||
|
@ -121,7 +126,7 @@ type TreeSitterReadFn = unsafe extern "C" fn(
|
||||||
payload: NonNull<c_void>,
|
payload: NonNull<c_void>,
|
||||||
byte_index: u32,
|
byte_index: u32,
|
||||||
position: Point,
|
position: Point,
|
||||||
bytes_read: &mut u32,
|
bytes_read: *mut u32,
|
||||||
) -> *const u8;
|
) -> *const u8;
|
||||||
|
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
|
@ -132,15 +137,6 @@ pub struct ParserInputRaw {
|
||||||
pub encoding: u32,
|
pub encoding: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait ParserInput {
|
|
||||||
fn read(&mut self, offset: usize) -> &[u8];
|
|
||||||
}
|
|
||||||
|
|
||||||
pub trait IntoParserInput {
|
|
||||||
type ParserInput;
|
|
||||||
fn into_parser_input(self) -> Self::ParserInput;
|
|
||||||
}
|
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
/// Create a new parser
|
/// Create a new parser
|
||||||
fn ts_parser_new() -> NonNull<ParserData>;
|
fn ts_parser_new() -> NonNull<ParserData>;
|
||||||
|
|
|
@ -4,8 +4,6 @@ use std::path::{Path, PathBuf};
|
||||||
use std::ptr::NonNull;
|
use std::ptr::NonNull;
|
||||||
use std::{slice, str};
|
use std::{slice, str};
|
||||||
|
|
||||||
use regex_cursor::Cursor;
|
|
||||||
|
|
||||||
use crate::tree_sitter::query::predicate::{InvalidPredicateError, Predicate, TextPredicate};
|
use crate::tree_sitter::query::predicate::{InvalidPredicateError, Predicate, TextPredicate};
|
||||||
use crate::tree_sitter::query::property::QueryProperty;
|
use crate::tree_sitter::query::property::QueryProperty;
|
||||||
use crate::tree_sitter::Grammar;
|
use crate::tree_sitter::Grammar;
|
||||||
|
@ -13,20 +11,23 @@ use crate::tree_sitter::Grammar;
|
||||||
mod predicate;
|
mod predicate;
|
||||||
mod property;
|
mod property;
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||||
|
pub struct Pattern(pub(crate) u32);
|
||||||
|
|
||||||
pub enum QueryData {}
|
pub enum QueryData {}
|
||||||
|
|
||||||
pub(super) struct Pattern {
|
pub(super) struct PatternData {
|
||||||
text_predicates: Range<u32>,
|
text_predicates: Range<u32>,
|
||||||
properties: Range<u32>,
|
properties: Range<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Query {
|
pub struct Query {
|
||||||
raw: NonNull<QueryData>,
|
pub(crate) raw: NonNull<QueryData>,
|
||||||
num_captures: u32,
|
num_captures: u32,
|
||||||
num_strings: u32,
|
num_strings: u32,
|
||||||
text_predicates: Vec<TextPredicate>,
|
text_predicates: Vec<TextPredicate>,
|
||||||
properties: Vec<QueryProperty>,
|
properties: Vec<QueryProperty>,
|
||||||
patterns: Box<[Pattern]>,
|
patterns: Box<[PatternData]>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Query {
|
impl Query {
|
||||||
|
@ -40,7 +41,7 @@ impl Query {
|
||||||
grammar: Grammar,
|
grammar: Grammar,
|
||||||
source: &str,
|
source: &str,
|
||||||
path: impl AsRef<Path>,
|
path: impl AsRef<Path>,
|
||||||
mut custom_predicate: impl FnMut(Predicate) -> Result<(), InvalidPredicateError>,
|
mut custom_predicate: impl FnMut(Pattern, Predicate) -> Result<(), InvalidPredicateError>,
|
||||||
) -> Result<Self, ParseError> {
|
) -> Result<Self, ParseError> {
|
||||||
assert!(
|
assert!(
|
||||||
source.len() <= i32::MAX as usize,
|
source.len() <= i32::MAX as usize,
|
||||||
|
@ -121,7 +122,7 @@ impl Query {
|
||||||
}
|
}
|
||||||
RawQueryError::Language => unreachable!("should be handled at grammar load"),
|
RawQueryError::Language => unreachable!("should be handled at grammar load"),
|
||||||
};
|
};
|
||||||
return Err(err)
|
return Err(err);
|
||||||
};
|
};
|
||||||
|
|
||||||
// I am not going to bother with safety comments here, all of these are
|
// I am not going to bother with safety comments here, all of these are
|
||||||
|
@ -141,7 +142,7 @@ impl Query {
|
||||||
let patterns: Result<_, ParseError> = (0..num_patterns)
|
let patterns: Result<_, ParseError> = (0..num_patterns)
|
||||||
.map(|pattern| {
|
.map(|pattern| {
|
||||||
query
|
query
|
||||||
.parse_pattern_predicates(pattern, &mut custom_predicate)
|
.parse_pattern_predicates(Pattern(pattern), &mut custom_predicate)
|
||||||
.map_err(|err| ParseError::InvalidPredicate {
|
.map_err(|err| ParseError::InvalidPredicate {
|
||||||
message: err.msg.into(),
|
message: err.msg.into(),
|
||||||
location: ParserErrorLocation::new(
|
location: ParserErrorLocation::new(
|
||||||
|
@ -157,35 +158,6 @@ impl Query {
|
||||||
Ok(query)
|
Ok(query)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn satsifies_text_predicate<C: Cursor>(
|
|
||||||
&self,
|
|
||||||
cursor: &mut regex_cursor::Input<C>,
|
|
||||||
pattern: u32,
|
|
||||||
) {
|
|
||||||
let text_predicates = self.patterns[pattern as usize].text_predicates;
|
|
||||||
let text_predicates =
|
|
||||||
&self.text_predicates[text_predicates.start as usize..text_predicates.end as usize];
|
|
||||||
for predicate in text_predicates {
|
|
||||||
match predicate.kind {
|
|
||||||
predicate::TextPredicateKind::EqString(_) => todo!(),
|
|
||||||
predicate::TextPredicateKind::EqCapture(_) => todo!(),
|
|
||||||
predicate::TextPredicateKind::MatchString(_) => todo!(),
|
|
||||||
predicate::TextPredicateKind::AnyString(_) => todo!(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// fn parse_predicates(&mut self) {
|
|
||||||
// let pattern_count = unsafe { ts_query_pattern_count(self.raw) };
|
|
||||||
|
|
||||||
// let mut text_predicates = Vec::with_capacity(pattern_count as usize);
|
|
||||||
// let mut property_predicates = Vec::with_capacity(pattern_count as usize);
|
|
||||||
// let mut property_settings = Vec::with_capacity(pattern_count as usize);
|
|
||||||
// let mut general_predicates = Vec::with_capacity(pattern_count as usize);
|
|
||||||
|
|
||||||
// for i in 0..pattern_count {}
|
|
||||||
// }
|
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn get_string(&self, str: QueryStr) -> &str {
|
fn get_string(&self, str: QueryStr) -> &str {
|
||||||
let value_id = str.0;
|
let value_id = str.0;
|
||||||
|
@ -218,10 +190,15 @@ impl Query {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn pattern_properies(&self, pattern_idx: u32) -> &[QueryProperty] {
|
pub fn pattern_properies(&self, pattern_idx: Pattern) -> &[QueryProperty] {
|
||||||
let range = self.patterns[pattern_idx as usize].properties.clone();
|
let range = self.patterns[pattern_idx.0 as usize].properties.clone();
|
||||||
&self.properties[range.start as usize..range.end as usize]
|
&self.properties[range.start as usize..range.end as usize]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn pattern_text_predicates(&self, pattern_idx: u16) -> &[TextPredicate] {
|
||||||
|
let range = self.patterns[pattern_idx as usize].text_predicates.clone();
|
||||||
|
&self.text_predicates[range.start as usize..range.end as usize]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Drop for Query {
|
impl Drop for Query {
|
||||||
|
@ -231,6 +208,7 @@ impl Drop for Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||||
|
#[repr(transparent)]
|
||||||
pub struct Capture(u32);
|
pub struct Capture(u32);
|
||||||
|
|
||||||
impl Capture {
|
impl Capture {
|
||||||
|
|
|
@ -1,11 +1,16 @@
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
|
use std::iter::zip;
|
||||||
|
use std::ops::Range;
|
||||||
use std::ptr::NonNull;
|
use std::ptr::NonNull;
|
||||||
use std::{fmt, slice};
|
use std::{fmt, slice};
|
||||||
|
|
||||||
use crate::tree_sitter::query::property::QueryProperty;
|
use crate::tree_sitter::query::property::QueryProperty;
|
||||||
use crate::tree_sitter::query::{Capture, Pattern, Query, QueryData, QueryStr};
|
use crate::tree_sitter::query::{Capture, Pattern, PatternData, Query, QueryData, QueryStr};
|
||||||
|
use crate::tree_sitter::query_cursor::MatchedNode;
|
||||||
|
use crate::tree_sitter::TsInput;
|
||||||
|
|
||||||
use regex_cursor::engines::meta::Regex;
|
use regex_cursor::engines::meta::Regex;
|
||||||
|
use regex_cursor::Cursor;
|
||||||
|
|
||||||
macro_rules! bail {
|
macro_rules! bail {
|
||||||
($($args:tt)*) => {{
|
($($args:tt)*) => {{
|
||||||
|
@ -29,25 +34,141 @@ pub(super) enum TextPredicateKind {
|
||||||
AnyString(Box<[QueryStr]>),
|
AnyString(Box<[QueryStr]>),
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) struct TextPredicate {
|
pub(crate) struct TextPredicate {
|
||||||
capture: Capture,
|
capture: Capture,
|
||||||
kind: TextPredicateKind,
|
kind: TextPredicateKind,
|
||||||
negated: bool,
|
negated: bool,
|
||||||
match_all: bool,
|
match_all: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn input_matches_str<I: TsInput>(str: &str, range: Range<usize>, input: &mut I) -> bool {
|
||||||
|
if str.len() != range.len() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
let mut str = str.as_bytes();
|
||||||
|
let cursor = input.cursor_at(range.start);
|
||||||
|
let start_in_chunk = range.start - cursor.offset();
|
||||||
|
if range.end - cursor.offset() <= cursor.chunk().len() {
|
||||||
|
// hotpath
|
||||||
|
return &cursor.chunk()[start_in_chunk..range.end - cursor.offset()] == str;
|
||||||
|
}
|
||||||
|
if cursor.chunk()[start_in_chunk..] != str[..cursor.chunk().len() - start_in_chunk] {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
str = &str[..cursor.chunk().len() - start_in_chunk];
|
||||||
|
while cursor.advance() {
|
||||||
|
if str.len() <= cursor.chunk().len() {
|
||||||
|
return &cursor.chunk()[..range.end - cursor.offset()] == str;
|
||||||
|
}
|
||||||
|
if &str[..cursor.chunk().len()] != cursor.chunk() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
str = &str[cursor.chunk().len()..]
|
||||||
|
}
|
||||||
|
// buggy cursor/invalid range
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
fn inputs_match<I: TsInput>(str: &str, range: Range<usize>, input: &mut I) -> bool {
|
||||||
|
if str.len() != range.len() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
let mut str = str.as_bytes();
|
||||||
|
let cursor = input.cursor_at(range.start);
|
||||||
|
let start_in_chunk = range.start - cursor.offset();
|
||||||
|
if range.end - cursor.offset() <= cursor.chunk().len() {
|
||||||
|
// hotpath
|
||||||
|
return &cursor.chunk()[start_in_chunk..range.end - cursor.offset()] == str;
|
||||||
|
}
|
||||||
|
if cursor.chunk()[start_in_chunk..] != str[..cursor.chunk().len() - start_in_chunk] {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
str = &str[..cursor.chunk().len() - start_in_chunk];
|
||||||
|
while cursor.advance() {
|
||||||
|
if str.len() <= cursor.chunk().len() {
|
||||||
|
return &cursor.chunk()[..range.end - cursor.offset()] == str;
|
||||||
|
}
|
||||||
|
if &str[..cursor.chunk().len()] != cursor.chunk() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
str = &str[cursor.chunk().len()..]
|
||||||
|
}
|
||||||
|
// buggy cursor/invalid range
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TextPredicate {
|
||||||
|
/// handlers match_all and negated
|
||||||
|
fn satisfied_helper(&self, mut nodes: impl Iterator<Item = bool>) -> bool {
|
||||||
|
if self.match_all {
|
||||||
|
nodes.all(|matched| matched != self.negated)
|
||||||
|
} else {
|
||||||
|
nodes.any(|matched| matched != self.negated)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn satsified<I: TsInput>(
|
||||||
|
&self,
|
||||||
|
input: &mut I,
|
||||||
|
matched_nodes: &[MatchedNode],
|
||||||
|
query: &Query,
|
||||||
|
) -> bool {
|
||||||
|
let mut capture_nodes = matched_nodes
|
||||||
|
.iter()
|
||||||
|
.filter(|matched_node| matched_node.capture == self.capture);
|
||||||
|
match self.kind {
|
||||||
|
TextPredicateKind::EqString(str) => self.satisfied_helper(capture_nodes.map(|node| {
|
||||||
|
let range = node.syntax_node.byte_range();
|
||||||
|
input_matches_str(query.get_string(str), range.clone(), input)
|
||||||
|
})),
|
||||||
|
TextPredicateKind::EqCapture(other_capture) => {
|
||||||
|
let mut other_nodes = matched_nodes
|
||||||
|
.iter()
|
||||||
|
.filter(|matched_node| matched_node.capture == other_capture);
|
||||||
|
|
||||||
|
let res = self.satisfied_helper(zip(&mut capture_nodes, &mut other_nodes).map(
|
||||||
|
|(node1, node2)| {
|
||||||
|
let range1 = node1.syntax_node.byte_range();
|
||||||
|
let range2 = node2.syntax_node.byte_range();
|
||||||
|
input.eq(range1, range2)
|
||||||
|
},
|
||||||
|
));
|
||||||
|
let consumed_all = capture_nodes.next().is_none() && other_nodes.next().is_none();
|
||||||
|
res && (!self.match_all || consumed_all)
|
||||||
|
}
|
||||||
|
TextPredicateKind::MatchString(ref regex) => {
|
||||||
|
self.satisfied_helper(capture_nodes.map(|node| {
|
||||||
|
let range = node.syntax_node.byte_range();
|
||||||
|
let input = regex_cursor::Input::new(input.cursor_at(range.start)).range(range);
|
||||||
|
regex.is_match(input)
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
TextPredicateKind::AnyString(ref strings) => {
|
||||||
|
let strings = strings.iter().map(|&str| query.get_string(str));
|
||||||
|
self.satisfied_helper(capture_nodes.map(|node| {
|
||||||
|
let range = node.syntax_node.byte_range();
|
||||||
|
strings
|
||||||
|
.clone()
|
||||||
|
.filter(|str| str.len() == range.len())
|
||||||
|
.any(|str| input_matches_str(str, range.clone(), input))
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Query {
|
impl Query {
|
||||||
pub(super) fn parse_pattern_predicates(
|
pub(super) fn parse_pattern_predicates(
|
||||||
&mut self,
|
&mut self,
|
||||||
pattern_index: u32,
|
pattern: Pattern,
|
||||||
mut custom_predicate: impl FnMut(Predicate) -> Result<(), InvalidPredicateError>,
|
mut custom_predicate: impl FnMut(Pattern, Predicate) -> Result<(), InvalidPredicateError>,
|
||||||
) -> Result<Pattern, InvalidPredicateError> {
|
) -> Result<PatternData, InvalidPredicateError> {
|
||||||
let text_predicate_start = self.text_predicates.len() as u32;
|
let text_predicate_start = self.text_predicates.len() as u32;
|
||||||
let property_start = self.properties.len() as u32;
|
let property_start = self.properties.len() as u32;
|
||||||
|
|
||||||
let predicate_steps = unsafe {
|
let predicate_steps = unsafe {
|
||||||
let mut len = 0u32;
|
let mut len = 0u32;
|
||||||
let raw_predicates = ts_query_predicates_for_pattern(self.raw, pattern_index, &mut len);
|
let raw_predicates = ts_query_predicates_for_pattern(self.raw, pattern.0, &mut len);
|
||||||
(len != 0)
|
(len != 0)
|
||||||
.then(|| slice::from_raw_parts(raw_predicates, len as usize))
|
.then(|| slice::from_raw_parts(raw_predicates, len as usize))
|
||||||
.unwrap_or_default()
|
.unwrap_or_default()
|
||||||
|
@ -118,10 +239,10 @@ impl Query {
|
||||||
// is and is-not are better handeled as custom predicates since interpreting is context dependent
|
// is and is-not are better handeled as custom predicates since interpreting is context dependent
|
||||||
// "is?" => property_predicates.push((QueryProperty::parse(&predicate), false)),
|
// "is?" => property_predicates.push((QueryProperty::parse(&predicate), false)),
|
||||||
// "is-not?" => property_predicates.push((QueryProperty::parse(&predicate), true)),
|
// "is-not?" => property_predicates.push((QueryProperty::parse(&predicate), true)),
|
||||||
_ => custom_predicate(predicate)?,
|
_ => custom_predicate(pattern, predicate)?,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(Pattern {
|
Ok(PatternData {
|
||||||
text_predicates: text_predicate_start..self.text_predicates.len() as u32,
|
text_predicates: text_predicate_start..self.text_predicates.len() as u32,
|
||||||
properties: property_start..self.properties.len() as u32,
|
properties: property_start..self.properties.len() as u32,
|
||||||
})
|
})
|
||||||
|
|
|
@ -1,66 +0,0 @@
|
||||||
use std::ptr::{self, NonNull};
|
|
||||||
|
|
||||||
use regex_cursor::Cursor;
|
|
||||||
|
|
||||||
use crate::tree_sitter::query::Query;
|
|
||||||
use crate::tree_sitter::syntax_tree_node::SyntaxTreeNodeRaw;
|
|
||||||
|
|
||||||
enum QueryCursorData {}
|
|
||||||
|
|
||||||
pub struct QueryCaptures<'a> {
|
|
||||||
query: &'a Query,
|
|
||||||
query_cursor: &'a mut QueryCursorData,
|
|
||||||
text_cursor: regex_cursor::RopeyCursor<'a>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<C: Cursor> QueryCaptures<'_, C> {
|
|
||||||
fn next(&mut self) {
|
|
||||||
let mut query_match = TSQueryMatch {
|
|
||||||
id: 0,
|
|
||||||
pattern_index: 0,
|
|
||||||
capture_count: 0,
|
|
||||||
captures: ptr::null(),
|
|
||||||
};
|
|
||||||
let mut capture_idx = 0;
|
|
||||||
loop {
|
|
||||||
let success = unsafe {
|
|
||||||
ts_query_cursor_next_capture(
|
|
||||||
&mut self.query_cursor,
|
|
||||||
&mut query_match,
|
|
||||||
&mut capture_idx,
|
|
||||||
)
|
|
||||||
};
|
|
||||||
if !success {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let mut input = regex_cursor::Input::new(self.text_cursor.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[repr(C)]
|
|
||||||
#[derive(Debug)]
|
|
||||||
struct TSQueryCapture {
|
|
||||||
node: SyntaxTreeNodeRaw,
|
|
||||||
index: u32,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[repr(C)]
|
|
||||||
#[derive(Debug)]
|
|
||||||
struct TSQueryMatch {
|
|
||||||
id: u32,
|
|
||||||
pattern_index: u16,
|
|
||||||
capture_count: u16,
|
|
||||||
captures: *const TSQueryCapture,
|
|
||||||
}
|
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
/// Advance to the next capture of the currently running query.
|
|
||||||
/// If there is a capture, write its match to `*match` and its index within
|
|
||||||
/// the matche's capture list to `*capture_index`. Otherwise, return `false`.
|
|
||||||
fn ts_query_cursor_next_capture(
|
|
||||||
self_: &mut QueryCursorData,
|
|
||||||
match_: &mut TSQueryMatch,
|
|
||||||
capture_index: &mut u32,
|
|
||||||
) -> bool;
|
|
||||||
}
|
|
319
helix-syntax/src/tree_sitter/query_cursor.rs
Normal file
319
helix-syntax/src/tree_sitter/query_cursor.rs
Normal file
|
@ -0,0 +1,319 @@
|
||||||
|
use core::slice;
|
||||||
|
use std::marker::PhantomData;
|
||||||
|
use std::mem::replace;
|
||||||
|
use std::ops::Range;
|
||||||
|
use std::ptr::{self, NonNull};
|
||||||
|
|
||||||
|
use crate::tree_sitter::query::{Capture, Pattern, Query, QueryData};
|
||||||
|
use crate::tree_sitter::syntax_tree_node::SyntaxTreeNodeRaw;
|
||||||
|
use crate::tree_sitter::{SyntaxTree, SyntaxTreeNode, TsInput};
|
||||||
|
|
||||||
|
enum QueryCursorData {}
|
||||||
|
|
||||||
|
pub struct QueryCursor<'a, 'tree, I: TsInput> {
|
||||||
|
query: &'a Query,
|
||||||
|
ptr: *mut QueryCursorData,
|
||||||
|
tree: PhantomData<&'tree SyntaxTree>,
|
||||||
|
input: I,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'tree, I: TsInput> QueryCursor<'_, 'tree, I> {
|
||||||
|
pub fn next_match(&mut self) -> Option<QueryMatch<'_, 'tree>> {
|
||||||
|
let mut query_match = TSQueryMatch {
|
||||||
|
id: 0,
|
||||||
|
pattern_index: 0,
|
||||||
|
capture_count: 0,
|
||||||
|
captures: ptr::null(),
|
||||||
|
};
|
||||||
|
loop {
|
||||||
|
let success = unsafe { ts_query_cursor_next_match(self.ptr, &mut query_match) };
|
||||||
|
if !success {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let matched_nodes = unsafe {
|
||||||
|
slice::from_raw_parts(
|
||||||
|
query_match.captures.cast(),
|
||||||
|
query_match.capture_count as usize,
|
||||||
|
)
|
||||||
|
};
|
||||||
|
let satisfies_predicates = self
|
||||||
|
.query
|
||||||
|
.pattern_text_predicates(query_match.pattern_index)
|
||||||
|
.iter()
|
||||||
|
.all(|predicate| predicate.satsified(&mut self.input, matched_nodes, self.query));
|
||||||
|
if satisfies_predicates {
|
||||||
|
let res = QueryMatch {
|
||||||
|
id: query_match.id,
|
||||||
|
pattern: Pattern(query_match.pattern_index as u32),
|
||||||
|
matched_nodes,
|
||||||
|
query_cursor: unsafe { &mut *self.ptr },
|
||||||
|
_tree: PhantomData,
|
||||||
|
};
|
||||||
|
return Some(res);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn next_matched_node(&mut self) -> Option<(QueryMatch<'_, 'tree>, MatchedNodeIdx)> {
|
||||||
|
let mut query_match = TSQueryMatch {
|
||||||
|
id: 0,
|
||||||
|
pattern_index: 0,
|
||||||
|
capture_count: 0,
|
||||||
|
captures: ptr::null(),
|
||||||
|
};
|
||||||
|
let mut capture_idx = 0;
|
||||||
|
loop {
|
||||||
|
let success = unsafe {
|
||||||
|
ts_query_cursor_next_capture(self.ptr, &mut query_match, &mut capture_idx)
|
||||||
|
};
|
||||||
|
if !success {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let matched_nodes = unsafe {
|
||||||
|
slice::from_raw_parts(
|
||||||
|
query_match.captures.cast(),
|
||||||
|
query_match.capture_count as usize,
|
||||||
|
)
|
||||||
|
};
|
||||||
|
let satisfies_predicates = self
|
||||||
|
.query
|
||||||
|
.pattern_text_predicates(query_match.pattern_index)
|
||||||
|
.iter()
|
||||||
|
.all(|predicate| predicate.satsified(&mut self.input, matched_nodes, self.query));
|
||||||
|
if satisfies_predicates {
|
||||||
|
let res = QueryMatch {
|
||||||
|
id: query_match.id,
|
||||||
|
pattern: Pattern(query_match.pattern_index as u32),
|
||||||
|
matched_nodes,
|
||||||
|
query_cursor: unsafe { &mut *self.ptr },
|
||||||
|
_tree: PhantomData,
|
||||||
|
};
|
||||||
|
return Some((res, capture_idx));
|
||||||
|
} else {
|
||||||
|
unsafe {
|
||||||
|
ts_query_cursor_remove_match(self.ptr, query_match.id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_byte_range(&mut self, range: Range<usize>) {
|
||||||
|
unsafe {
|
||||||
|
ts_query_cursor_set_byte_range(self.ptr, range.start as u32, range.end as u32);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn reuse(mut self) -> InactiveQueryCursor {
|
||||||
|
let ptr = replace(&mut self.ptr, ptr::null_mut());
|
||||||
|
InactiveQueryCursor {
|
||||||
|
ptr: unsafe { NonNull::new_unchecked(ptr) },
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<I: TsInput> Drop for QueryCursor<'_, '_, I> {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
// we allow moving the cursor data out so we need the null check here
|
||||||
|
// would be cleaner with a subtype but doesn't really matter at the end of the day
|
||||||
|
if !self.ptr.is_null() {
|
||||||
|
unsafe { ts_query_cursor_delete(self.ptr) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A query cursor that is not actively associated with a query
|
||||||
|
pub struct InactiveQueryCursor {
|
||||||
|
ptr: NonNull<QueryCursorData>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl InactiveQueryCursor {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
InactiveQueryCursor {
|
||||||
|
ptr: unsafe { NonNull::new_unchecked(ts_query_cursor_new()) },
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the maximum number of in-progress matches for this cursor.
|
||||||
|
#[doc(alias = "ts_query_cursor_match_limit")]
|
||||||
|
#[must_use]
|
||||||
|
pub fn match_limit(&self) -> u32 {
|
||||||
|
unsafe { ts_query_cursor_match_limit(self.ptr.as_ptr()) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the maximum number of in-progress matches for this cursor. The
|
||||||
|
/// limit must be > 0 and <= 65536.
|
||||||
|
#[doc(alias = "ts_query_cursor_set_match_limit")]
|
||||||
|
pub fn set_match_limit(&mut self, limit: u32) {
|
||||||
|
unsafe {
|
||||||
|
ts_query_cursor_set_match_limit(self.ptr.as_ptr(), limit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if, on its last execution, this cursor exceeded its maximum number
|
||||||
|
/// of in-progress matches.
|
||||||
|
#[doc(alias = "ts_query_cursor_did_exceed_match_limit")]
|
||||||
|
#[must_use]
|
||||||
|
pub fn did_exceed_match_limit(&self) -> bool {
|
||||||
|
unsafe { ts_query_cursor_did_exceed_match_limit(self.ptr.as_ptr()) }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_byte_range(&mut self, range: Range<usize>) {
|
||||||
|
unsafe {
|
||||||
|
ts_query_cursor_set_byte_range(self.ptr.as_ptr(), range.start as u32, range.end as u32);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn execute_query<'a, 'tree, I: TsInput>(
|
||||||
|
self,
|
||||||
|
query: &'a Query,
|
||||||
|
node: &SyntaxTreeNode<'tree>,
|
||||||
|
input: I,
|
||||||
|
) -> QueryCursor<'a, 'tree, I> {
|
||||||
|
let ptr = self.ptr.as_ptr();
|
||||||
|
unsafe { ts_query_cursor_exec(self.ptr.as_ptr(), query.raw.as_ref(), node.as_raw()) };
|
||||||
|
QueryCursor {
|
||||||
|
query,
|
||||||
|
ptr,
|
||||||
|
tree: PhantomData,
|
||||||
|
input,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for InactiveQueryCursor {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
unsafe { ts_query_cursor_delete(self.ptr.as_ptr()) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub type MatchedNodeIdx = u32;
|
||||||
|
|
||||||
|
#[repr(C)]
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct MatchedNode<'tree> {
|
||||||
|
pub syntax_node: SyntaxTreeNode<'tree>,
|
||||||
|
pub capture: Capture,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct QueryMatch<'cursor, 'tree> {
|
||||||
|
id: u32,
|
||||||
|
pattern: Pattern,
|
||||||
|
matched_nodes: &'cursor [MatchedNode<'tree>],
|
||||||
|
query_cursor: &'cursor mut QueryCursorData,
|
||||||
|
_tree: PhantomData<&'tree super::SyntaxTree>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'tree> QueryMatch<'_, 'tree> {
|
||||||
|
pub fn matched_nodes(&self) -> impl Iterator<Item = &MatchedNode<'tree>> {
|
||||||
|
self.matched_nodes.iter()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn matched_node(&self, i: MatchedNodeIdx) -> &MatchedNode {
|
||||||
|
&self.matched_nodes[i as usize]
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub const fn id(&self) -> u32 {
|
||||||
|
self.id
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub const fn pattern(&self) -> Pattern {
|
||||||
|
self.pattern
|
||||||
|
}
|
||||||
|
|
||||||
|
#[doc(alias = "ts_query_cursor_remove_match")]
|
||||||
|
/// removes this match from the cursor so that further captures
|
||||||
|
/// from its cursor so that future captures that belong to this match
|
||||||
|
/// are no longer returned by capture iterators
|
||||||
|
pub fn remove(self) {
|
||||||
|
unsafe {
|
||||||
|
ts_query_cursor_remove_match(self.query_cursor, self.id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[repr(C)]
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct TSQueryCapture {
|
||||||
|
node: SyntaxTreeNodeRaw,
|
||||||
|
index: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[repr(C)]
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct TSQueryMatch {
|
||||||
|
id: u32,
|
||||||
|
pattern_index: u16,
|
||||||
|
capture_count: u16,
|
||||||
|
captures: *const TSQueryCapture,
|
||||||
|
}
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
/// Advance to the next capture of the currently running query.
|
||||||
|
/// If there is a capture, write its match to `*match` and its index within
|
||||||
|
/// the matche's capture list to `*capture_index`. Otherwise, return `false`.
|
||||||
|
fn ts_query_cursor_next_capture(
|
||||||
|
self_: *mut QueryCursorData,
|
||||||
|
match_: &mut TSQueryMatch,
|
||||||
|
capture_index: &mut u32,
|
||||||
|
) -> bool;
|
||||||
|
|
||||||
|
/// Advance to the next match of the currently running query.
|
||||||
|
///
|
||||||
|
/// If there is a match, write it to `*match` and return `true`.
|
||||||
|
/// Otherwise, return `false`.
|
||||||
|
pub fn ts_query_cursor_next_match(
|
||||||
|
self_: *mut QueryCursorData,
|
||||||
|
match_: &mut TSQueryMatch,
|
||||||
|
) -> bool;
|
||||||
|
fn ts_query_cursor_remove_match(self_: *mut QueryCursorData, match_id: u32);
|
||||||
|
/// Delete a query cursor, freeing all of the memory that it used
|
||||||
|
pub fn ts_query_cursor_delete(self_: *mut QueryCursorData);
|
||||||
|
/// Create a new cursor for executing a given query.
|
||||||
|
/// The cursor stores the state that is needed to iteratively search
|
||||||
|
/// for matches. To use the query cursor, first call [`ts_query_cursor_exec`]
|
||||||
|
/// to start running a given query on a given syntax node. Then, there are
|
||||||
|
/// two options for consuming the results of the query:
|
||||||
|
/// 1. Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of the
|
||||||
|
/// *matches* in the order that they were found. Each match contains the
|
||||||
|
/// index of the pattern that matched, and an array of captures. Because
|
||||||
|
/// multiple patterns can match the same set of nodes, one match may contain
|
||||||
|
/// captures that appear *before* some of the captures from a previous match.
|
||||||
|
/// 2. Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all of the
|
||||||
|
/// individual *captures* in the order that they appear. This is useful if
|
||||||
|
/// don't care about which pattern matched, and just want a single ordered
|
||||||
|
/// sequence of captures.
|
||||||
|
/// If you don't care about consuming all of the results, you can stop calling
|
||||||
|
/// [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any point.
|
||||||
|
/// You can then start executing another query on another node by calling
|
||||||
|
/// [`ts_query_cursor_exec`] again."]
|
||||||
|
pub fn ts_query_cursor_new() -> *mut QueryCursorData;
|
||||||
|
|
||||||
|
/// Start running a given query on a given node.
|
||||||
|
pub fn ts_query_cursor_exec(
|
||||||
|
self_: *mut QueryCursorData,
|
||||||
|
query: &QueryData,
|
||||||
|
node: SyntaxTreeNodeRaw,
|
||||||
|
);
|
||||||
|
/// Manage the maximum number of in-progress matches allowed by this query
|
||||||
|
/// cursor.
|
||||||
|
///
|
||||||
|
/// Query cursors have an optional maximum capacity for storing lists of
|
||||||
|
/// in-progress captures. If this capacity is exceeded, then the
|
||||||
|
/// earliest-starting match will silently be dropped to make room for further
|
||||||
|
/// matches. This maximum capacity is optional — by default, query cursors allow
|
||||||
|
/// any number of pending matches, dynamically allocating new space for them as
|
||||||
|
/// needed as the query is executed.
|
||||||
|
pub fn ts_query_cursor_did_exceed_match_limit(self_: *const QueryCursorData) -> bool;
|
||||||
|
pub fn ts_query_cursor_match_limit(self_: *const QueryCursorData) -> u32;
|
||||||
|
pub fn ts_query_cursor_set_match_limit(self_: *mut QueryCursorData, limit: u32);
|
||||||
|
/// Set the range of bytes or (row, column) positions in which the query
|
||||||
|
/// will be executed.
|
||||||
|
pub fn ts_query_cursor_set_byte_range(
|
||||||
|
self_: *mut QueryCursorData,
|
||||||
|
start_byte: u32,
|
||||||
|
end_byte: u32,
|
||||||
|
);
|
||||||
|
|
||||||
|
}
|
1
helix-syntax/src/tree_sitter/query_match.rs
Normal file
1
helix-syntax/src/tree_sitter/query_match.rs
Normal file
|
@ -0,0 +1 @@
|
||||||
|
pub struct QueryMatch {}
|
|
@ -1,38 +1,54 @@
|
||||||
|
use std::ops;
|
||||||
|
|
||||||
use regex_cursor::{Cursor, RopeyCursor};
|
use regex_cursor::{Cursor, RopeyCursor};
|
||||||
use ropey::RopeSlice;
|
use ropey::RopeSlice;
|
||||||
|
|
||||||
use crate::tree_sitter::parser::{IntoParserInput, ParserInput};
|
use crate::tree_sitter::{IntoTsInput, TsInput};
|
||||||
|
|
||||||
pub struct RopeParserInput<'a> {
|
pub struct RopeTsInput<'a> {
|
||||||
src: RopeSlice<'a>,
|
src: RopeSlice<'a>,
|
||||||
cursor: regex_cursor::RopeyCursor<'a>,
|
cursor: regex_cursor::RopeyCursor<'a>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> IntoParserInput for RopeSlice<'a> {
|
impl<'a> RopeTsInput<'a> {
|
||||||
type ParserInput = RopeParserInput<'a>;
|
pub fn new(src: RopeSlice<'a>) -> Self {
|
||||||
|
RopeTsInput {
|
||||||
|
src,
|
||||||
|
cursor: regex_cursor::RopeyCursor::new(src),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn into_parser_input(self) -> Self::ParserInput {
|
impl<'a> IntoTsInput for RopeSlice<'a> {
|
||||||
RopeParserInput {
|
type TsInput = RopeTsInput<'a>;
|
||||||
|
|
||||||
|
fn into_ts_input(self) -> Self::TsInput {
|
||||||
|
RopeTsInput {
|
||||||
src: self,
|
src: self,
|
||||||
cursor: RopeyCursor::new(self),
|
cursor: RopeyCursor::new(self),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ParserInput for RopeParserInput<'_> {
|
impl<'a> TsInput for RopeTsInput<'a> {
|
||||||
fn read(&mut self, offset: usize) -> &[u8] {
|
type Cursor = RopeyCursor<'a>;
|
||||||
|
fn cursor_at(&mut self, offset: usize) -> &mut RopeyCursor<'a> {
|
||||||
// this cursor is optimized for contigous reads which are by far the most common during parsing
|
// this cursor is optimized for contigous reads which are by far the most common during parsing
|
||||||
// very far jumps (like injections at the other end of the document) are handelde
|
// very far jumps (like injections at the other end of the document) are handelde
|
||||||
// by restarting a new cursor (new chunks iterator)
|
// by starting a new cursor (new chunks iterator)
|
||||||
if offset < self.cursor.offset() && self.cursor.offset() - offset > 4906 {
|
if offset < self.cursor.offset() || self.cursor.offset() - offset > 4906 {
|
||||||
self.cursor = regex_cursor::RopeyCursor::at(self.src, offset);
|
self.cursor = regex_cursor::RopeyCursor::at(self.src, offset);
|
||||||
} else {
|
} else {
|
||||||
while self.cursor.offset() + self.cursor.chunk().len() >= offset {
|
while self.cursor.offset() + self.cursor.chunk().len() >= offset {
|
||||||
if !self.cursor.advance() {
|
if !self.cursor.advance() {
|
||||||
return &[];
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
self.cursor.chunk()
|
&mut self.cursor
|
||||||
|
}
|
||||||
|
|
||||||
|
fn eq(&mut self, range1: ops::Range<usize>, range2: ops::Range<usize>) -> bool {
|
||||||
|
self.src.byte_slice(range1) == self.src.byte_slice(range2)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,6 +25,7 @@ impl From<SyntaxTreeNode<'_>> for SyntaxTreeNodeRaw {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
|
#[repr(C)]
|
||||||
pub struct SyntaxTreeNode<'tree> {
|
pub struct SyntaxTreeNode<'tree> {
|
||||||
context: [u32; 4],
|
context: [u32; 4],
|
||||||
id: NonNull<c_void>,
|
id: NonNull<c_void>,
|
||||||
|
@ -44,7 +45,7 @@ impl<'tree> SyntaxTreeNode<'tree> {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_raw(&self) -> SyntaxTreeNodeRaw {
|
pub(crate) fn as_raw(&self) -> SyntaxTreeNodeRaw {
|
||||||
SyntaxTreeNodeRaw {
|
SyntaxTreeNodeRaw {
|
||||||
context: self.context,
|
context: self.context,
|
||||||
id: self.id.as_ptr(),
|
id: self.id.as_ptr(),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue