From edf799904e34b8349b568fbf8783fa3c7207147c Mon Sep 17 00:00:00 2001
From: Artemy Egorov
Date: Wed, 7 Aug 2024 22:41:55 +0300
Subject: [PATCH] feat: symbols, text, number lexer

---
 src/daleth/lexer/mod.rs                 | 91 +++++++++++++++++++++++++
 src/daleth/{lexer.rs => lexer/types.rs} |  8 ++-
 2 files changed, 98 insertions(+), 1 deletion(-)
 create mode 100644 src/daleth/lexer/mod.rs
 rename src/daleth/{lexer.rs => lexer/types.rs} (87%)

diff --git a/src/daleth/lexer/mod.rs b/src/daleth/lexer/mod.rs
new file mode 100644
index 0000000..5960d08
--- /dev/null
+++ b/src/daleth/lexer/mod.rs
@@ -0,0 +1,91 @@
+use chumsky::prelude::*;
+use types::{Span, Spanned, Token};
+pub mod types;
+
+/// Builds the lexer that turns source text into a list of spanned tokens.
+///
+/// Each token is tried in the order symbol, number, comment, textual, and
+/// whitespace around every token is skipped. Comments are tried before
+/// textual tokens because plain text accepts `#` and would otherwise
+/// swallow every comment.
+///
+/// Token payloads are slices of the input; for multiline text the slice
+/// includes the surrounding backtick delimiters.
+///
+/// # Errors
+///
+/// A number literal that does not fit in the `u8` carried by
+/// `Token::Number` is reported as a lexer error rather than a panic.
+pub fn lexer<'src>(
+) -> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> {
+    let symbol = choice((
+        just("(").to(Token::LParen).labelled("LParen"),
+        just(")").to(Token::RParen).labelled("RParen"),
+        just("{").to(Token::LAngle).labelled("LAngle"),
+        just("}").to(Token::RAngle).labelled("RAngle"),
+        just("[").to(Token::LSquare).labelled("LSquare"),
+        just("]").to(Token::RSquare).labelled("RSquare"),
+        just(":").to(Token::Colon).labelled("Colon"),
+    ))
+    .labelled("symbol");
+
+    // Out-of-range literals become a `Rich` error instead of unwinding.
+    let number = text::int(10)
+        .try_map(|digits: &str, span| {
+            digits.parse::<u8>().map_err(|err| Rich::custom(span, err))
+        })
+        .map(Token::Number)
+        .labelled("number");
+
+    let textual = {
+        // A backslash followed by a backtick or `]` keeps that delimiter
+        // inside the token instead of ending it.
+        let escape = just('\\')
+            .ignore_then(one_of("`]"))
+            .labelled("escape sequence");
+
+        // The escape is tried first; `none_of` alone would consume the
+        // backslash and let the delimiter end the token. At least one
+        // character is required so this parser never succeeds on empty
+        // input, which would leave the outer `repeated()` making no progress.
+        let text = escape
+            .clone()
+            .or(none_of("]\n"))
+            .repeated()
+            .at_least(1)
+            .to_slice()
+            .map(Token::Text);
+
+        let multiline_text = escape.or(none_of("`")).repeated();
+
+        let mltext = multiline_text
+            .clone()
+            .delimited_by(just('`'), just('`'))
+            .to_slice()
+            .map(Token::MLText)
+            .labelled("multiline text");
+
+        let mlmstext = multiline_text
+            .delimited_by(just("`#"), just('`'))
+            .to_slice()
+            .map(Token::RMLText)
+            .labelled("raw multiline text");
+
+        // The raw form goes first: its "`#" opener also satisfies the plain
+        // "`" opener, so the plain form would otherwise always win.
+        choice((mlmstext, mltext, text))
+    };
+
+    let comment = just("#")
+        .then(none_of("\n").repeated())
+        .to_slice()
+        .map(Token::Comment);
+
+    let token = choice((symbol, number, comment, textual));
+
+    token
+        .map_with(|t, e| (t, e.span()))
+        .padded()
+        .repeated()
+        .collect()
+}
diff --git a/src/daleth/lexer.rs b/src/daleth/lexer/types.rs
similarity index 87%
rename from src/daleth/lexer.rs
rename to src/daleth/lexer/types.rs
index 936e0dc..9e2a903 100644
--- a/src/daleth/lexer.rs
+++ b/src/daleth/lexer/types.rs
@@ -1,10 +1,11 @@
 use chumsky::prelude::*;
 
 pub type Span = SimpleSpan;
+pub type Spanned<T> = (T, Span);
 
 #[derive(Clone, Debug, PartialEq)]
 pub enum Token<'src> {
-    // Brackets
+    // Symbols
     /// (
     LParen,
     /// )
@@ -17,6 +18,8 @@ pub enum Token<'src> {
     LSquare,
     /// ]
     RSquare,
+    /// :
+    Colon,
 
     // Values
     Number(u8),
@@ -28,6 +31,9 @@ pub enum Token<'src> {
     /// Raw Multi line text
     RMLText(&'src str),
 
+    /// Special
+    Comment(&'src str),
+
     // Tags
     El,
     H,