refactor(lexer): make functions for specific tokens

This commit is contained in:
Artemy Egorov 2024-08-11 18:21:24 +03:00
parent aaa4328818
commit ccc9773030
4 changed files with 136 additions and 106 deletions

View file

@@ -1,12 +1,12 @@
use ariadne::{Color, Label, Report, ReportKind, Source}; use ariadne::{Color, Label, Report, ReportKind, Source};
use chumsky::Parser; use chumsky::Parser;
use dalet::daleth::{format::format, lexer::lexer}; use dalet::daleth::{format::format, lexer::full_lexer};
fn main() { fn main() {
let src_file = "daleth.dlth"; let src_file = "daleth.dlth";
let src = include_str!("./daleth.dlth"); let src = include_str!("./daleth.dlth");
let parsed = lexer().parse(src); let parsed = full_lexer().parse(src);
match parsed.into_result() { match parsed.into_result() {
Ok(t) => { Ok(t) => {

View file

@@ -4,7 +4,36 @@ pub mod types;
pub fn lexer<'src>( pub fn lexer<'src>(
) -> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> { ) -> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> {
let tag = choice(( let token = choice((symbol(), tag(), argument(), textual()));
token
.padded()
.padded_by(comment())
.map_with(|t, e| (t, e.span()))
.repeated()
.collect()
}
pub fn full_lexer<'src>(
) -> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> {
let token = choice((
empty_line(),
comment(),
symbol(),
tag(),
argument(),
textual(),
));
token
.padded_by(text::whitespace().and_is(empty_line().not()).or_not())
.map_with(|t, e| (t, e.span()))
.repeated()
.collect()
}
fn tag<'src>() -> impl Parser<'src, &'src str, Token<'src>, extra::Err<Rich<'src, char, Span>>> {
choice((
just("el").to(Token::El), just("el").to(Token::El),
just("h").to(Token::H), just("h").to(Token::H),
just("p").to(Token::P), just("p").to(Token::P),
@@ -39,108 +68,109 @@ pub fn lexer<'src>(
just("pre").to(Token::Pre), just("pre").to(Token::Pre),
just("meta").to(Token::Meta), just("meta").to(Token::Meta),
))) )))
.labelled("Tag"); .labelled("Tag")
}
let symbol = choice(( fn symbol<'src>() -> impl Parser<'src, &'src str, Token<'src>, extra::Err<Rich<'src, char, Span>>> {
choice((
just("[[").to(Token::ElOpen).labelled("[["), just("[[").to(Token::ElOpen).labelled("[["),
just("]]").to(Token::ElClose).labelled("]]"), just("]]").to(Token::ElClose).labelled("]]"),
just("[").to(Token::LSquare).labelled("["), just("[").to(Token::LSquare).labelled("["),
just("]").to(Token::RSquare).labelled("]"), just("]").to(Token::RSquare).labelled("]"),
)); ))
}
let argument = {
let arg_escape = just('\\') fn argument<'src>() -> impl Parser<'src, &'src str, Token<'src>, extra::Err<Rich<'src, char, Span>>>
.ignore_then(just('"')) {
.labelled("Escape sequence for argument"); let arg_escape = just('\\')
.ignore_then(just('"'))
let number = text::int(10) .labelled("Escape sequence for argument");
.from_str()
.unwrapped() let number = text::int(10)
.map(Token::NumberArgument) .from_str()
.labelled("Number argument"); .unwrapped()
.map(Token::NumberArgument)
let text_argument = none_of("\"\n\\") .labelled("Number argument");
.or(arg_escape)
.repeated() let text_argument = none_of("\"\n\\")
.to_slice() .or(arg_escape)
.delimited_by(just('"'), just('"')) .repeated()
.map(Token::TextArgument) .to_slice()
.labelled("Text argument"); .delimited_by(just('"'), just('"'))
.map(Token::TextArgument)
choice((number, text_argument)) .labelled("Text argument");
};
choice((number, text_argument))
let textual = { }
let escape = just('\\')
.ignore_then(just('}')) fn textual<'src>() -> impl Parser<'src, &'src str, Token<'src>, extra::Err<Rich<'src, char, Span>>>
.labelled("Multi-line escape sequence"); {
let escape = just('\\')
let text = none_of("\n") .ignore_then(just('}'))
.repeated() .labelled("Multi-line escape sequence");
.to_slice()
.padded_by(text::inline_whitespace()); let text = none_of("\n")
.repeated()
let text_body = just(':') .to_slice()
.ignore_then(text) .padded_by(text::inline_whitespace());
.map(Token::TextBody)
.labelled("One line text body"); let text_body = just(':')
.ignore_then(text)
let text_tag = text .map(Token::TextBody)
.then_ignore(just('\n')) .labelled("One line text body");
.map(Token::TextTag)
.labelled("Text tag"); let text_tag = text
.then_ignore(just('\n'))
let multiline_text_body = none_of("}\\") .map(Token::TextTag)
.or(escape) .labelled("Text tag");
.repeated()
.to_slice() let multiline_text_body = none_of("}\\")
.labelled("Body of multiline text"); .or(escape)
.repeated()
let paragraph = multiline_text_body .to_slice()
.clone() .labelled("Body of multiline text");
.delimited_by(just("{-"), just("}"))
.map(Token::Paragraph) let paragraph = multiline_text_body
.labelled("Paragraph syntax"); .clone()
.delimited_by(just("{-"), just("}"))
let mltext = multiline_text_body .map(Token::Paragraph)
.clone() .labelled("Paragraph syntax");
.delimited_by(just('{'), just('}'))
.map(Token::MLText) let mltext = multiline_text_body
.labelled("Multiline text"); .clone()
.delimited_by(just('{'), just('}'))
let mlmstext = { .map(Token::MLText)
let mlms_n = just("{~") .labelled("Multiline text");
.ignore_then(text::int(10).from_str().unwrapped())
.labelled("Minimum spaces number"); let mlmstext = {
let mlms_n = just("{~")
mlms_n .ignore_then(text::int(10).from_str().unwrapped())
.then(multiline_text_body.clone()) .labelled("Minimum spaces number");
.then_ignore(just("}"))
.map(|(n, t)| Token::MLMSText(n, t)) mlms_n
.labelled("Multi line text with min spaces") .then(multiline_text_body.clone())
}; .then_ignore(just("}"))
.map(|(n, t)| Token::MLMSText(n, t))
let rmltext = multiline_text_body .labelled("Multi line text with min spaces")
.delimited_by(just("{#"), just('}')) };
.map(Token::RMLText)
.labelled("Raw multiline text"); let rmltext = multiline_text_body
.delimited_by(just("{#"), just('}'))
choice((paragraph, mlmstext, rmltext, mltext, text_body, text_tag)) .map(Token::RMLText)
}; .labelled("Raw multiline text");
let comment = just('#') choice((paragraph, mlmstext, rmltext, mltext, text_body, text_tag))
.ignore_then(none_of("\n").repeated().to_slice()) }
.map(Token::Comment);
fn comment<'src>() -> impl Parser<'src, &'src str, Token<'src>, extra::Err<Rich<'src, char, Span>>>
let empty_line = text::inline_whitespace() {
.delimited_by(text::newline(), text::newline()) just('#')
.to(Token::EmptyLine); .ignore_then(none_of("\n").repeated().to_slice())
.map(Token::Comment)
let token = choice((empty_line.clone(), comment, symbol, tag, argument, textual)); }
fn empty_line<'src>(
token ) -> impl Parser<'src, &'src str, Token<'src>, extra::Err<Rich<'src, char, Span>>> {
.padded_by(text::whitespace().and_is(empty_line.not()).or_not()) text::inline_whitespace()
.map_with(|t, e| (t, e.span())) .delimited_by(text::newline(), text::newline())
.repeated() .to(Token::EmptyLine)
.collect()
} }

View file

@@ -28,11 +28,11 @@ pub enum Token<'src> {
/// Raw Multi line text /// Raw Multi line text
RMLText(&'src str), RMLText(&'src str),
/// Special // Special
TextTag(&'src str), TextTag(&'src str),
Paragraph(&'src str), Paragraph(&'src str),
/// Special removed before parse // Special for formatting, ignored for parse
Comment(&'src str), Comment(&'src str),
EmptyLine, EmptyLine,

View file

@@ -4,7 +4,7 @@ use ariadne::{Color, Label, Report, ReportKind, Source};
use chumsky::Parser; use chumsky::Parser;
use clap::Parser as ClapParser; use clap::Parser as ClapParser;
use commands::{Cli, Commands::*}; use commands::{Cli, Commands::*};
use dalet::daleth::{format::format, lexer::lexer}; use dalet::daleth::{format::format, lexer::full_lexer};
use std::fs; use std::fs;
fn main() { fn main() {
@@ -16,7 +16,7 @@ fn main() {
let src_file = &path.to_string_lossy().to_string(); let src_file = &path.to_string_lossy().to_string();
let src = fs::read_to_string(src_file).unwrap(); let src = fs::read_to_string(src_file).unwrap();
let parsed = lexer().parse(&src); let parsed = full_lexer().parse(&src);
match parsed.into_result() { match parsed.into_result() {
Ok(t) => { Ok(t) => {