mirror of
https://github.com/TxtDot/dalet-rs.git
synced 2024-11-22 00:36:21 +03:00
refactor(lexer): make functions for specific tokens
This commit is contained in:
parent
aaa4328818
commit
ccc9773030
4 changed files with 136 additions and 106 deletions
|
@ -1,12 +1,12 @@
|
||||||
use ariadne::{Color, Label, Report, ReportKind, Source};
|
use ariadne::{Color, Label, Report, ReportKind, Source};
|
||||||
use chumsky::Parser;
|
use chumsky::Parser;
|
||||||
use dalet::daleth::{format::format, lexer::lexer};
|
use dalet::daleth::{format::format, lexer::full_lexer};
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let src_file = "daleth.dlth";
|
let src_file = "daleth.dlth";
|
||||||
let src = include_str!("./daleth.dlth");
|
let src = include_str!("./daleth.dlth");
|
||||||
|
|
||||||
let parsed = lexer().parse(src);
|
let parsed = full_lexer().parse(src);
|
||||||
|
|
||||||
match parsed.into_result() {
|
match parsed.into_result() {
|
||||||
Ok(t) => {
|
Ok(t) => {
|
||||||
|
|
|
@ -4,7 +4,36 @@ pub mod types;
|
||||||
|
|
||||||
pub fn lexer<'src>(
|
pub fn lexer<'src>(
|
||||||
) -> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> {
|
) -> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> {
|
||||||
let tag = choice((
|
let token = choice((symbol(), tag(), argument(), textual()));
|
||||||
|
|
||||||
|
token
|
||||||
|
.padded()
|
||||||
|
.padded_by(comment())
|
||||||
|
.map_with(|t, e| (t, e.span()))
|
||||||
|
.repeated()
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn full_lexer<'src>(
|
||||||
|
) -> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> {
|
||||||
|
let token = choice((
|
||||||
|
empty_line(),
|
||||||
|
comment(),
|
||||||
|
symbol(),
|
||||||
|
tag(),
|
||||||
|
argument(),
|
||||||
|
textual(),
|
||||||
|
));
|
||||||
|
|
||||||
|
token
|
||||||
|
.padded_by(text::whitespace().and_is(empty_line().not()).or_not())
|
||||||
|
.map_with(|t, e| (t, e.span()))
|
||||||
|
.repeated()
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn tag<'src>() -> impl Parser<'src, &'src str, Token<'src>, extra::Err<Rich<'src, char, Span>>> {
|
||||||
|
choice((
|
||||||
just("el").to(Token::El),
|
just("el").to(Token::El),
|
||||||
just("h").to(Token::H),
|
just("h").to(Token::H),
|
||||||
just("p").to(Token::P),
|
just("p").to(Token::P),
|
||||||
|
@ -39,108 +68,109 @@ pub fn lexer<'src>(
|
||||||
just("pre").to(Token::Pre),
|
just("pre").to(Token::Pre),
|
||||||
just("meta").to(Token::Meta),
|
just("meta").to(Token::Meta),
|
||||||
)))
|
)))
|
||||||
.labelled("Tag");
|
.labelled("Tag")
|
||||||
|
}
|
||||||
|
|
||||||
let symbol = choice((
|
fn symbol<'src>() -> impl Parser<'src, &'src str, Token<'src>, extra::Err<Rich<'src, char, Span>>> {
|
||||||
|
choice((
|
||||||
just("[[").to(Token::ElOpen).labelled("[["),
|
just("[[").to(Token::ElOpen).labelled("[["),
|
||||||
just("]]").to(Token::ElClose).labelled("]]"),
|
just("]]").to(Token::ElClose).labelled("]]"),
|
||||||
just("[").to(Token::LSquare).labelled("["),
|
just("[").to(Token::LSquare).labelled("["),
|
||||||
just("]").to(Token::RSquare).labelled("]"),
|
just("]").to(Token::RSquare).labelled("]"),
|
||||||
));
|
))
|
||||||
|
}
|
||||||
let argument = {
|
|
||||||
let arg_escape = just('\\')
|
fn argument<'src>() -> impl Parser<'src, &'src str, Token<'src>, extra::Err<Rich<'src, char, Span>>>
|
||||||
.ignore_then(just('"'))
|
{
|
||||||
.labelled("Escape sequence for argument");
|
let arg_escape = just('\\')
|
||||||
|
.ignore_then(just('"'))
|
||||||
let number = text::int(10)
|
.labelled("Escape sequence for argument");
|
||||||
.from_str()
|
|
||||||
.unwrapped()
|
let number = text::int(10)
|
||||||
.map(Token::NumberArgument)
|
.from_str()
|
||||||
.labelled("Number argument");
|
.unwrapped()
|
||||||
|
.map(Token::NumberArgument)
|
||||||
let text_argument = none_of("\"\n\\")
|
.labelled("Number argument");
|
||||||
.or(arg_escape)
|
|
||||||
.repeated()
|
let text_argument = none_of("\"\n\\")
|
||||||
.to_slice()
|
.or(arg_escape)
|
||||||
.delimited_by(just('"'), just('"'))
|
.repeated()
|
||||||
.map(Token::TextArgument)
|
.to_slice()
|
||||||
.labelled("Text argument");
|
.delimited_by(just('"'), just('"'))
|
||||||
|
.map(Token::TextArgument)
|
||||||
choice((number, text_argument))
|
.labelled("Text argument");
|
||||||
};
|
|
||||||
|
choice((number, text_argument))
|
||||||
let textual = {
|
}
|
||||||
let escape = just('\\')
|
|
||||||
.ignore_then(just('}'))
|
fn textual<'src>() -> impl Parser<'src, &'src str, Token<'src>, extra::Err<Rich<'src, char, Span>>>
|
||||||
.labelled("Multi-line escape sequence");
|
{
|
||||||
|
let escape = just('\\')
|
||||||
let text = none_of("\n")
|
.ignore_then(just('}'))
|
||||||
.repeated()
|
.labelled("Multi-line escape sequence");
|
||||||
.to_slice()
|
|
||||||
.padded_by(text::inline_whitespace());
|
let text = none_of("\n")
|
||||||
|
.repeated()
|
||||||
let text_body = just(':')
|
.to_slice()
|
||||||
.ignore_then(text)
|
.padded_by(text::inline_whitespace());
|
||||||
.map(Token::TextBody)
|
|
||||||
.labelled("One line text body");
|
let text_body = just(':')
|
||||||
|
.ignore_then(text)
|
||||||
let text_tag = text
|
.map(Token::TextBody)
|
||||||
.then_ignore(just('\n'))
|
.labelled("One line text body");
|
||||||
.map(Token::TextTag)
|
|
||||||
.labelled("Text tag");
|
let text_tag = text
|
||||||
|
.then_ignore(just('\n'))
|
||||||
let multiline_text_body = none_of("}\\")
|
.map(Token::TextTag)
|
||||||
.or(escape)
|
.labelled("Text tag");
|
||||||
.repeated()
|
|
||||||
.to_slice()
|
let multiline_text_body = none_of("}\\")
|
||||||
.labelled("Body of multiline text");
|
.or(escape)
|
||||||
|
.repeated()
|
||||||
let paragraph = multiline_text_body
|
.to_slice()
|
||||||
.clone()
|
.labelled("Body of multiline text");
|
||||||
.delimited_by(just("{-"), just("}"))
|
|
||||||
.map(Token::Paragraph)
|
let paragraph = multiline_text_body
|
||||||
.labelled("Paragraph syntax");
|
.clone()
|
||||||
|
.delimited_by(just("{-"), just("}"))
|
||||||
let mltext = multiline_text_body
|
.map(Token::Paragraph)
|
||||||
.clone()
|
.labelled("Paragraph syntax");
|
||||||
.delimited_by(just('{'), just('}'))
|
|
||||||
.map(Token::MLText)
|
let mltext = multiline_text_body
|
||||||
.labelled("Multiline text");
|
.clone()
|
||||||
|
.delimited_by(just('{'), just('}'))
|
||||||
let mlmstext = {
|
.map(Token::MLText)
|
||||||
let mlms_n = just("{~")
|
.labelled("Multiline text");
|
||||||
.ignore_then(text::int(10).from_str().unwrapped())
|
|
||||||
.labelled("Minimum spaces number");
|
let mlmstext = {
|
||||||
|
let mlms_n = just("{~")
|
||||||
mlms_n
|
.ignore_then(text::int(10).from_str().unwrapped())
|
||||||
.then(multiline_text_body.clone())
|
.labelled("Minimum spaces number");
|
||||||
.then_ignore(just("}"))
|
|
||||||
.map(|(n, t)| Token::MLMSText(n, t))
|
mlms_n
|
||||||
.labelled("Multi line text with min spaces")
|
.then(multiline_text_body.clone())
|
||||||
};
|
.then_ignore(just("}"))
|
||||||
|
.map(|(n, t)| Token::MLMSText(n, t))
|
||||||
let rmltext = multiline_text_body
|
.labelled("Multi line text with min spaces")
|
||||||
.delimited_by(just("{#"), just('}'))
|
};
|
||||||
.map(Token::RMLText)
|
|
||||||
.labelled("Raw multiline text");
|
let rmltext = multiline_text_body
|
||||||
|
.delimited_by(just("{#"), just('}'))
|
||||||
choice((paragraph, mlmstext, rmltext, mltext, text_body, text_tag))
|
.map(Token::RMLText)
|
||||||
};
|
.labelled("Raw multiline text");
|
||||||
|
|
||||||
let comment = just('#')
|
choice((paragraph, mlmstext, rmltext, mltext, text_body, text_tag))
|
||||||
.ignore_then(none_of("\n").repeated().to_slice())
|
}
|
||||||
.map(Token::Comment);
|
|
||||||
|
fn comment<'src>() -> impl Parser<'src, &'src str, Token<'src>, extra::Err<Rich<'src, char, Span>>>
|
||||||
let empty_line = text::inline_whitespace()
|
{
|
||||||
.delimited_by(text::newline(), text::newline())
|
just('#')
|
||||||
.to(Token::EmptyLine);
|
.ignore_then(none_of("\n").repeated().to_slice())
|
||||||
|
.map(Token::Comment)
|
||||||
let token = choice((empty_line.clone(), comment, symbol, tag, argument, textual));
|
}
|
||||||
|
fn empty_line<'src>(
|
||||||
token
|
) -> impl Parser<'src, &'src str, Token<'src>, extra::Err<Rich<'src, char, Span>>> {
|
||||||
.padded_by(text::whitespace().and_is(empty_line.not()).or_not())
|
text::inline_whitespace()
|
||||||
.map_with(|t, e| (t, e.span()))
|
.delimited_by(text::newline(), text::newline())
|
||||||
.repeated()
|
.to(Token::EmptyLine)
|
||||||
.collect()
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,11 +28,11 @@ pub enum Token<'src> {
|
||||||
/// Raw Multi line text
|
/// Raw Multi line text
|
||||||
RMLText(&'src str),
|
RMLText(&'src str),
|
||||||
|
|
||||||
/// Special
|
// Special
|
||||||
TextTag(&'src str),
|
TextTag(&'src str),
|
||||||
Paragraph(&'src str),
|
Paragraph(&'src str),
|
||||||
|
|
||||||
/// Special removed before parse
|
// Special for formatting, ignored for parse
|
||||||
Comment(&'src str),
|
Comment(&'src str),
|
||||||
EmptyLine,
|
EmptyLine,
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@ use ariadne::{Color, Label, Report, ReportKind, Source};
|
||||||
use chumsky::Parser;
|
use chumsky::Parser;
|
||||||
use clap::Parser as ClapParser;
|
use clap::Parser as ClapParser;
|
||||||
use commands::{Cli, Commands::*};
|
use commands::{Cli, Commands::*};
|
||||||
use dalet::daleth::{format::format, lexer::lexer};
|
use dalet::daleth::{format::format, lexer::full_lexer};
|
||||||
use std::fs;
|
use std::fs;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
|
@ -16,7 +16,7 @@ fn main() {
|
||||||
let src_file = &path.to_string_lossy().to_string();
|
let src_file = &path.to_string_lossy().to_string();
|
||||||
let src = fs::read_to_string(src_file).unwrap();
|
let src = fs::read_to_string(src_file).unwrap();
|
||||||
|
|
||||||
let parsed = lexer().parse(&src);
|
let parsed = full_lexer().parse(&src);
|
||||||
|
|
||||||
match parsed.into_result() {
|
match parsed.into_result() {
|
||||||
Ok(t) => {
|
Ok(t) => {
|
||||||
|
|
Loading…
Add table
Reference in a new issue