From dfe50cd0f4220dcabeb3362eee05aa0e8174bb67 Mon Sep 17 00:00:00 2001 From: Artemy Egorov Date: Thu, 8 Aug 2024 12:25:12 +0300 Subject: [PATCH] feat: full lexer and example --- {tests => examples}/bench.md | 0 {tests => examples}/bench.rs | 3 +- examples/daleth.dlth | 129 ++++++++++++++++++++++++++++++ examples/daleth_lexer.rs | 27 +++++++ {tests => examples}/gemtext.gmi | 0 examples/gemtext.rs | 9 +++ src/daleth/lexer/mod.rs | 135 ++++++++++++++++++++++++-------- src/daleth/lexer/types.rs | 26 +++--- tests/gemtext.rs | 10 --- 9 files changed, 283 insertions(+), 56 deletions(-) rename {tests => examples}/bench.md (100%) rename {tests => examples}/bench.rs (99%) create mode 100644 examples/daleth.dlth create mode 100644 examples/daleth_lexer.rs rename {tests => examples}/gemtext.gmi (100%) create mode 100644 examples/gemtext.rs delete mode 100644 tests/gemtext.rs diff --git a/tests/bench.md b/examples/bench.md similarity index 100% rename from tests/bench.md rename to examples/bench.md diff --git a/tests/bench.rs b/examples/bench.rs similarity index 99% rename from tests/bench.rs rename to examples/bench.rs index f30d785..f3c9607 100644 --- a/tests/bench.rs +++ b/examples/bench.rs @@ -29,8 +29,7 @@ pub fn compress_zlib(data: &[u8]) -> std::io::Result> { c.finish() } -#[test] -fn bench() { +fn main() { let page = vec![ H("Heading 1".into(), Hl::One), H("Heading 2".into(), Hl::Two), diff --git a/examples/daleth.dlth b/examples/daleth.dlth new file mode 100644 index 0000000..2679bab --- /dev/null +++ b/examples/daleth.dlth @@ -0,0 +1,129 @@ +# multilines +# +# {text} - input is trimmed with indent +# +# {~n text} - n is number of minimum spaces to add after trimming with indent +# for each line +# +# {# text} - input not modified +# +# tag syntax +# +# tag: text body +# tag { multiline text body } +# body text always trimmed +# +# tag [ multiple tags body ] +# +# Arguments +# tag argument +# +# Tags without body and argument also supported + +meta "title": Daleth syntax concept +meta "description": This document describes Daleth syntax and some tags + +h1: TxtDot revolution +p: TxtDot is a cool project + +# If no tag is specified, then the 'el' tag is placed +This is element +br + +# if no tag is specified but a '{}' is present, then the 'p' tag is placed +# '\n' is deleted only in this format. If a break line is needed in a paragraph, use ' \n'. +{ + Check Dalet too + This is one paragraph +} + +{ This is another paragraph } + +# ( ) for argument +row "center" [ + link "https://github.com/txtdot/txtdot": Homepage + btn "https://example.com/donate" [ + # tag without body + img "https://example.com/donate.png" + Donate + ] +] + +# [] for multiple tags +row [ + [ + h2: Features + + ul [ + Server-side page simplification + Media proxy + Image compression with Sharp + Rendering client-side apps `Vanilla, React, Vue, etc` with webder + Search with SearXNG + Handy API endpoints + No client JavaScript + Some kind of Material Design 3 + Customization with plugins, see @txtdot/sdk and @txtdot/plugins + ] + + ] + + [ + h2: Running + + [ + h3: Dev + + # {} for multiline strings, indent is automatically trimmed + code { + npm install + npm run dev + } + + # {~n Text} n is number of minimum spaces + code "markdown" {~4 + this is codeblock + } + + # {# Text} Text after "`# " not modified + code "markdown" {# this is codeblock} + ] + + [ + h3: Production + + code { + npm install + npm run build + npm run start + } + ] + + [ + h3: Docker + + code: docker compose up -d + ] + + ] +] + +# Table has custom format if text used +# +| cells | - primary column +# | cells | - secondary column +# | Element | Description | - converts to +# tcol [ +# Element +# Description +# ] +table { + +| Tag | Description | + | h | Heading | + | p | Paragraph | + | img | Image | + | link | Link | + | btn | Button | + | ul | Unordered list | + | br | Line break | + +| quantity | 7 | +} diff --git a/examples/daleth_lexer.rs b/examples/daleth_lexer.rs new file mode 100644 index 0000000..e0215c0 --- /dev/null +++ b/examples/daleth_lexer.rs @@ -0,0 +1,27 @@ +use ariadne::{Color, Label, Report, ReportKind, Source}; +use chumsky::Parser; +use dalet::daleth::lexer::lexer; + +fn main() { + let src_file = "daleth.dlth"; + let src = include_str!("./daleth.dlth"); + + let parsed = lexer().parse(src); + + match parsed.into_result() { + Ok(t) => println!("{:#?}", t), + Err(e) => e.into_iter().for_each(|e| { + Report::build(ReportKind::Error, src_file, e.span().start) + .with_code("Compiler") + .with_message(e.to_string().clone()) + .with_label( + Label::new((src_file, e.span().into_range())) + .with_message(e.to_string()) + .with_color(Color::Red), + ) + .finish() + .print((src_file, Source::from(&src))) + .unwrap() + }), + } +} diff --git a/tests/gemtext.gmi b/examples/gemtext.gmi similarity index 100% rename from tests/gemtext.gmi rename to examples/gemtext.gmi diff --git a/examples/gemtext.rs b/examples/gemtext.rs new file mode 100644 index 0000000..1f237c8 --- /dev/null +++ b/examples/gemtext.rs @@ -0,0 +1,9 @@ +use dalet::parsers::gemtext::parse_gemtext; + +fn main() { + let text = include_str!("./gemtext.gmi"); + + let parsed = parse_gemtext(text).unwrap(); + + println!("{:#?}", parsed); +} diff --git a/src/daleth/lexer/mod.rs b/src/daleth/lexer/mod.rs index 5960d08..b25ec5f 100644 --- a/src/daleth/lexer/mod.rs +++ b/src/daleth/lexer/mod.rs @@ -4,61 +4,128 @@ pub mod types; pub fn lexer<'src>( ) -> impl Parser<'src, &'src str, Vec>>, extra::Err>> { - let symbol = choice(( - just("(").to(Token::LParen).labelled("LParen"), - just(")").to(Token::RParen).labelled("RParen"), - just("{").to(Token::LAngle).labelled("LAngle"), - just("}").to(Token::RAngle).labelled("RAngle"), - just("[").to(Token::LSquare).labelled("LSquare"), - just("]").to(Token::RSquare).labelled("RSquare"), - just(":").to(Token::Colon).labelled("Colon"), + let tag = choice(( + just("el").to(Token::El), + just("h").to(Token::H), + just("p").to(Token::P), + just("br").to(Token::Br), + just("ul").to(Token::Ul), + just("ol").to(Token::Ol), + just("row").to(Token::Row), + just("link").to(Token::Link), + just("navlink").to(Token::Navlink), + just("btn").to(Token::Btn), + just("navbtn").to(Token::Navbtn), + just("img").to(Token::Img), + just("table").to(Token::Table), + just("tcol").to(Token::Tcol), + just("tpcol").to(Token::Tpcol), + just("hr").to(Token::Hr), + just("b").to(Token::B), + just("i").to(Token::I), + just("bq").to(Token::Bq), + just("footlnk").to(Token::Footlnk), + just("footn").to(Token::Footn), + just("a").to(Token::A), + just("s").to(Token::S), + just("sup").to(Token::Sup), + just("sub").to(Token::Sub), + just("disc").to(Token::Disc), )) - .labelled("symbol"); + .or(choice(( + just("block").to(Token::Block), + just("carousel").to(Token::Carousel), + just("code").to(Token::Code), + just("pre").to(Token::Pre), + just("meta").to(Token::Meta), + ))) + .labelled("Tag"); - let number = text::int(10) - .from_str() - .unwrapped() - .map(Token::Number) - .labelled("number"); + let symbol = choice(( + // just("(").to(Token::LParen).labelled("("), + // just(")").to(Token::RParen).labelled(")"), + just("[").to(Token::LSquare).labelled("["), + just("]").to(Token::RSquare).labelled("]"), + // just(":").to(Token::Colon).labelled(":"), + )); + + let argument = { + let arg_escape = just('\\') + .ignore_then(just('"')) + .labelled("Escape sequence for argument"); + + let number = text::int(10) + .from_str() + .unwrapped() + .map(Token::NumberArgument) + .labelled("Number argument"); + + let text_argument = none_of("\"\n\\") + .or(arg_escape) + .repeated() + .to_slice() + .delimited_by(just("\""), just("\"")) + .map(Token::TextArgument) + .labelled("Text argument"); + + choice((number, text_argument)) + }; let textual = { let escape = just('\\') - .ignore_then(choice(( - just("\\`").to('`'.to_owned()), - just("\\]").to(']'.to_owned()), - ))) - .labelled("escape sequence"); + .ignore_then(just('}')) + .labelled("Multi-line escape sequence"); - let text = none_of("]\n") - .or(escape.clone()) + let text = none_of("\n").repeated().to_slice(); + + let text_body = text + .delimited_by(just(':'), just('\n')) + .map(Token::TextBody) + .labelled("One line text body"); + + let text_tag = text + .then_ignore(just('\n')) + .map(Token::TextTag) + .labelled("Text tag"); + + let multiline_text_body = none_of("}\\") + .or(escape) .repeated() - .to_slice() - .map(Token::Text); + .labelled("Body of multiline text"); - let multiline_text = none_of("`").or(escape).repeated(); + let mlms_n = just("{~") + .ignore_then(text::int(10).from_str().unwrapped()) + .labelled("Minimum spaces number"); - let mltext = multiline_text + let mlmstext = mlms_n + .then(multiline_text_body.clone().to_slice()) + .then_ignore(just("}")) + .map(|(n, t)| Token::MLMSText(n, t)) + .labelled("Multi line text with min spaces"); + + let mltext = multiline_text_body .clone() - .delimited_by(just('`'), just('`')) .to_slice() + .delimited_by(just('{'), just('}')) .map(Token::MLText) - .labelled("multiline text"); + .labelled("Multiline text"); - let mlmstext = multiline_text - .delimited_by(just("`#"), just('`')) + let rmltext = multiline_text_body .to_slice() + .delimited_by(just("{#"), just('}')) .map(Token::RMLText) - .labelled("raw multiline text"); + .labelled("Raw multiline text"); - choice((mltext, mlmstext, text)) + choice((mlmstext, mltext, rmltext, text_body, text_tag)) }; - let comment = just("#") - .then(none_of("\n").repeated()) + let comment = none_of("\n") + .repeated() .to_slice() + .delimited_by(just('#'), just('\n')) .map(Token::Comment); - let token = choice((symbol, number, textual, comment)); + let token = choice((comment, symbol, tag, argument, textual)); token .map_with(|t, e| (t, e.span())) diff --git a/src/daleth/lexer/types.rs b/src/daleth/lexer/types.rs index 9e2a903..c189a1a 100644 --- a/src/daleth/lexer/types.rs +++ b/src/daleth/lexer/types.rs @@ -7,23 +7,22 @@ pub type Spanned = (T, Span); pub enum Token<'src> { // Symbols /// ( - LParen, + // LParen, /// ) - RParen, - /// { - LAngle, - /// } - RAngle, + // RParen, /// [ LSquare, /// ] RSquare, /// : - Colon, + // Colon, - // Values - Number(u8), - Text(&'src str), + // Arguments + NumberArgument(u8), + TextArgument(&'src str), + + // Body + TextBody(&'src str), /// Multi Line text MLText(&'src str), /// Multi Line with min spaces text @@ -33,6 +32,7 @@ pub enum Token<'src> { /// Special Comment(&'src str), + TextTag(&'src str), // Tags El, @@ -67,3 +67,9 @@ pub enum Token<'src> { Pre, Meta, } + +#[derive(Clone, Debug, PartialEq)] +pub enum Argument<'src> { + Number(u8), + Argument(&'src str), +} diff --git a/tests/gemtext.rs b/tests/gemtext.rs deleted file mode 100644 index beaf33b..0000000 --- a/tests/gemtext.rs +++ /dev/null @@ -1,10 +0,0 @@ -use dalet::parsers::gemtext::parse_gemtext; - -#[test] -fn gem_text() { - let text = include_str!("./gemtext.gmi"); - - let _ = parse_gemtext(text).unwrap(); - - // println!("{:#?}", parsed); -}