feat: full lexer and example

2024-11-21 16:26:21 +03:00 · 2024-08-08 12:25:12 +03:00 · 2024-08-08 12:25:12 +03:00 · dfe50cd0f4
commit dfe50cd0f4
parent edf799904e
9 changed files with 283 additions and 56 deletions
--- a/examples/bench.md
+++ b/examples/bench.md
--- a/examples/bench.rs
+++ b/examples/bench.rs
@ -29,8 +29,7 @@ pub fn compress_zlib(data: &[u8]) -> std::io::Result<Vec<u8>> {
    c.finish()
 }

-#[test]
-fn bench() {
+fn main() {
    let page = vec![
        H("Heading 1".into(), Hl::One),
        H("Heading 2".into(), Hl::Two),
--- a/examples/daleth.dlth
+++ b/examples/daleth.dlth
@ -0,0 +1,129 @@
+# multilines
+#
+# {text} - input is trimmed with indent
+#
+# {~n text} - n is number of minimum spaces to add after trimming with indent
+# for each line
+#
+# {# text} - input not modified
+#
+# tag syntax
+#
+# tag: text body
+# tag { multiline text body }
+# body text always trimmed
+#
+# tag [ multiple tags body ]
+#
+# Arguments
+# tag argument
+#
+# Tags without body and argument also supported
+
+meta "title": Daleth syntax concept
+meta "description": This document describes Daleth syntax and some tags
+
+h1: TxtDot revolution
+p: TxtDot is a cool project
+
+# If no tag is specified, then the 'el' tag is placed
+This is element
+br
+
+# if no tag is specified but a '{}' is present, then the 'p' tag is placed
+# '\n' is deleted only in this format. If a line break is needed in a paragraph, use '  \n'.
+{
+  Check Dalet too
+  This is one paragraph
+}
+
+{ This is another paragraph }
+
+# "..." for a text argument
+row "center" [
+  link "https://github.com/txtdot/txtdot": Homepage
+  btn "https://example.com/donate" [
+    # tag without body
+    img "https://example.com/donate.png"
+    Donate
+  ]
+]
+
+# [] for multiple tags
+row [
+  [
+    h2: Features
+
+    ul [
+      Server-side page simplification
+      Media proxy
+      Image compression with Sharp
+      Rendering client-side apps `Vanilla, React, Vue, etc` with webder
+      Search with SearXNG
+      Handy API endpoints
+      No client JavaScript
+      Some kind of Material Design 3
+      Customization with plugins, see @txtdot/sdk and @txtdot/plugins
+    ]
+
+  ]
+
+  [
+    h2: Running
+
+    [
+      h3: Dev
+
+      # {} for multiline strings, indent is automatically trimmed
+      code {
+        npm install
+        npm run dev
+      }
+
+      # {~n Text} n is number of minimum spaces
+      code "markdown" {~4
+        this is codeblock
+      }
+
+      # {# Text} Text after "{# " not modified
+      code "markdown" {#     this is codeblock}
+    ]
+
+    [
+      h3: Production
+
+      code {
+        npm install
+        npm run build
+        npm run start
+      }
+    ]
+
+    [
+      h3: Docker
+
+      code: docker compose up -d
+    ]
+
+  ]
+]
+
+# Table has custom format if text used
+# +| cells | - primary column
+#  | cells | - secondary column
+#  | Element | Description | - converts to
+#  tcol [
+#    Element
+#    Description
+#  ]
+table {
+  +| Tag      | Description     |
+   | h        | Heading         |
+   | p        | Paragraph       |
+   | img      | Image           |
+   | link     | Link            |
+   | btn      | Button          |
+   | ul       | Unordered list  |
+   | br       | Line break      |
+  +| quantity | 7               |
+}
--- a/examples/daleth_lexer.rs
+++ b/examples/daleth_lexer.rs
@ -0,0 +1,27 @@
+use ariadne::{Color, Label, Report, ReportKind, Source};
+use chumsky::Parser;
+use dalet::daleth::lexer::lexer;
+
+fn main() {
+    let src_file = "daleth.dlth";
+    let src = include_str!("./daleth.dlth");
+    // Lex the embedded sample: dump the tokens on success, else one report per lexer error.
+    match lexer().parse(src).into_result() {
+        Ok(tokens) => println!("{:#?}", tokens),
+        Err(errors) => errors.into_iter().for_each(|e| {
+            // Format the error once; the same text is the report headline and the label.
+            let message = e.to_string();
+            Report::build(ReportKind::Error, src_file, e.span().start)
+                .with_code("Compiler")
+                .with_message(&message)
+                .with_label(
+                    Label::new((src_file, e.span().into_range()))
+                        .with_message(&message)
+                        .with_color(Color::Red),
+                )
+                .finish()
+                .print((src_file, Source::from(&src)))
+                .unwrap()
+        }),
+    }
+}
--- a/examples/gemtext.gmi
+++ b/examples/gemtext.gmi
--- a/examples/gemtext.rs
+++ b/examples/gemtext.rs
@ -0,0 +1,9 @@
+use dalet::parsers::gemtext::parse_gemtext;
+
+fn main() {
+    // The sample document is embedded into the binary by `include_str!` at compile time.
+    let source = include_str!("./gemtext.gmi");
+    let parsed_doc = parse_gemtext(source).unwrap();
+
+    println!("{:#?}", parsed_doc);
+}
--- a/src/daleth/lexer/mod.rs
+++ b/src/daleth/lexer/mod.rs
@ -4,61 +4,128 @@ pub mod types;

 pub fn lexer<'src>(
 ) -> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> {
-    let symbol = choice((
-        just("(").to(Token::LParen).labelled("LParen"),
-        just(")").to(Token::RParen).labelled("RParen"),
-        just("{").to(Token::LAngle).labelled("LAngle"),
-        just("}").to(Token::RAngle).labelled("RAngle"),
-        just("[").to(Token::LSquare).labelled("LSquare"),
-        just("]").to(Token::RSquare).labelled("RSquare"),
-        just(":").to(Token::Colon).labelled("Colon"),
+    let tag = choice((
+        just("el").to(Token::El),
+        just("h").to(Token::H),
+        just("p").to(Token::P),
+        just("br").to(Token::Br),
+        just("ul").to(Token::Ul),
+        just("ol").to(Token::Ol),
+        just("row").to(Token::Row),
+        just("link").to(Token::Link),
+        just("navlink").to(Token::Navlink),
+        just("btn").to(Token::Btn),
+        just("navbtn").to(Token::Navbtn),
+        just("img").to(Token::Img),
+        just("table").to(Token::Table),
+        just("tcol").to(Token::Tcol),
+        just("tpcol").to(Token::Tpcol),
+        just("hr").to(Token::Hr),
+        just("b").to(Token::B),
+        just("i").to(Token::I),
+        just("bq").to(Token::Bq),
+        just("footlnk").to(Token::Footlnk),
+        just("footn").to(Token::Footn),
+        just("a").to(Token::A),
+        just("s").to(Token::S),
+        just("sup").to(Token::Sup),
+        just("sub").to(Token::Sub),
+        just("disc").to(Token::Disc),
    ))
-    .labelled("symbol");
+    .or(choice((
+        just("block").to(Token::Block),
+        just("carousel").to(Token::Carousel),
+        just("code").to(Token::Code),
+        just("pre").to(Token::Pre),
+        just("meta").to(Token::Meta),
+    )))
+    .labelled("Tag");
+
+    let symbol = choice((
+        // just("(").to(Token::LParen).labelled("("),
+        // just(")").to(Token::RParen).labelled(")"),
+        just("[").to(Token::LSquare).labelled("["),
+        just("]").to(Token::RSquare).labelled("]"),
+        // just(":").to(Token::Colon).labelled(":"),
+    ));
+
+    let argument = {
+        let arg_escape = just('\\')
+            .ignore_then(just('"'))
+            .labelled("Escape sequence for argument");

        let number = text::int(10)
            .from_str()
            .unwrapped()
-        .map(Token::Number)
-        .labelled("number");
+            .map(Token::NumberArgument)
+            .labelled("Number argument");
+
+        let text_argument = none_of("\"\n\\")
+            .or(arg_escape)
+            .repeated()
+            .to_slice()
+            .delimited_by(just("\""), just("\""))
+            .map(Token::TextArgument)
+            .labelled("Text argument");
+
+        choice((number, text_argument))
+    };

    let textual = {
        let escape = just('\\')
-            .ignore_then(choice((
-                just("\\`").to('`'.to_owned()),
-                just("\\]").to(']'.to_owned()),
-            )))
-            .labelled("escape sequence");
+            .ignore_then(just('}'))
+            .labelled("Multi-line escape sequence");

-        let text = none_of("]\n")
-            .or(escape.clone())
+        let text = none_of("\n").repeated().to_slice();
+
+        let text_body = text
+            .delimited_by(just(':'), just('\n'))
+            .map(Token::TextBody)
+            .labelled("One line text body");
+
+        let text_tag = text
+            .then_ignore(just('\n'))
+            .map(Token::TextTag)
+            .labelled("Text tag");
+
+        let multiline_text_body = none_of("}\\")
+            .or(escape)
            .repeated()
-            .to_slice()
-            .map(Token::Text);
+            .labelled("Body of multiline text");

-        let multiline_text = none_of("`").or(escape).repeated();
+        let mlms_n = just("{~")
+            .ignore_then(text::int(10).from_str().unwrapped())
+            .labelled("Minimum spaces number");

-        let mltext = multiline_text
+        let mlmstext = mlms_n
+            .then(multiline_text_body.clone().to_slice())
+            .then_ignore(just("}"))
+            .map(|(n, t)| Token::MLMSText(n, t))
+            .labelled("Multi line text with min spaces");
+
+        let mltext = multiline_text_body
            .clone()
-            .delimited_by(just('`'), just('`'))
            .to_slice()
+            .delimited_by(just('{'), just('}'))
            .map(Token::MLText)
-            .labelled("multiline text");
+            .labelled("Multiline text");

-        let mlmstext = multiline_text
-            .delimited_by(just("`#"), just('`'))
+        let rmltext = multiline_text_body
            .to_slice()
+            .delimited_by(just("{#"), just('}'))
            .map(Token::RMLText)
-            .labelled("raw multiline text");
+            .labelled("Raw multiline text");

-        choice((mltext, mlmstext, text))
+        choice((mlmstext, mltext, rmltext, text_body, text_tag))
    };

-    let comment = just("#")
-        .then(none_of("\n").repeated())
+    let comment = none_of("\n")
+        .repeated()
        .to_slice()
+        .delimited_by(just('#'), just('\n'))
        .map(Token::Comment);

-    let token = choice((symbol, number, textual, comment));
+    let token = choice((comment, symbol, tag, argument, textual));

    token
        .map_with(|t, e| (t, e.span()))
--- a/src/daleth/lexer/types.rs
+++ b/src/daleth/lexer/types.rs
@ -7,23 +7,22 @@ pub type Spanned<T> = (T, Span);
 pub enum Token<'src> {
    // Symbols
    /// (
-    LParen,
+    // LParen,
    /// )
-    RParen,
-    /// {
-    LAngle,
-    /// }
-    RAngle,
+    // RParen,
    /// [
    LSquare,
    /// ]
    RSquare,
    /// :
-    Colon,
+    // Colon,

-    // Values
-    Number(u8),
-    Text(&'src str),
+    // Arguments
+    NumberArgument(u8),
+    TextArgument(&'src str),
+
+    // Body
+    TextBody(&'src str),
    /// Multi Line text
    MLText(&'src str),
    /// Multi Line with min spaces text
@ -33,6 +32,7 @@ pub enum Token<'src> {

    /// Special
    Comment(&'src str),
+    TextTag(&'src str),

    // Tags
    El,
@ -67,3 +67,9 @@ pub enum Token<'src> {
    Pre,
    Meta,
 }
+
+/// A tag argument value: either a number or a piece of text.
+///
+/// NOTE(review): nothing in the visible changes constructs this type yet; the
+/// variants look like counterparts of `Token::NumberArgument` and
+/// `Token::TextArgument` — confirm against the parser that consumes them.
+#[derive(Clone, Debug, PartialEq)]
+pub enum Argument<'src> {
+    /// Numeric argument, limited to the `u8` range.
+    Number(u8),
+    /// Text argument: a string slice borrowed for the `'src` lifetime.
+    Argument(&'src str),
+}
--- a/tests/gemtext.rs
+++ b/tests/gemtext.rs
@ -1,10 +0,0 @@
-use dalet::parsers::gemtext::parse_gemtext;
-
-#[test]
-fn gem_text() {
-    let text = include_str!("./gemtext.gmi");
-
-    let _ = parse_gemtext(text).unwrap();
-
-    // println!("{:#?}", parsed);
-}