feat: formatter

TODO: fix tags with optional body formatter
2024-11-27 02:46:23 +03:00 · 2024-08-09 18:24:42 +03:00 · 2024-08-09 18:24:42 +03:00 · 74f42aa314
commit 74f42aa314
parent dfe50cd0f4
12 changed files with 281 additions and 71 deletions
--- a/examples/bench.rs
+++ b/examples/bench.rs
@ -1,6 +1,6 @@
 use dalet::{
    daletpack::*,
-    typed::{Hl, TNArg, Tag::*},
+    typed::{Hl, TNullArg, Tag::*},
 };
 use flate2::Compression;
 use std::io::Write;
@ -41,7 +41,7 @@ fn main() {
        ]
        .into()),
        Br,
-        Code("Hello world".into(), TNArg::Null),
+        Code("Hello world".into(), TNullArg::Null),
        Br,
        Ul(vec![
            El("abc".into()),
--- a/examples/daleth.dlth
+++ b/examples/daleth.dlth
@ -5,7 +5,8 @@
 # {~n text} - n is number of minimum spaces to add after trimming with indent
 # for each line
 #
-# {# text} - input not modified
+# {#text} - input not modified
 #
 #
 # tag syntax
 #
@ -19,6 +20,13 @@
 # tag argument
 #
 # Tags without body and argument also supported
 #
 #
 # custom no tag syntax
 #
 # {-text} - paragraph, text indent is trimmed
 # [[tags]] - element tag with body of multiple tags
 # text - element tag with text body
 meta "title": Daleth syntax concept
 meta "description": This document describes Daleth syntax and some tags
@ -27,19 +35,19 @@ h1: TxtDot revolution
 p: TxtDot is a cool project
 # If no tag is specified, then the 'el' tag is placed
 This is element
 br
-# if no tag is specified but a '{}' is present, then the 'p' tag is placed
+# if no tag is specified but a '{- text}' is present, then the 'p' tag is placed
-# '\n' is deleted only in this format. If a break line is needed in a paragraph, use '  \n'.
+# '\n' is deleted in this format. If a line break is needed in a paragraph, use '  \n'.
-{
+{-
  Check Dalet too
  This is one paragraph
 }
-{ This is another paragraph }
+{- This is another paragraph ({- text\}) }
 # ( ) for argument
 row "center" [
  link "https://github.com/txtdot/txtdot": Homepage
  btn "https://example.com/donate" [
@ -51,7 +59,9 @@ row "center" [
 # [] for multiple tags
 row [
-  [
+  # if no tag is specified but a '[[]]' is present, then an 'el' tag
  # with a body of multiple tags is placed
  [[
    h2: Features
    ul [
@ -65,13 +75,12 @@ row [
      Some kind of Material Design 3
      Customization with plugins, see @txtdot/sdk and @txtdot/plugins
    ]
  ]]
-  ]
+  [[
  [
    h2: Running
-    [
+    [[
      h3: Dev
      # {} for multiline strings, indent is automatically trimmed
@ -87,25 +96,23 @@ row [
      # {# Text} Text after "`# " not modified
      code "markdown" {#     this is codeblock}
-    ]
+    ]]
-    [
+    [[
      h3: Production
      code {
        npm install
        npm run build
        npm run start
      }
-    ]
+    ]]
-    [
+    [[
      h3: Docker
      code: docker compose up -d
-    ]
+    ]]
-  ]
+  ]]
 ]
 # Table has custom format if text used
--- a/examples/daleth_lexer.rs
+++ b/examples/daleth_lexer.rs
@ -1,6 +1,6 @@
 use ariadne::{Color, Label, Report, ReportKind, Source};
 use chumsky::Parser;
-use dalet::daleth::lexer::lexer;
+use dalet::daleth::{format::format, lexer::lexer};
 fn main() {
    let src_file = "daleth.dlth";
@ -9,7 +9,10 @@ fn main() {
    let parsed = lexer().parse(src);
    match parsed.into_result() {
-        Ok(t) => println!("{:#?}", t),
+        Ok(t) => {
            println!("{:#?}", t);
            println!("{}", format(&t));
        }
        Err(e) => e.into_iter().for_each(|e| {
            Report::build(ReportKind::Error, src_file, e.span().start)
                .with_code("Compiler")
@ -23,5 +26,5 @@ fn main() {
                .print((src_file, Source::from(&src)))
                .unwrap()
        }),
-    }
+    };
 }
--- a/src/daleth/format.rs
+++ b/src/daleth/format.rs
@ -0,0 +1,136 @@
 use super::{
    lexer::types::{Spanned, Token},
    utils::set_indent,
 };
 /// Tells the formatter whether an extra newline has to be written before
 /// the token that is about to be emitted.
 ///
 /// `last1` is the token directly preceding the current one, `last2` the
 /// token before `last1`. The result is `true` when `last1` is `Br` or `Hr`,
 /// or when `last1` exists and `last2` is `Img`. In every other case
 /// (including when there is no previous token) the result is `false`.
 fn nl_needed<'src>(last2: Option<&Token<'src>>, last1: Option<&Token<'src>>) -> bool {
    match last1 {
        // Nothing has been emitted yet, so there is nothing to terminate.
        None => false,
        Some(Token::Br) | Some(Token::Hr) => true,
        // Any other previous token: only the token two steps back matters.
        Some(_) => matches!(last2, Some(Token::Img)),
    }
 }
 pub fn format<'src>(spanned_tokens: &Vec<Spanned<Token<'src>>>) -> String {
    let mut current_indent: usize = 0;
    let mut formatted = String::new();
    let len = spanned_tokens.len();
    for i in 0..len {
        let last2 = {
            if i < 2 {
                None
            } else {
                spanned_tokens.get(i - 2).map(|t| &t.0)
            }
        };
        let last1 = {
            if i < 1 {
                None
            } else {
                spanned_tokens.get(i - 1).map(|t| &t.0)
            }
        };
        if nl_needed(last2, last1) {
            formatted.push_str("\n");
        };
        let spanned_token = &spanned_tokens[i].0;
        let to_push = match spanned_token {
            Token::LSquare => {
                current_indent += 1;
                " [\n".to_owned()
            }
            Token::RSquare => {
                current_indent -= 1;
                format!("{}\n", set_indent("]", current_indent))
            }
            Token::NumberArgument(n) => format!("{n}"),
            Token::TextArgument(t) => format!(" \"{t}\""),
            Token::TextBody(t) => format!(": {}\n", t.trim()),
            Token::MLText(t) => format!(
                " {{\n{}\n{}\n",
                set_indent(t, current_indent + 1),
                set_indent("}", current_indent)
            ),
            Token::MLMSText(n, t) => format!(
                " {{~{n}\n{}\n{}\n",
                set_indent(t, current_indent + 1),
                set_indent("}", current_indent)
            ),
            Token::RMLText(t) => format!(" {{#{t}}}\n"),
            Token::Comment(c) => format!("{}\n", set_indent(&format!("# {c}"), current_indent)),
            Token::TextTag(t) => format!("{}\n", set_indent(t, current_indent)),
            Token::El => set_indent("el", current_indent),
            Token::H => set_indent("h", current_indent),
            Token::P => set_indent("p", current_indent),
            Token::Br => set_indent("br", current_indent),
            Token::Ul => set_indent("ul", current_indent),
            Token::Ol => set_indent("ol", current_indent),
            Token::Row => set_indent("row", current_indent),
            Token::Link => set_indent("link", current_indent),
            Token::Navlink => set_indent("navlink", current_indent),
            Token::Btn => set_indent("btn", current_indent),
            Token::Navbtn => set_indent("navbtn", current_indent),
            Token::Img => set_indent("img", current_indent),
            Token::Table => set_indent("table", current_indent),
            Token::Tcol => set_indent("tcol", current_indent),
            Token::Tpcol => set_indent("tpcol", current_indent),
            Token::Hr => set_indent("hr", current_indent),
            Token::B => set_indent("b", current_indent),
            Token::I => set_indent("i", current_indent),
            Token::Bq => set_indent("bq", current_indent),
            Token::Footlnk => set_indent("footlnk", current_indent),
            Token::Footn => set_indent("footn", current_indent),
            Token::A => set_indent("a", current_indent),
            Token::S => set_indent("s", current_indent),
            Token::Sup => set_indent("sup", current_indent),
            Token::Sub => set_indent("sub", current_indent),
            Token::Disc => set_indent("disc", current_indent),
            Token::Block => set_indent("block", current_indent),
            Token::Carousel => set_indent("carousel", current_indent),
            Token::Code => set_indent("code", current_indent),
            Token::Pre => set_indent("pre", current_indent),
            Token::Meta => set_indent("meta", current_indent),
            Token::ElOpen => {
                let s = set_indent("[[", current_indent);
                current_indent += 1;
                format!("{s}\n")
            }
            Token::ElClose => {
                current_indent -= 1;
                format!("{}\n", set_indent("]]", current_indent))
            }
            Token::Paragraph(t) => format!(
                "{{-\n{}\n{}\n",
                set_indent(t, current_indent + 1),
                set_indent("}", current_indent)
            ),
            Token::EmptyLine => "\n".to_owned(),
        };
        formatted.push_str(&to_push);
    }
    formatted.trim().to_owned()
 }
--- a/src/daleth/lexer/mod.rs
+++ b/src/daleth/lexer/mod.rs
@ -42,11 +42,10 @@ pub fn lexer<'src>(
    .labelled("Tag");
    let symbol = choice((
-        // just("(").to(Token::LParen).labelled("("),
+        just("[[").to(Token::ElOpen).labelled("[["),
-        // just(")").to(Token::RParen).labelled(")"),
+        just("]]").to(Token::ElClose).labelled("]]"),
        just("[").to(Token::LSquare).labelled("["),
        just("]").to(Token::RSquare).labelled("]"),
        // just(":").to(Token::Colon).labelled(":"),
    ));
    let argument = {
@ -64,7 +63,7 @@ pub fn lexer<'src>(
            .or(arg_escape)
            .repeated()
            .to_slice()
-            .delimited_by(just("\""), just("\""))
+            .delimited_by(just('"'), just('"'))
            .map(Token::TextArgument)
            .labelled("Text argument");
@ -78,8 +77,8 @@ pub fn lexer<'src>(
        let text = none_of("\n").repeated().to_slice();
-        let text_body = text
+        let text_body = just(':')
-            .delimited_by(just(':'), just('\n'))
+            .ignore_then(text)
            .map(Token::TextBody)
            .labelled("One line text body");
@ -93,15 +92,12 @@ pub fn lexer<'src>(
            .repeated()
            .labelled("Body of multiline text");
-        let mlms_n = just("{~")
+        let paragraph = multiline_text_body
-            .ignore_then(text::int(10).from_str().unwrapped())
+            .clone()
-            .labelled("Minimum spaces number");
+            .to_slice()
-
+            .delimited_by(just("{-"), just("}"))
-        let mlmstext = mlms_n
+            .map(Token::Paragraph)
-            .then(multiline_text_body.clone().to_slice())
+            .labelled("Paragraph syntax");
            .then_ignore(just("}"))
            .map(|(n, t)| Token::MLMSText(n, t))
            .labelled("Multi line text with min spaces");
        let mltext = multiline_text_body
            .clone()
@ -110,26 +106,40 @@ pub fn lexer<'src>(
            .map(Token::MLText)
            .labelled("Multiline text");
        let mlmstext = {
            let mlms_n = just("{~")
                .ignore_then(text::int(10).from_str().unwrapped())
                .labelled("Minimum spaces number");
            mlms_n
                .then(multiline_text_body.clone().to_slice())
                .then_ignore(just("}"))
                .map(|(n, t)| Token::MLMSText(n, t))
                .labelled("Multi line text with min spaces")
        };
        let rmltext = multiline_text_body
            .to_slice()
            .delimited_by(just("{#"), just('}'))
            .map(Token::RMLText)
            .labelled("Raw multiline text");
-        choice((mlmstext, mltext, rmltext, text_body, text_tag))
+        choice((paragraph, mlmstext, rmltext, mltext, text_body, text_tag))
    };
-    let comment = none_of("\n")
+    let comment = just('#')
-        .repeated()
+        .ignore_then(none_of("\n").repeated().to_slice())
        .to_slice()
        .delimited_by(just('#'), just('\n'))
        .map(Token::Comment);
-    let token = choice((comment, symbol, tag, argument, textual));
+    let empty_line = text::inline_whitespace()
        .delimited_by(text::newline(), text::newline())
        .to(Token::EmptyLine);
    let token = choice((empty_line.clone(), comment, symbol, tag, argument, textual));
    token
        .padded_by(text::whitespace().and_is(empty_line.not()).or_not())
        .map_with(|t, e| (t, e.span()))
        .padded()
        .repeated()
        .collect()
 }
--- a/src/daleth/lexer/types.rs
+++ b/src/daleth/lexer/types.rs
@ -6,16 +6,14 @@ pub type Spanned<T> = (T, Span);
 #[derive(Clone, Debug, PartialEq)]
 pub enum Token<'src> {
    // Symbols
    /// (
    // LParen,
    /// )
    // RParen,
    /// [
    LSquare,
    /// ]
    RSquare,
-    /// :
+    /// [[
-    // Colon,
+    ElOpen,
    /// ]]
    ElClose,
    // Arguments
    NumberArgument(u8),
@ -31,8 +29,12 @@ pub enum Token<'src> {
    RMLText(&'src str),
    /// Special
    Comment(&'src str),
    TextTag(&'src str),
    Paragraph(&'src str),
    /// Special removed before parse
    Comment(&'src str),
    EmptyLine,
    // Tags
    El,
@ -68,8 +70,8 @@ pub enum Token<'src> {
    Meta,
 }
-#[derive(Clone, Debug, PartialEq)]
+impl<'src> From<Spanned<Token<'src>>> for Token<'src> {
-pub enum Argument<'src> {
+    fn from(value: Spanned<Token<'src>>) -> Self {
-    Number(u8),
+        value.0
-    Argument(&'src str),
+    }
 }
--- a/src/daleth/mod.rs
+++ b/src/daleth/mod.rs
@ -1 +1,3 @@
 pub mod format;
 pub mod lexer;
 pub mod utils;
--- a/src/daleth/utils.rs
+++ b/src/daleth/utils.rs
@ -0,0 +1,52 @@
 /// Removes the common leading indentation from every line of `input`.
 ///
 /// The common indentation is the smallest number of leading whitespace
 /// characters found on any line that contains something other than
 /// whitespace (0 when there is no such line). That many characters are cut
 /// from the start of each line; a line with fewer characters than that is
 /// kept unchanged. The lines are re-joined with `\n` and the result is
 /// passed through `trim_newline`.
 ///
 /// Indentation is measured and removed in characters, so multi-byte
 /// whitespace (for example U+00A0 or U+3000) is handled without slicing
 /// inside a UTF-8 sequence.
 pub fn trim_indent(input: &str) -> String {
    let lines: Vec<&str> = input.lines().collect();

    // Find the minimum indentation (in characters) of non-blank lines
    let min_indent = lines
        .iter()
        .filter(|line| !line.trim().is_empty())
        .map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
        .min()
        .unwrap_or(0);

    // Drop `min_indent` leading characters from each line, translating the
    // character count into a byte offset before slicing.
    let trimmed_lines: Vec<&str> = lines
        .into_iter()
        .map(|line| match line.char_indices().nth(min_indent) {
            Some((offset, _)) => &line[offset..],
            // Exactly `min_indent` characters long: nothing is left over.
            None if line.chars().count() == min_indent => "",
            // Shorter than the common indentation: keep the line as it is.
            None => line,
        })
        .collect();

    trim_newline(&trimmed_lines.join("\n")).to_owned()
 }
 /// Re-indents `input` to the given nesting level.
 ///
 /// The text is first dedented with `trim_indent`, then every line is
 /// prefixed with `indent` repetitions of four spaces via `prepend_indent`.
 pub fn set_indent(input: &str, indent: usize) -> String {
    let dedented = trim_indent(input);
    let prefix = "    ".repeat(indent);
    prepend_indent(&dedented, &prefix)
 }
 /// Strips every leading `\n` / `\r` character and all trailing whitespace
 /// from `s`, returning a sub-slice of the input.
 ///
 /// Leading spaces or tabs are preserved: only line-break characters are
 /// removed at the front.
 fn trim_newline<'a>(s: &'a str) -> &'a str {
    let without_leading_breaks = s.trim_start_matches(|c: char| c == '\n' || c == '\r');
    without_leading_breaks.trim_end()
 }
 /// Puts `indent` in front of every line of `input`.
 ///
 /// Lines are taken from `str::lines` and re-joined with `\n`, so no
 /// trailing newline is produced. Empty input yields an empty string.
 fn prepend_indent(input: &str, indent: &str) -> String {
    let mut out = String::with_capacity(input.len());
    for (idx, line) in input.lines().enumerate() {
        // Separator goes between lines only, never after the last one.
        if idx > 0 {
            out.push('\n');
        }
        out.push_str(indent);
        out.push_str(line);
    }
    out
 }
--- a/src/parsers/gemtext.rs
+++ b/src/parsers/gemtext.rs
@ -1,5 +1,5 @@
 use crate::typed::{
-    Body, Hl, Page, TNArg,
+    Body, Hl, Page, TNullArg,
    Tag::{self, *},
 };
@ -32,9 +32,7 @@ pub fn parse_gemtext(s: &str) -> Result<Page, GemTextParseError> {
            let url = body.next().ok_or(GemTextParseError::InvalidLink)?.trim();
            match body.next() {
-                Some(label) => page.push(P(
+                Some(label) => page.push(P(vec![Navlink(label.trim().into(), url.into())].into())),
                    vec![Navlink(label.trim().into(), url.into())].into()
                )),
                None => page.push(P(vec![Navlink(Body::Null, url.into())].into())),
            };
        } else if line.starts_with("# ") {
@ -55,7 +53,7 @@ pub fn parse_gemtext(s: &str) -> Result<Page, GemTextParseError> {
            page.push(Bq(body.into()));
        } else if line.starts_with("```") {
            if preformatted {
-                page.push(Code(preformatted_text.join("\n"), TNArg::Null));
+                page.push(Code(preformatted_text.join("\n"), TNullArg::Null));
                preformatted_text.clear();
            }
--- a/src/traits/from_daletl.rs
+++ b/src/traits/from_daletl.rs
@ -70,13 +70,13 @@ impl TryFrom<DlArgument> for AlignArg {
    }
 }
-impl TryFrom<DlArgument> for TNArg {
+impl TryFrom<DlArgument> for TNullArg {
    type Error = ConversionError;
    fn try_from(value: DlArgument) -> Result<Self, Self::Error> {
        match value {
-            DlArgument::Text(t) => Ok(TNArg::Text(t)),
+            DlArgument::Text(t) => Ok(TNullArg::Text(t)),
-            DlArgument::Null => Ok(TNArg::Null),
+            DlArgument::Null => Ok(TNullArg::Null),
            _ => Err(ConversionError),
        }
    }
--- a/src/traits/to_daletl.rs
+++ b/src/traits/to_daletl.rs
@ -64,11 +64,11 @@ impl From<AlignArg> for DlArgument {
    }
 }
-impl From<TNArg> for DlArgument {
+impl From<TNullArg> for DlArgument {
-    fn from(item: TNArg) -> DlArgument {
+    fn from(item: TNullArg) -> DlArgument {
        match item {
-            TNArg::Text(s) => s.into(),
+            TNullArg::Text(s) => s.into(),
-            TNArg::Null => NA,
+            TNullArg::Null => NA,
        }
    }
 }
--- a/src/typed.rs
+++ b/src/typed.rs
@ -44,7 +44,7 @@ pub enum Tag {
    Disc(NNBody),
    Block(NNBody, AlignArg),
    Carousel(Vec<Tag>),
-    Code(TBody, TNArg),
+    Code(TBody, TNullArg),
    Pre(TBody),
    Meta(TBody, TArg),
 }
@ -73,7 +73,7 @@ pub enum Arg {
 }
 #[derive(AutoFrom, Debug, Clone, PartialEq, Eq)]
-pub enum TNArg {
+pub enum TNullArg {
    Text(String),
    Null,
 }