feat: formatter

TODO: fix the formatter for tags with an optional body
2025-03-12 10:44:37 +03:00 · 2024-08-09 18:24:42 +03:00 · 2024-08-09 18:24:42 +03:00 · 74f42aa314
commit 74f42aa314
parent dfe50cd0f4
12 changed files with 281 additions and 71 deletions
--- a/examples/bench.rs
+++ b/examples/bench.rs
@ -1,6 +1,6 @@
 use dalet::{
    daletpack::*,
-    typed::{Hl, TNArg, Tag::*},
+    typed::{Hl, TNullArg, Tag::*},
 };
 use flate2::Compression;
 use std::io::Write;
@ -41,7 +41,7 @@ fn main() {
        ]
        .into()),
        Br,
-        Code("Hello world".into(), TNArg::Null),
+        Code("Hello world".into(), TNullArg::Null),
        Br,
        Ul(vec![
            El("abc".into()),
--- a/examples/daleth.dlth
+++ b/examples/daleth.dlth
@ -5,7 +5,8 @@
 # {~n text} - n is number of minimum spaces to add after trimming with indent
 # for each line
 #
-# {# text} - input not modified
+# {#text} - input not modified
+#
 #
 # tag syntax
 #
@ -19,6 +20,13 @@
 # tag argument
 #
 # Tags without body and argument also supported
+#
+#
+# custom syntax without a tag name
+#
+# {-text} - paragraph, text indent is trimmed
+# [[tags]] - element tag with body of multiple tags
+# text - element tag with text body

 meta "title": Daleth syntax concept
 meta "description": This document describes Daleth syntax and some tags
@ -27,19 +35,19 @@ h1: TxtDot revolution
 p: TxtDot is a cool project

 # If no tag is specified, then the 'el' tag is placed
+
 This is element
 br

-# if no tag is specified but a '{}' is present, then the 'p' tag is placed
-# '\n' is deleted only in this format. If a break line is needed in a paragraph, use '  \n'.
-{
+# if no tag is specified but a '{- text}' is present, then the 'p' tag is placed
+# '\n' is deleted in this format. If a line break is needed in a paragraph, use '  \n'.
+{-
  Check Dalet too
  This is one paragraph
 }

-{ This is another paragraph }
+{- This is another paragraph ({- text\}) }

-# ( ) for argument
 row "center" [
  link "https://github.com/txtdot/txtdot": Homepage
  btn "https://example.com/donate" [
@ -51,7 +59,9 @@ row "center" [

 # [] for multiple tags
 row [
-  [
+  # if no tag is specified but a '[[]]' is present, then the 'el' tag
+  # with a body of multiple tags is placed
+  [[
    h2: Features

    ul [
@ -65,13 +75,12 @@ row [
      Some kind of Material Design 3
      Customization with plugins, see @txtdot/sdk and @txtdot/plugins
    ]
+  ]]

-  ]
-
-  [
+  [[
    h2: Running

-    [
+    [[
      h3: Dev

      # {} for multiline strings, indent is automatically trimmed
@ -87,25 +96,23 @@ row [

      # {# Text} Text after "`# " not modified
      code "markdown" {#     this is codeblock}
-    ]
+    ]]

-    [
+    [[
      h3: Production
-
      code {
        npm install
        npm run build
        npm run start
      }
-    ]
+    ]]

-    [
+    [[
      h3: Docker
-
      code: docker compose up -d
-    ]
+    ]]

-  ]
+  ]]
 ]

 # Table has custom format if text used
--- a/examples/daleth_lexer.rs
+++ b/examples/daleth_lexer.rs
@ -1,6 +1,6 @@
 use ariadne::{Color, Label, Report, ReportKind, Source};
 use chumsky::Parser;
-use dalet::daleth::lexer::lexer;
+use dalet::daleth::{format::format, lexer::lexer};

 fn main() {
    let src_file = "daleth.dlth";
@ -9,7 +9,10 @@ fn main() {
    let parsed = lexer().parse(src);

    match parsed.into_result() {
-        Ok(t) => println!("{:#?}", t),
+        Ok(t) => {
+            println!("{:#?}", t);
+            println!("{}", format(&t));
+        }
        Err(e) => e.into_iter().for_each(|e| {
            Report::build(ReportKind::Error, src_file, e.span().start)
                .with_code("Compiler")
@ -23,5 +26,5 @@ fn main() {
                .print((src_file, Source::from(&src)))
                .unwrap()
        }),
-    }
+    };
 }
--- a/src/daleth/format.rs
+++ b/src/daleth/format.rs
@ -0,0 +1,136 @@
+use super::{
+    lexer::types::{Spanned, Token},
+    utils::set_indent,
+};
+
+fn nl_needed<'src>(last2: Option<&Token<'src>>, last1: Option<&Token<'src>>) -> bool {
+    if let Some(last1) = last1 {
+        if *last1 == Token::Br {
+            return true;
+        }
+
+        if *last1 == Token::Hr {
+            return true;
+        }
+
+        if let Some(last2) = last2 {
+            if *last2 == Token::Img {
+                return true;
+            }
+        }
+    }
+
+    false
+}
+
+pub fn format<'src>(spanned_tokens: &Vec<Spanned<Token<'src>>>) -> String {
+    let mut current_indent: usize = 0;
+    let mut formatted = String::new();
+    let len = spanned_tokens.len();
+
+    for i in 0..len {
+        let last2 = {
+            if i < 2 {
+                None
+            } else {
+                spanned_tokens.get(i - 2).map(|t| &t.0)
+            }
+        };
+
+        let last1 = {
+            if i < 1 {
+                None
+            } else {
+                spanned_tokens.get(i - 1).map(|t| &t.0)
+            }
+        };
+
+        if nl_needed(last2, last1) {
+            formatted.push_str("\n");
+        };
+
+        let spanned_token = &spanned_tokens[i].0;
+
+        let to_push = match spanned_token {
+            Token::LSquare => {
+                current_indent += 1;
+                " [\n".to_owned()
+            }
+            Token::RSquare => {
+                current_indent -= 1;
+                format!("{}\n", set_indent("]", current_indent))
+            }
+
+            Token::NumberArgument(n) => format!("{n}"),
+            Token::TextArgument(t) => format!(" \"{t}\""),
+            Token::TextBody(t) => format!(": {}\n", t.trim()),
+            Token::MLText(t) => format!(
+                " {{\n{}\n{}\n",
+                set_indent(t, current_indent + 1),
+                set_indent("}", current_indent)
+            ),
+            Token::MLMSText(n, t) => format!(
+                " {{~{n}\n{}\n{}\n",
+                set_indent(t, current_indent + 1),
+                set_indent("}", current_indent)
+            ),
+            Token::RMLText(t) => format!(" {{#{t}}}\n"),
+            Token::Comment(c) => format!("{}\n", set_indent(&format!("# {c}"), current_indent)),
+
+            Token::TextTag(t) => format!("{}\n", set_indent(t, current_indent)),
+
+            Token::El => set_indent("el", current_indent),
+            Token::H => set_indent("h", current_indent),
+            Token::P => set_indent("p", current_indent),
+            Token::Br => set_indent("br", current_indent),
+            Token::Ul => set_indent("ul", current_indent),
+            Token::Ol => set_indent("ol", current_indent),
+            Token::Row => set_indent("row", current_indent),
+            Token::Link => set_indent("link", current_indent),
+            Token::Navlink => set_indent("navlink", current_indent),
+            Token::Btn => set_indent("btn", current_indent),
+            Token::Navbtn => set_indent("navbtn", current_indent),
+            Token::Img => set_indent("img", current_indent),
+            Token::Table => set_indent("table", current_indent),
+            Token::Tcol => set_indent("tcol", current_indent),
+            Token::Tpcol => set_indent("tpcol", current_indent),
+            Token::Hr => set_indent("hr", current_indent),
+            Token::B => set_indent("b", current_indent),
+            Token::I => set_indent("i", current_indent),
+            Token::Bq => set_indent("bq", current_indent),
+            Token::Footlnk => set_indent("footlnk", current_indent),
+            Token::Footn => set_indent("footn", current_indent),
+            Token::A => set_indent("a", current_indent),
+            Token::S => set_indent("s", current_indent),
+            Token::Sup => set_indent("sup", current_indent),
+            Token::Sub => set_indent("sub", current_indent),
+            Token::Disc => set_indent("disc", current_indent),
+            Token::Block => set_indent("block", current_indent),
+            Token::Carousel => set_indent("carousel", current_indent),
+            Token::Code => set_indent("code", current_indent),
+            Token::Pre => set_indent("pre", current_indent),
+            Token::Meta => set_indent("meta", current_indent),
+
+            Token::ElOpen => {
+                let s = set_indent("[[", current_indent);
+                current_indent += 1;
+                format!("{s}\n")
+            }
+            Token::ElClose => {
+                current_indent -= 1;
+                format!("{}\n", set_indent("]]", current_indent))
+            }
+            Token::Paragraph(t) => format!(
+                "{{-\n{}\n{}\n",
+                set_indent(t, current_indent + 1),
+                set_indent("}", current_indent)
+            ),
+
+            Token::EmptyLine => "\n".to_owned(),
+        };
+
+        formatted.push_str(&to_push);
+    }
+
+    formatted.trim().to_owned()
+}
--- a/src/daleth/lexer/mod.rs
+++ b/src/daleth/lexer/mod.rs
@ -42,11 +42,10 @@ pub fn lexer<'src>(
    .labelled("Tag");

    let symbol = choice((
-        // just("(").to(Token::LParen).labelled("("),
-        // just(")").to(Token::RParen).labelled(")"),
+        just("[[").to(Token::ElOpen).labelled("[["),
+        just("]]").to(Token::ElClose).labelled("]]"),
        just("[").to(Token::LSquare).labelled("["),
        just("]").to(Token::RSquare).labelled("]"),
-        // just(":").to(Token::Colon).labelled(":"),
    ));

    let argument = {
@ -64,7 +63,7 @@ pub fn lexer<'src>(
            .or(arg_escape)
            .repeated()
            .to_slice()
-            .delimited_by(just("\""), just("\""))
+            .delimited_by(just('"'), just('"'))
            .map(Token::TextArgument)
            .labelled("Text argument");

@ -78,8 +77,8 @@ pub fn lexer<'src>(

        let text = none_of("\n").repeated().to_slice();

-        let text_body = text
-            .delimited_by(just(':'), just('\n'))
+        let text_body = just(':')
+            .ignore_then(text)
            .map(Token::TextBody)
            .labelled("One line text body");

@ -93,15 +92,12 @@ pub fn lexer<'src>(
            .repeated()
            .labelled("Body of multiline text");

-        let mlms_n = just("{~")
-            .ignore_then(text::int(10).from_str().unwrapped())
-            .labelled("Minimum spaces number");
-
-        let mlmstext = mlms_n
-            .then(multiline_text_body.clone().to_slice())
-            .then_ignore(just("}"))
-            .map(|(n, t)| Token::MLMSText(n, t))
-            .labelled("Multi line text with min spaces");
+        let paragraph = multiline_text_body
+            .clone()
+            .to_slice()
+            .delimited_by(just("{-"), just("}"))
+            .map(Token::Paragraph)
+            .labelled("Paragraph syntax");

        let mltext = multiline_text_body
            .clone()
@ -110,26 +106,40 @@ pub fn lexer<'src>(
            .map(Token::MLText)
            .labelled("Multiline text");

+        let mlmstext = {
+            let mlms_n = just("{~")
+                .ignore_then(text::int(10).from_str().unwrapped())
+                .labelled("Minimum spaces number");
+
+            mlms_n
+                .then(multiline_text_body.clone().to_slice())
+                .then_ignore(just("}"))
+                .map(|(n, t)| Token::MLMSText(n, t))
+                .labelled("Multi line text with min spaces")
+        };
+
        let rmltext = multiline_text_body
            .to_slice()
            .delimited_by(just("{#"), just('}'))
            .map(Token::RMLText)
            .labelled("Raw multiline text");

-        choice((mlmstext, mltext, rmltext, text_body, text_tag))
+        choice((paragraph, mlmstext, rmltext, mltext, text_body, text_tag))
    };

-    let comment = none_of("\n")
-        .repeated()
-        .to_slice()
-        .delimited_by(just('#'), just('\n'))
+    let comment = just('#')
+        .ignore_then(none_of("\n").repeated().to_slice())
        .map(Token::Comment);

-    let token = choice((comment, symbol, tag, argument, textual));
+    let empty_line = text::inline_whitespace()
+        .delimited_by(text::newline(), text::newline())
+        .to(Token::EmptyLine);
+
+    let token = choice((empty_line.clone(), comment, symbol, tag, argument, textual));

    token
+        .padded_by(text::whitespace().and_is(empty_line.not()).or_not())
        .map_with(|t, e| (t, e.span()))
-        .padded()
        .repeated()
        .collect()
 }
--- a/src/daleth/lexer/types.rs
+++ b/src/daleth/lexer/types.rs
@ -6,16 +6,14 @@ pub type Spanned<T> = (T, Span);
 #[derive(Clone, Debug, PartialEq)]
 pub enum Token<'src> {
    // Symbols
-    /// (
-    // LParen,
-    /// )
-    // RParen,
    /// [
    LSquare,
    /// ]
    RSquare,
-    /// :
-    // Colon,
+    /// [[
+    ElOpen,
+    /// ]]
+    ElClose,

    // Arguments
    NumberArgument(u8),
@ -31,8 +29,12 @@ pub enum Token<'src> {
    RMLText(&'src str),

    /// Special
-    Comment(&'src str),
    TextTag(&'src str),
+    Paragraph(&'src str),
+
+    /// Special removed before parse
+    Comment(&'src str),
+    EmptyLine,

    // Tags
    El,
@ -68,8 +70,8 @@ pub enum Token<'src> {
    Meta,
 }

-#[derive(Clone, Debug, PartialEq)]
-pub enum Argument<'src> {
-    Number(u8),
-    Argument(&'src str),
+impl<'src> From<Spanned<Token<'src>>> for Token<'src> {
+    fn from(value: Spanned<Token<'src>>) -> Self {
+        value.0
+    }
 }
--- a/src/daleth/mod.rs
+++ b/src/daleth/mod.rs
@ -1 +1,3 @@
+pub mod format;
 pub mod lexer;
+pub mod utils;
--- a/src/daleth/utils.rs
+++ b/src/daleth/utils.rs
@ -0,0 +1,52 @@
+pub fn trim_indent(input: &str) -> String {
+    let lines: Vec<&str> = input.lines().collect();
+
+    // Find the minimum indentation of non-empty lines
+    let min_indent = lines
+        .iter()
+        .filter(|line| !line.trim().is_empty())
+        .map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
+        .min()
+        .unwrap_or(0);
+
+    // Trim the leading whitespace from each line by the minimum indentation
+    let trimmed_lines: Vec<&str> = lines
+        .into_iter()
+        .map(|line| {
+            if line.len() < min_indent {
+                line
+            } else {
+                &line[min_indent..]
+            }
+        })
+        .collect();
+
+    trim_newline(&trimmed_lines.join("\n")).to_owned()
+}
+
+pub fn set_indent(input: &str, indent: usize) -> String {
+    prepend_indent(&trim_indent(input), &"    ".repeat(indent))
+}
+
+fn trim_newline<'a>(s: &'a str) -> &'a str {
+    let mut trim_start = 0;
+
+    for start_char in s.chars() {
+        if start_char != '\n' && start_char != '\r' {
+            break;
+        }
+
+        trim_start += 1;
+    }
+
+    &s[(trim_start)..].trim_end()
+}
+
+fn prepend_indent(input: &str, indent: &str) -> String {
+    let lines: Vec<String> = input
+        .lines()
+        .map(|line| format!("{}{}", indent, line))
+        .collect();
+
+    lines.join("\n")
+}
--- a/src/parsers/gemtext.rs
+++ b/src/parsers/gemtext.rs
@ -1,5 +1,5 @@
 use crate::typed::{
-    Body, Hl, Page, TNArg,
+    Body, Hl, Page, TNullArg,
    Tag::{self, *},
 };

@ -32,9 +32,7 @@ pub fn parse_gemtext(s: &str) -> Result<Page, GemTextParseError> {
            let url = body.next().ok_or(GemTextParseError::InvalidLink)?.trim();

            match body.next() {
-                Some(label) => page.push(P(
-                    vec![Navlink(label.trim().into(), url.into())].into()
-                )),
+                Some(label) => page.push(P(vec![Navlink(label.trim().into(), url.into())].into())),
                None => page.push(P(vec![Navlink(Body::Null, url.into())].into())),
            };
        } else if line.starts_with("# ") {
@ -55,7 +53,7 @@ pub fn parse_gemtext(s: &str) -> Result<Page, GemTextParseError> {
            page.push(Bq(body.into()));
        } else if line.starts_with("```") {
            if preformatted {
-                page.push(Code(preformatted_text.join("\n"), TNArg::Null));
+                page.push(Code(preformatted_text.join("\n"), TNullArg::Null));
                preformatted_text.clear();
            }

--- a/src/traits/from_daletl.rs
+++ b/src/traits/from_daletl.rs
@ -70,13 +70,13 @@ impl TryFrom<DlArgument> for AlignArg {
    }
 }

-impl TryFrom<DlArgument> for TNArg {
+impl TryFrom<DlArgument> for TNullArg {
    type Error = ConversionError;

    fn try_from(value: DlArgument) -> Result<Self, Self::Error> {
        match value {
-            DlArgument::Text(t) => Ok(TNArg::Text(t)),
-            DlArgument::Null => Ok(TNArg::Null),
+            DlArgument::Text(t) => Ok(TNullArg::Text(t)),
+            DlArgument::Null => Ok(TNullArg::Null),
            _ => Err(ConversionError),
        }
    }
--- a/src/traits/to_daletl.rs
+++ b/src/traits/to_daletl.rs
@ -64,11 +64,11 @@ impl From<AlignArg> for DlArgument {
    }
 }

-impl From<TNArg> for DlArgument {
-    fn from(item: TNArg) -> DlArgument {
+impl From<TNullArg> for DlArgument {
+    fn from(item: TNullArg) -> DlArgument {
        match item {
-            TNArg::Text(s) => s.into(),
-            TNArg::Null => NA,
+            TNullArg::Text(s) => s.into(),
+            TNullArg::Null => NA,
        }
    }
 }
--- a/src/typed.rs
+++ b/src/typed.rs
@ -44,7 +44,7 @@ pub enum Tag {
    Disc(NNBody),
    Block(NNBody, AlignArg),
    Carousel(Vec<Tag>),
-    Code(TBody, TNArg),
+    Code(TBody, TNullArg),
    Pre(TBody),
    Meta(TBody, TArg),
 }
@ -73,7 +73,7 @@ pub enum Arg {
 }

 #[derive(AutoFrom, Debug, Clone, PartialEq, Eq)]
-pub enum TNArg {
+pub enum TNullArg {
    Text(String),
    Null,
 }