From 74f42aa3141cf56b27fe2ad4b9607cabc47f1753 Mon Sep 17 00:00:00 2001 From: Artemy Egorov Date: Fri, 9 Aug 2024 18:24:42 +0300 Subject: [PATCH] feat: formatter TODO: fix tags with optional body formatter --- examples/bench.rs | 4 +- examples/daleth.dlth | 45 +++++++------ examples/daleth_lexer.rs | 9 ++- src/daleth/format.rs | 136 ++++++++++++++++++++++++++++++++++++++ src/daleth/lexer/mod.rs | 54 +++++++++------ src/daleth/lexer/types.rs | 24 ++++--- src/daleth/mod.rs | 2 + src/daleth/utils.rs | 52 +++++++++++++++ src/parsers/gemtext.rs | 8 +-- src/traits/from_daletl.rs | 6 +- src/traits/to_daletl.rs | 8 +-- src/typed.rs | 4 +- 12 files changed, 281 insertions(+), 71 deletions(-) create mode 100644 src/daleth/format.rs create mode 100644 src/daleth/utils.rs diff --git a/examples/bench.rs b/examples/bench.rs index f3c9607..ea4b5d2 100644 --- a/examples/bench.rs +++ b/examples/bench.rs @@ -1,6 +1,6 @@ use dalet::{ daletpack::*, - typed::{Hl, TNArg, Tag::*}, + typed::{Hl, TNullArg, Tag::*}, }; use flate2::Compression; use std::io::Write; @@ -41,7 +41,7 @@ fn main() { ] .into()), Br, - Code("Hello world".into(), TNArg::Null), + Code("Hello world".into(), TNullArg::Null), Br, Ul(vec![ El("abc".into()), diff --git a/examples/daleth.dlth b/examples/daleth.dlth index 2679bab..d2a6ced 100644 --- a/examples/daleth.dlth +++ b/examples/daleth.dlth @@ -5,7 +5,8 @@ # {~n text} - n is number of minimum spaces to add after trimming with indent # for each line # -# {# text} - input not modified +# {#text} - input not modified +# # # tag syntax # @@ -19,6 +20,13 @@ # tag argument # # Tags without body and argument also supported +# +# +# custom no tag syntax +# +# {-text} - paragraph, text indent is trimmed +# [[tags]] - element tag with body of multiple tags +# text - element tag with text body meta "title": Daleth syntax concept meta "description": This document describes Daleth syntax and some tags @@ -27,19 +35,19 @@ h1: TxtDot revolution p: TxtDot is a cool project # If no 
tag is specified, then the 'el' tag is placed + This is element br -# if no tag is specified but a '{}' is present, then the 'p' tag is placed -# '\n' is deleted only in this format. If a break line is needed in a paragraph, use ' \n'. -{ +# if no tag is specified but a '{- text}' is present, then the 'p' tag is placed +# '\n' is deleted in this format. If a break line is needed in a paragraph, use ' \n'. +{- Check Dalet too This is one paragraph } -{ This is another paragraph } +{- This is another paragraph ({- text\}) } -# ( ) for argument row "center" [ link "https://github.com/txtdot/txtdot": Homepage btn "https://example.com/donate" [ @@ -51,7 +59,9 @@ row "center" [ # [] for multiple tags row [ - [ + # if no tag is specified but a '[[]]' is present, then the 'el' tag + # with multiple tags body placed + [[ h2: Features ul [ @@ -65,13 +75,12 @@ row [ Some kind of Material Design 3 Customization with plugins, see @txtdot/sdk and @txtdot/plugins ] + ]] - ] - - [ + [[ h2: Running - [ + [[ h3: Dev # {} for multiline strings, indent is automatically trimmed @@ -87,25 +96,23 @@ row [ # {# Text} Text after "`# " not modified code "markdown" {# this is codeblock} - ] + ]] - [ + [[ h3: Production - code { npm install npm run build npm run start } - ] + ]] - [ + [[ h3: Docker - code: docker compose up -d - ] + ]] - ] + ]] ] # Table has custom format if text used diff --git a/examples/daleth_lexer.rs b/examples/daleth_lexer.rs index e0215c0..dfffa73 100644 --- a/examples/daleth_lexer.rs +++ b/examples/daleth_lexer.rs @@ -1,6 +1,6 @@ use ariadne::{Color, Label, Report, ReportKind, Source}; use chumsky::Parser; -use dalet::daleth::lexer::lexer; +use dalet::daleth::{format::format, lexer::lexer}; fn main() { let src_file = "daleth.dlth"; @@ -9,7 +9,10 @@ fn main() { let parsed = lexer().parse(src); match parsed.into_result() { - Ok(t) => println!("{:#?}", t), + Ok(t) => { + println!("{:#?}", t); + println!("{}", format(&t)); + } Err(e) => e.into_iter().for_each(|e| { 
Report::build(ReportKind::Error, src_file, e.span().start) .with_code("Compiler") @@ -23,5 +26,5 @@ fn main() { .print((src_file, Source::from(&src))) .unwrap() }), - } + }; } diff --git a/src/daleth/format.rs b/src/daleth/format.rs new file mode 100644 index 0000000..9fb552b --- /dev/null +++ b/src/daleth/format.rs @@ -0,0 +1,136 @@ +use super::{ + lexer::types::{Spanned, Token}, + utils::set_indent, +}; + +fn nl_needed<'src>(last2: Option<&Token<'src>>, last1: Option<&Token<'src>>) -> bool { + if let Some(last1) = last1 { + if *last1 == Token::Br { + return true; + } + + if *last1 == Token::Hr { + return true; + } + + if let Some(last2) = last2 { + if *last2 == Token::Img { + return true; + } + } + } + + false +} + +pub fn format<'src>(spanned_tokens: &Vec>>) -> String { + let mut current_indent: usize = 0; + let mut formatted = String::new(); + let len = spanned_tokens.len(); + + for i in 0..len { + let last2 = { + if i < 2 { + None + } else { + spanned_tokens.get(i - 2).map(|t| &t.0) + } + }; + + let last1 = { + if i < 1 { + None + } else { + spanned_tokens.get(i - 1).map(|t| &t.0) + } + }; + + if nl_needed(last2, last1) { + formatted.push_str("\n"); + }; + + let spanned_token = &spanned_tokens[i].0; + + let to_push = match spanned_token { + Token::LSquare => { + current_indent += 1; + " [\n".to_owned() + } + Token::RSquare => { + current_indent -= 1; + format!("{}\n", set_indent("]", current_indent)) + } + + Token::NumberArgument(n) => format!("{n}"), + Token::TextArgument(t) => format!(" \"{t}\""), + Token::TextBody(t) => format!(": {}\n", t.trim()), + Token::MLText(t) => format!( + " {{\n{}\n{}\n", + set_indent(t, current_indent + 1), + set_indent("}", current_indent) + ), + Token::MLMSText(n, t) => format!( + " {{~{n}\n{}\n{}\n", + set_indent(t, current_indent + 1), + set_indent("}", current_indent) + ), + Token::RMLText(t) => format!(" {{#{t}}}\n"), + Token::Comment(c) => format!("{}\n", set_indent(&format!("# {c}"), current_indent)), + + Token::TextTag(t) 
=> format!("{}\n", set_indent(t, current_indent)), + + Token::El => set_indent("el", current_indent), + Token::H => set_indent("h", current_indent), + Token::P => set_indent("p", current_indent), + Token::Br => set_indent("br", current_indent), + Token::Ul => set_indent("ul", current_indent), + Token::Ol => set_indent("ol", current_indent), + Token::Row => set_indent("row", current_indent), + Token::Link => set_indent("link", current_indent), + Token::Navlink => set_indent("navlink", current_indent), + Token::Btn => set_indent("btn", current_indent), + Token::Navbtn => set_indent("navbtn", current_indent), + Token::Img => set_indent("img", current_indent), + Token::Table => set_indent("table", current_indent), + Token::Tcol => set_indent("tcol", current_indent), + Token::Tpcol => set_indent("tpcol", current_indent), + Token::Hr => set_indent("hr", current_indent), + Token::B => set_indent("b", current_indent), + Token::I => set_indent("i", current_indent), + Token::Bq => set_indent("bq", current_indent), + Token::Footlnk => set_indent("footlnk", current_indent), + Token::Footn => set_indent("footn", current_indent), + Token::A => set_indent("a", current_indent), + Token::S => set_indent("s", current_indent), + Token::Sup => set_indent("sup", current_indent), + Token::Sub => set_indent("sub", current_indent), + Token::Disc => set_indent("disc", current_indent), + Token::Block => set_indent("block", current_indent), + Token::Carousel => set_indent("carousel", current_indent), + Token::Code => set_indent("code", current_indent), + Token::Pre => set_indent("pre", current_indent), + Token::Meta => set_indent("meta", current_indent), + + Token::ElOpen => { + let s = set_indent("[[", current_indent); + current_indent += 1; + format!("{s}\n") + } + Token::ElClose => { + current_indent -= 1; + format!("{}\n", set_indent("]]", current_indent)) + } + Token::Paragraph(t) => format!( + "{{-\n{}\n{}\n", + set_indent(t, current_indent + 1), + set_indent("}", current_indent) + ), 
+ + Token::EmptyLine => "\n".to_owned(), + }; + + formatted.push_str(&to_push); + } + + formatted.trim().to_owned() +} diff --git a/src/daleth/lexer/mod.rs b/src/daleth/lexer/mod.rs index b25ec5f..d0d3262 100644 --- a/src/daleth/lexer/mod.rs +++ b/src/daleth/lexer/mod.rs @@ -42,11 +42,10 @@ pub fn lexer<'src>( .labelled("Tag"); let symbol = choice(( - // just("(").to(Token::LParen).labelled("("), - // just(")").to(Token::RParen).labelled(")"), + just("[[").to(Token::ElOpen).labelled("[["), + just("]]").to(Token::ElClose).labelled("]]"), just("[").to(Token::LSquare).labelled("["), just("]").to(Token::RSquare).labelled("]"), - // just(":").to(Token::Colon).labelled(":"), )); let argument = { @@ -64,7 +63,7 @@ pub fn lexer<'src>( .or(arg_escape) .repeated() .to_slice() - .delimited_by(just("\""), just("\"")) + .delimited_by(just('"'), just('"')) .map(Token::TextArgument) .labelled("Text argument"); @@ -78,8 +77,8 @@ pub fn lexer<'src>( let text = none_of("\n").repeated().to_slice(); - let text_body = text - .delimited_by(just(':'), just('\n')) + let text_body = just(':') + .ignore_then(text) .map(Token::TextBody) .labelled("One line text body"); @@ -93,15 +92,12 @@ pub fn lexer<'src>( .repeated() .labelled("Body of multiline text"); - let mlms_n = just("{~") - .ignore_then(text::int(10).from_str().unwrapped()) - .labelled("Minimum spaces number"); - - let mlmstext = mlms_n - .then(multiline_text_body.clone().to_slice()) - .then_ignore(just("}")) - .map(|(n, t)| Token::MLMSText(n, t)) - .labelled("Multi line text with min spaces"); + let paragraph = multiline_text_body + .clone() + .to_slice() + .delimited_by(just("{-"), just("}")) + .map(Token::Paragraph) + .labelled("Paragraph syntax"); let mltext = multiline_text_body .clone() @@ -110,26 +106,40 @@ pub fn lexer<'src>( .map(Token::MLText) .labelled("Multiline text"); + let mlmstext = { + let mlms_n = just("{~") + .ignore_then(text::int(10).from_str().unwrapped()) + .labelled("Minimum spaces number"); + + mlms_n + 
.then(multiline_text_body.clone().to_slice()) + .then_ignore(just("}")) + .map(|(n, t)| Token::MLMSText(n, t)) + .labelled("Multi line text with min spaces") + }; + let rmltext = multiline_text_body .to_slice() .delimited_by(just("{#"), just('}')) .map(Token::RMLText) .labelled("Raw multiline text"); - choice((mlmstext, mltext, rmltext, text_body, text_tag)) + choice((paragraph, mlmstext, rmltext, mltext, text_body, text_tag)) }; - let comment = none_of("\n") - .repeated() - .to_slice() - .delimited_by(just('#'), just('\n')) + let comment = just('#') + .ignore_then(none_of("\n").repeated().to_slice()) .map(Token::Comment); - let token = choice((comment, symbol, tag, argument, textual)); + let empty_line = text::inline_whitespace() + .delimited_by(text::newline(), text::newline()) + .to(Token::EmptyLine); + + let token = choice((empty_line.clone(), comment, symbol, tag, argument, textual)); token + .padded_by(text::whitespace().and_is(empty_line.not()).or_not()) .map_with(|t, e| (t, e.span())) - .padded() .repeated() .collect() } diff --git a/src/daleth/lexer/types.rs b/src/daleth/lexer/types.rs index c189a1a..53ef6a3 100644 --- a/src/daleth/lexer/types.rs +++ b/src/daleth/lexer/types.rs @@ -6,16 +6,14 @@ pub type Spanned = (T, Span); #[derive(Clone, Debug, PartialEq)] pub enum Token<'src> { // Symbols - /// ( - // LParen, - /// ) - // RParen, /// [ LSquare, /// ] RSquare, - /// : - // Colon, + /// [[ + ElOpen, + /// ]] + ElClose, // Arguments NumberArgument(u8), @@ -31,8 +29,12 @@ pub enum Token<'src> { RMLText(&'src str), /// Special - Comment(&'src str), TextTag(&'src str), + Paragraph(&'src str), + + /// Special removed before parse + Comment(&'src str), + EmptyLine, // Tags El, @@ -68,8 +70,8 @@ pub enum Token<'src> { Meta, } -#[derive(Clone, Debug, PartialEq)] -pub enum Argument<'src> { - Number(u8), - Argument(&'src str), +impl<'src> From>> for Token<'src> { + fn from(value: Spanned>) -> Self { + value.0 + } } diff --git a/src/daleth/mod.rs 
/// Removes the common leading indentation from every line of `input`.
///
/// The common indentation is the smallest number of leading whitespace
/// characters found on any line that is not blank. Lines with fewer
/// characters than that (only possible for blank lines) are left untouched.
/// Leading `\n`/`\r` characters and all trailing whitespace of the joined
/// result are removed. Lines are re-joined with `\n`.
///
/// Indentation is measured and removed in characters, so multi-byte
/// whitespace (for example U+3000) is handled without slicing inside a
/// UTF-8 sequence.
pub fn trim_indent(input: &str) -> String {
    let lines: Vec<&str> = input.lines().collect();

    // Find the minimum indentation (in chars) of non-empty lines.
    let min_indent = lines
        .iter()
        .filter(|line| !line.trim().is_empty())
        .map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
        .min()
        .unwrap_or(0);

    // Drop that many leading characters from every line that has them.
    let trimmed_lines: Vec<&str> = lines
        .into_iter()
        .map(|line| strip_leading_chars(line, min_indent))
        .collect();

    trim_newline(&trimmed_lines.join("\n")).to_owned()
}

/// Re-indents `input`: strips its common indentation with [`trim_indent`]
/// and then prefixes every line with `indent` repetitions of a single space.
pub fn set_indent(input: &str, indent: usize) -> String {
    prepend_indent(&trim_indent(input), &" ".repeat(indent))
}

/// Returns `line` without its first `count` characters, or `line` unchanged
/// when it contains fewer than `count` characters.
fn strip_leading_chars(line: &str, count: usize) -> &str {
    // Byte offsets of every char boundary, including the end of the string,
    // so that `count == number of chars` yields the empty tail.
    line.char_indices()
        .map(|(offset, _)| offset)
        .chain(std::iter::once(line.len()))
        .nth(count)
        .map_or(line, |offset| &line[offset..])
}

/// Strips leading `\n`/`\r` characters and all trailing whitespace.
///
/// Other leading whitespace (spaces, tabs) is deliberately kept so that the
/// indentation of the first remaining line survives.
fn trim_newline(s: &str) -> &str {
    s.trim_start_matches(|c: char| c == '\n' || c == '\r')
        .trim_end()
}

/// Prefixes every line of `input` with `indent` and joins them with `\n`.
fn prepend_indent(input: &str, indent: &str) -> String {
    input
        .lines()
        .map(|line| format!("{indent}{line}"))
        .collect::<Vec<_>>()
        .join("\n")
}
page.push(P(vec![Navlink(label.trim().into(), url.into())].into())), None => page.push(P(vec![Navlink(Body::Null, url.into())].into())), }; } else if line.starts_with("# ") { @@ -55,7 +53,7 @@ pub fn parse_gemtext(s: &str) -> Result { page.push(Bq(body.into())); } else if line.starts_with("```") { if preformatted { - page.push(Code(preformatted_text.join("\n"), TNArg::Null)); + page.push(Code(preformatted_text.join("\n"), TNullArg::Null)); preformatted_text.clear(); } diff --git a/src/traits/from_daletl.rs b/src/traits/from_daletl.rs index bde216d..26c27f5 100644 --- a/src/traits/from_daletl.rs +++ b/src/traits/from_daletl.rs @@ -70,13 +70,13 @@ impl TryFrom for AlignArg { } } -impl TryFrom for TNArg { +impl TryFrom for TNullArg { type Error = ConversionError; fn try_from(value: DlArgument) -> Result { match value { - DlArgument::Text(t) => Ok(TNArg::Text(t)), - DlArgument::Null => Ok(TNArg::Null), + DlArgument::Text(t) => Ok(TNullArg::Text(t)), + DlArgument::Null => Ok(TNullArg::Null), _ => Err(ConversionError), } } diff --git a/src/traits/to_daletl.rs b/src/traits/to_daletl.rs index 089b1aa..c33bcb9 100644 --- a/src/traits/to_daletl.rs +++ b/src/traits/to_daletl.rs @@ -64,11 +64,11 @@ impl From for DlArgument { } } -impl From for DlArgument { - fn from(item: TNArg) -> DlArgument { +impl From for DlArgument { + fn from(item: TNullArg) -> DlArgument { match item { - TNArg::Text(s) => s.into(), - TNArg::Null => NA, + TNullArg::Text(s) => s.into(), + TNullArg::Null => NA, } } } diff --git a/src/typed.rs b/src/typed.rs index 0f01219..b949378 100644 --- a/src/typed.rs +++ b/src/typed.rs @@ -44,7 +44,7 @@ pub enum Tag { Disc(NNBody), Block(NNBody, AlignArg), Carousel(Vec), - Code(TBody, TNArg), + Code(TBody, TNullArg), Pre(TBody), Meta(TBody, TArg), } @@ -73,7 +73,7 @@ pub enum Arg { } #[derive(AutoFrom, Debug, Clone, PartialEq, Eq)] -pub enum TNArg { +pub enum TNullArg { Text(String), Null, }