feat: formatter

TODO: fix tags with optional body formatter
This commit is contained in:
Artemy Egorov 2024-08-09 18:24:42 +03:00
parent dfe50cd0f4
commit 74f42aa314
12 changed files with 281 additions and 71 deletions

View file

@ -1,6 +1,6 @@
use dalet::{
daletpack::*,
typed::{Hl, TNArg, Tag::*},
typed::{Hl, TNullArg, Tag::*},
};
use flate2::Compression;
use std::io::Write;
@ -41,7 +41,7 @@ fn main() {
]
.into()),
Br,
Code("Hello world".into(), TNArg::Null),
Code("Hello world".into(), TNullArg::Null),
Br,
Ul(vec![
El("abc".into()),

View file

@ -7,6 +7,7 @@
#
# {#text} - input not modified
#
#
# tag syntax
#
# tag: text body
@ -19,6 +20,13 @@
# tag argument
#
# Tags without a body or an argument are also supported
#
#
# custom no tag syntax
#
# {-text} - paragraph, text indent is trimmed
# [[tags]] - element tag with body of multiple tags
# text - element tag with text body
meta "title": Daleth syntax concept
meta "description": This document describes Daleth syntax and some tags
@ -27,19 +35,19 @@ h1: TxtDot revolution
p: TxtDot is a cool project
# If no tag is specified, then the 'el' tag is placed
This is element
br
# if no tag is specified but a '{}' is present, then the 'p' tag is placed
# '\n' is deleted only in this format. If a break line is needed in a paragraph, use ' \n'.
{
# if no tag is specified but a '{- text}' is present, then the 'p' tag is placed
# '\n' is deleted in this format. If a line break is needed in a paragraph, use ' \n'.
{-
Check Dalet too
This is one paragraph
}
{ This is another paragraph }
{- This is another paragraph ({- text\}) }
# ( ) for argument
row "center" [
link "https://github.com/txtdot/txtdot": Homepage
btn "https://example.com/donate" [
@ -51,7 +59,9 @@ row "center" [
# [] for multiple tags
row [
[
# if no tag is specified but a '[[]]' is present, then the 'el' tag
# with a body of multiple tags is placed
[[
h2: Features
ul [
@ -65,13 +75,12 @@ row [
Some kind of Material Design 3
Customization with plugins, see @txtdot/sdk and @txtdot/plugins
]
]]
]
[
[[
h2: Running
[
[[
h3: Dev
# {} for multiline strings, indent is automatically trimmed
@ -87,25 +96,23 @@ row [
# {# Text} Text after "`# " not modified
code "markdown" {# this is codeblock}
]
]]
[
[[
h3: Production
code {
npm install
npm run build
npm run start
}
]
]]
[
[[
h3: Docker
code: docker compose up -d
]
]]
]
]]
]
# Table has custom format if text used

View file

@ -1,6 +1,6 @@
use ariadne::{Color, Label, Report, ReportKind, Source};
use chumsky::Parser;
use dalet::daleth::lexer::lexer;
use dalet::daleth::{format::format, lexer::lexer};
fn main() {
let src_file = "daleth.dlth";
@ -9,7 +9,10 @@ fn main() {
let parsed = lexer().parse(src);
match parsed.into_result() {
Ok(t) => println!("{:#?}", t),
Ok(t) => {
println!("{:#?}", t);
println!("{}", format(&t));
}
Err(e) => e.into_iter().for_each(|e| {
Report::build(ReportKind::Error, src_file, e.span().start)
.with_code("Compiler")
@ -23,5 +26,5 @@ fn main() {
.print((src_file, Source::from(&src)))
.unwrap()
}),
}
};
}

136
src/daleth/format.rs Normal file
View file

@ -0,0 +1,136 @@
use super::{
lexer::types::{Spanned, Token},
utils::set_indent,
};
/// Decides whether a line break must be emitted before the next token.
///
/// `last1` is the token written immediately before the current one and
/// `last2` the token before that. A break is required when the previous
/// token was `br` or `hr`, or when the token two positions back was `img`
/// (presumably because `img` is followed by exactly one argument and no
/// body -- confirm against the tag grammar).
///
/// Returns `false` when there is no previous token at all.
fn nl_needed<'src>(last2: Option<&Token<'src>>, last1: Option<&Token<'src>>) -> bool {
    match last1 {
        // Nothing has been written yet, so there is nothing to terminate.
        None => false,
        Some(previous) => {
            matches!(previous, Token::Br | Token::Hr) || matches!(last2, Some(Token::Img))
        }
    }
}
pub fn format<'src>(spanned_tokens: &Vec<Spanned<Token<'src>>>) -> String {
let mut current_indent: usize = 0;
let mut formatted = String::new();
let len = spanned_tokens.len();
for i in 0..len {
let last2 = {
if i < 2 {
None
} else {
spanned_tokens.get(i - 2).map(|t| &t.0)
}
};
let last1 = {
if i < 1 {
None
} else {
spanned_tokens.get(i - 1).map(|t| &t.0)
}
};
if nl_needed(last2, last1) {
formatted.push_str("\n");
};
let spanned_token = &spanned_tokens[i].0;
let to_push = match spanned_token {
Token::LSquare => {
current_indent += 1;
" [\n".to_owned()
}
Token::RSquare => {
current_indent -= 1;
format!("{}\n", set_indent("]", current_indent))
}
Token::NumberArgument(n) => format!("{n}"),
Token::TextArgument(t) => format!(" \"{t}\""),
Token::TextBody(t) => format!(": {}\n", t.trim()),
Token::MLText(t) => format!(
" {{\n{}\n{}\n",
set_indent(t, current_indent + 1),
set_indent("}", current_indent)
),
Token::MLMSText(n, t) => format!(
" {{~{n}\n{}\n{}\n",
set_indent(t, current_indent + 1),
set_indent("}", current_indent)
),
Token::RMLText(t) => format!(" {{#{t}}}\n"),
Token::Comment(c) => format!("{}\n", set_indent(&format!("# {c}"), current_indent)),
Token::TextTag(t) => format!("{}\n", set_indent(t, current_indent)),
Token::El => set_indent("el", current_indent),
Token::H => set_indent("h", current_indent),
Token::P => set_indent("p", current_indent),
Token::Br => set_indent("br", current_indent),
Token::Ul => set_indent("ul", current_indent),
Token::Ol => set_indent("ol", current_indent),
Token::Row => set_indent("row", current_indent),
Token::Link => set_indent("link", current_indent),
Token::Navlink => set_indent("navlink", current_indent),
Token::Btn => set_indent("btn", current_indent),
Token::Navbtn => set_indent("navbtn", current_indent),
Token::Img => set_indent("img", current_indent),
Token::Table => set_indent("table", current_indent),
Token::Tcol => set_indent("tcol", current_indent),
Token::Tpcol => set_indent("tpcol", current_indent),
Token::Hr => set_indent("hr", current_indent),
Token::B => set_indent("b", current_indent),
Token::I => set_indent("i", current_indent),
Token::Bq => set_indent("bq", current_indent),
Token::Footlnk => set_indent("footlnk", current_indent),
Token::Footn => set_indent("footn", current_indent),
Token::A => set_indent("a", current_indent),
Token::S => set_indent("s", current_indent),
Token::Sup => set_indent("sup", current_indent),
Token::Sub => set_indent("sub", current_indent),
Token::Disc => set_indent("disc", current_indent),
Token::Block => set_indent("block", current_indent),
Token::Carousel => set_indent("carousel", current_indent),
Token::Code => set_indent("code", current_indent),
Token::Pre => set_indent("pre", current_indent),
Token::Meta => set_indent("meta", current_indent),
Token::ElOpen => {
let s = set_indent("[[", current_indent);
current_indent += 1;
format!("{s}\n")
}
Token::ElClose => {
current_indent -= 1;
format!("{}\n", set_indent("]]", current_indent))
}
Token::Paragraph(t) => format!(
"{{-\n{}\n{}\n",
set_indent(t, current_indent + 1),
set_indent("}", current_indent)
),
Token::EmptyLine => "\n".to_owned(),
};
formatted.push_str(&to_push);
}
formatted.trim().to_owned()
}

View file

@ -42,11 +42,10 @@ pub fn lexer<'src>(
.labelled("Tag");
let symbol = choice((
// just("(").to(Token::LParen).labelled("("),
// just(")").to(Token::RParen).labelled(")"),
just("[[").to(Token::ElOpen).labelled("[["),
just("]]").to(Token::ElClose).labelled("]]"),
just("[").to(Token::LSquare).labelled("["),
just("]").to(Token::RSquare).labelled("]"),
// just(":").to(Token::Colon).labelled(":"),
));
let argument = {
@ -64,7 +63,7 @@ pub fn lexer<'src>(
.or(arg_escape)
.repeated()
.to_slice()
.delimited_by(just("\""), just("\""))
.delimited_by(just('"'), just('"'))
.map(Token::TextArgument)
.labelled("Text argument");
@ -78,8 +77,8 @@ pub fn lexer<'src>(
let text = none_of("\n").repeated().to_slice();
let text_body = text
.delimited_by(just(':'), just('\n'))
let text_body = just(':')
.ignore_then(text)
.map(Token::TextBody)
.labelled("One line text body");
@ -93,15 +92,12 @@ pub fn lexer<'src>(
.repeated()
.labelled("Body of multiline text");
let mlms_n = just("{~")
.ignore_then(text::int(10).from_str().unwrapped())
.labelled("Minimum spaces number");
let mlmstext = mlms_n
.then(multiline_text_body.clone().to_slice())
.then_ignore(just("}"))
.map(|(n, t)| Token::MLMSText(n, t))
.labelled("Multi line text with min spaces");
let paragraph = multiline_text_body
.clone()
.to_slice()
.delimited_by(just("{-"), just("}"))
.map(Token::Paragraph)
.labelled("Paragraph syntax");
let mltext = multiline_text_body
.clone()
@ -110,26 +106,40 @@ pub fn lexer<'src>(
.map(Token::MLText)
.labelled("Multiline text");
let mlmstext = {
let mlms_n = just("{~")
.ignore_then(text::int(10).from_str().unwrapped())
.labelled("Minimum spaces number");
mlms_n
.then(multiline_text_body.clone().to_slice())
.then_ignore(just("}"))
.map(|(n, t)| Token::MLMSText(n, t))
.labelled("Multi line text with min spaces")
};
let rmltext = multiline_text_body
.to_slice()
.delimited_by(just("{#"), just('}'))
.map(Token::RMLText)
.labelled("Raw multiline text");
choice((mlmstext, mltext, rmltext, text_body, text_tag))
choice((paragraph, mlmstext, rmltext, mltext, text_body, text_tag))
};
let comment = none_of("\n")
.repeated()
.to_slice()
.delimited_by(just('#'), just('\n'))
let comment = just('#')
.ignore_then(none_of("\n").repeated().to_slice())
.map(Token::Comment);
let token = choice((comment, symbol, tag, argument, textual));
let empty_line = text::inline_whitespace()
.delimited_by(text::newline(), text::newline())
.to(Token::EmptyLine);
let token = choice((empty_line.clone(), comment, symbol, tag, argument, textual));
token
.padded_by(text::whitespace().and_is(empty_line.not()).or_not())
.map_with(|t, e| (t, e.span()))
.padded()
.repeated()
.collect()
}

View file

@ -6,16 +6,14 @@ pub type Spanned<T> = (T, Span);
#[derive(Clone, Debug, PartialEq)]
pub enum Token<'src> {
// Symbols
/// (
// LParen,
/// )
// RParen,
/// [
LSquare,
/// ]
RSquare,
/// :
// Colon,
/// [[
ElOpen,
/// ]]
ElClose,
// Arguments
NumberArgument(u8),
@ -31,8 +29,12 @@ pub enum Token<'src> {
RMLText(&'src str),
/// Special
Comment(&'src str),
TextTag(&'src str),
Paragraph(&'src str),
/// Special removed before parse
Comment(&'src str),
EmptyLine,
// Tags
El,
@ -68,8 +70,8 @@ pub enum Token<'src> {
Meta,
}
#[derive(Clone, Debug, PartialEq)]
pub enum Argument<'src> {
Number(u8),
Argument(&'src str),
/// Converts a spanned token into the bare token by dropping its span.
impl<'src> From<Spanned<Token<'src>>> for Token<'src> {
    fn from((token, _span): Spanned<Token<'src>>) -> Self {
        token
    }
}

View file

@ -1 +1,3 @@
pub mod format;
pub mod lexer;
pub mod utils;

52
src/daleth/utils.rs Normal file
View file

@ -0,0 +1,52 @@
/// Removes the indentation shared by all non-blank lines of `input`.
///
/// The shared indentation is the smallest number of leading whitespace
/// characters found on any line that is not blank. That many characters are
/// removed from the start of every line; a line with fewer characters than
/// that (only possible for blank lines) is kept unchanged. Finally, leading
/// line breaks and all trailing whitespace of the joined text are removed.
///
/// Indentation is measured and removed in characters, not bytes, so
/// multi-byte whitespace never causes a slice on a non-character boundary.
pub fn trim_indent(input: &str) -> String {
    let lines: Vec<&str> = input.lines().collect();

    // Smallest indentation (in characters) among lines that carry content.
    let min_indent = lines
        .iter()
        .filter(|line| !line.trim().is_empty())
        .map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
        .min()
        .unwrap_or(0);

    let trimmed_lines: Vec<&str> = lines
        .into_iter()
        .map(|line| skip_chars(line, min_indent))
        .collect();

    trim_newline(&trimmed_lines.join("\n")).to_owned()
}

/// Returns `line` without its first `count` characters, or `line` unchanged
/// when it has fewer than `count` characters.
fn skip_chars(line: &str, count: usize) -> &str {
    let mut rest = line.chars();
    for _ in 0..count {
        if rest.next().is_none() {
            return line;
        }
    }
    rest.as_str()
}

/// Re-indents `input`: its common indentation is removed with
/// [`trim_indent`] and every resulting line is prefixed with `indent`
/// repetitions of the indentation unit.
pub fn set_indent(input: &str, indent: usize) -> String {
    let prefix = " ".repeat(indent);
    prepend_indent(&trim_indent(input), &prefix)
}

/// Strips leading `\n` / `\r` characters and all trailing whitespace.
///
/// Leading spaces and tabs are deliberately kept so that the indentation of
/// the first remaining line survives.
fn trim_newline(s: &str) -> &str {
    s.trim_start_matches(|c: char| c == '\n' || c == '\r')
        .trim_end()
}

/// Prefixes every line of `input` with `indent` and joins the lines with `\n`.
fn prepend_indent(input: &str, indent: &str) -> String {
    input
        .lines()
        .map(|line| format!("{indent}{line}"))
        .collect::<Vec<_>>()
        .join("\n")
}

View file

@ -1,5 +1,5 @@
use crate::typed::{
Body, Hl, Page, TNArg,
Body, Hl, Page, TNullArg,
Tag::{self, *},
};
@ -32,9 +32,7 @@ pub fn parse_gemtext(s: &str) -> Result<Page, GemTextParseError> {
let url = body.next().ok_or(GemTextParseError::InvalidLink)?.trim();
match body.next() {
Some(label) => page.push(P(
vec![Navlink(label.trim().into(), url.into())].into()
)),
Some(label) => page.push(P(vec![Navlink(label.trim().into(), url.into())].into())),
None => page.push(P(vec![Navlink(Body::Null, url.into())].into())),
};
} else if line.starts_with("# ") {
@ -55,7 +53,7 @@ pub fn parse_gemtext(s: &str) -> Result<Page, GemTextParseError> {
page.push(Bq(body.into()));
} else if line.starts_with("```") {
if preformatted {
page.push(Code(preformatted_text.join("\n"), TNArg::Null));
page.push(Code(preformatted_text.join("\n"), TNullArg::Null));
preformatted_text.clear();
}

View file

@ -70,13 +70,13 @@ impl TryFrom<DlArgument> for AlignArg {
}
}
impl TryFrom<DlArgument> for TNArg {
impl TryFrom<DlArgument> for TNullArg {
type Error = ConversionError;
fn try_from(value: DlArgument) -> Result<Self, Self::Error> {
match value {
DlArgument::Text(t) => Ok(TNArg::Text(t)),
DlArgument::Null => Ok(TNArg::Null),
DlArgument::Text(t) => Ok(TNullArg::Text(t)),
DlArgument::Null => Ok(TNullArg::Null),
_ => Err(ConversionError),
}
}

View file

@ -64,11 +64,11 @@ impl From<AlignArg> for DlArgument {
}
}
impl From<TNArg> for DlArgument {
fn from(item: TNArg) -> DlArgument {
impl From<TNullArg> for DlArgument {
fn from(item: TNullArg) -> DlArgument {
match item {
TNArg::Text(s) => s.into(),
TNArg::Null => NA,
TNullArg::Text(s) => s.into(),
TNullArg::Null => NA,
}
}
}

View file

@ -44,7 +44,7 @@ pub enum Tag {
Disc(NNBody),
Block(NNBody, AlignArg),
Carousel(Vec<Tag>),
Code(TBody, TNArg),
Code(TBody, TNullArg),
Pre(TBody),
Meta(TBody, TArg),
}
@ -73,7 +73,7 @@ pub enum Arg {
}
#[derive(AutoFrom, Debug, Clone, PartialEq, Eq)]
pub enum TNArg {
pub enum TNullArg {
Text(String),
Null,
}