feat: full lexer and example

This commit is contained in:
Artemy Egorov 2024-08-08 12:25:12 +03:00
parent edf799904e
commit dfe50cd0f4
9 changed files with 283 additions and 56 deletions

View file

@ -29,8 +29,7 @@ pub fn compress_zlib(data: &[u8]) -> std::io::Result<Vec<u8>> {
c.finish() c.finish()
} }
#[test] fn main() {
fn bench() {
let page = vec![ let page = vec![
H("Heading 1".into(), Hl::One), H("Heading 1".into(), Hl::One),
H("Heading 2".into(), Hl::Two), H("Heading 2".into(), Hl::Two),

129
examples/daleth.dlth Normal file
View file

@ -0,0 +1,129 @@
# multilines
#
# {text} - input is trimmed with indent
#
# {~n text} - n is number of minimum spaces to add after trimming with indent
# for each line
#
# {# text} - input not modified
#
# tag syntax
#
# tag: text body
# tag { multiline text body }
# body text always trimmed
#
# tag [ multiple tags body ]
#
# Arguments
# tag argument
#
# Tags without a body or an argument are also supported
meta "title": Daleth syntax concept
meta "description": This document describes Daleth syntax and some tags
h1: TxtDot revolution
p: TxtDot is a cool project
# If no tag is specified, then the 'el' tag is placed
This is element
br
# if no tag is specified but a '{}' is present, then the 'p' tag is placed
# '\n' is deleted only in this format. If a line break is needed in a paragraph, use ' \n'.
{
Check Dalet too
This is one paragraph
}
{ This is another paragraph }
# "" for a text argument
row "center" [
link "https://github.com/txtdot/txtdot": Homepage
btn "https://example.com/donate" [
# tag without body
img "https://example.com/donate.png"
Donate
]
]
# [] for multiple tags
row [
[
h2: Features
ul [
Server-side page simplification
Media proxy
Image compression with Sharp
Rendering client-side apps `Vanilla, React, Vue, etc` with webder
Search with SearXNG
Handy API endpoints
No client JavaScript
Some kind of Material Design 3
Customization with plugins, see @txtdot/sdk and @txtdot/plugins
]
]
[
h2: Running
[
h3: Dev
# {} for multiline strings, indent is automatically trimmed
code {
npm install
npm run dev
}
# {~n Text} n is number of minimum spaces
code "markdown" {~4
this is codeblock
}
# {# Text} Text after "{# " is not modified
code "markdown" {# this is codeblock}
]
[
h3: Production
code {
npm install
npm run build
npm run start
}
]
[
h3: Docker
code: docker compose up -d
]
]
]
# A table has a custom format when a text body is used
# +| cells | - primary column
# | cells | - secondary column
# | Element | Description | - converts to
# tcol [
# Element
# Description
# ]
table {
+| Tag | Description |
| h | Heading |
| p | Paragraph |
| img | Image |
| link | Link |
| btn | Button |
| ul | Unordered list |
| br | Line break |
+| quantity | 7 |
}

27
examples/daleth_lexer.rs Normal file
View file

@ -0,0 +1,27 @@
use ariadne::{Color, Label, Report, ReportKind, Source};
use chumsky::Parser;
use dalet::daleth::lexer::lexer;
/// Lexes the bundled `daleth.dlth` example and prints the outcome.
///
/// On success the lexer output is pretty-printed with `{:#?}`. On failure
/// every lexer error is rendered as an ariadne report whose label covers the
/// error's span.
///
/// # Panics
///
/// Panics if printing a report fails, because the print result is unwrapped.
fn main() {
    let src_file = "daleth.dlth";
    let src = include_str!("./daleth.dlth");

    let parsed = lexer().parse(src);

    match parsed.into_result() {
        Ok(t) => println!("{:#?}", t),
        Err(errors) => {
            for e in errors {
                // Format the error once: the same text is used both as the
                // report headline and as the label message.
                let message = e.to_string();

                Report::build(ReportKind::Error, src_file, e.span().start)
                    .with_code("Compiler")
                    .with_message(&message)
                    .with_label(
                        Label::new((src_file, e.span().into_range()))
                            .with_message(&message)
                            .with_color(Color::Red),
                    )
                    .finish()
                    .print((src_file, Source::from(&src)))
                    .unwrap();
            }
        }
    }
}

9
examples/gemtext.rs Normal file
View file

@ -0,0 +1,9 @@
use dalet::parsers::gemtext::parse_gemtext;
/// Parses the bundled `gemtext.gmi` sample and pretty-prints the result.
///
/// # Panics
///
/// Panics if `parse_gemtext` returns an error, because the result is unwrapped.
fn main() {
    let page = parse_gemtext(include_str!("./gemtext.gmi")).unwrap();

    println!("{:#?}", page);
}

View file

@ -4,61 +4,128 @@ pub mod types;
pub fn lexer<'src>( pub fn lexer<'src>(
) -> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> { ) -> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char, Span>>> {
let symbol = choice(( let tag = choice((
just("(").to(Token::LParen).labelled("LParen"), just("el").to(Token::El),
just(")").to(Token::RParen).labelled("RParen"), just("h").to(Token::H),
just("{").to(Token::LAngle).labelled("LAngle"), just("p").to(Token::P),
just("}").to(Token::RAngle).labelled("RAngle"), just("br").to(Token::Br),
just("[").to(Token::LSquare).labelled("LSquare"), just("ul").to(Token::Ul),
just("]").to(Token::RSquare).labelled("RSquare"), just("ol").to(Token::Ol),
just(":").to(Token::Colon).labelled("Colon"), just("row").to(Token::Row),
just("link").to(Token::Link),
just("navlink").to(Token::Navlink),
just("btn").to(Token::Btn),
just("navbtn").to(Token::Navbtn),
just("img").to(Token::Img),
just("table").to(Token::Table),
just("tcol").to(Token::Tcol),
just("tpcol").to(Token::Tpcol),
just("hr").to(Token::Hr),
just("b").to(Token::B),
just("i").to(Token::I),
just("bq").to(Token::Bq),
just("footlnk").to(Token::Footlnk),
just("footn").to(Token::Footn),
just("a").to(Token::A),
just("s").to(Token::S),
just("sup").to(Token::Sup),
just("sub").to(Token::Sub),
just("disc").to(Token::Disc),
)) ))
.labelled("symbol"); .or(choice((
just("block").to(Token::Block),
just("carousel").to(Token::Carousel),
just("code").to(Token::Code),
just("pre").to(Token::Pre),
just("meta").to(Token::Meta),
)))
.labelled("Tag");
let number = text::int(10) let symbol = choice((
.from_str() // just("(").to(Token::LParen).labelled("("),
.unwrapped() // just(")").to(Token::RParen).labelled(")"),
.map(Token::Number) just("[").to(Token::LSquare).labelled("["),
.labelled("number"); just("]").to(Token::RSquare).labelled("]"),
// just(":").to(Token::Colon).labelled(":"),
));
let argument = {
let arg_escape = just('\\')
.ignore_then(just('"'))
.labelled("Escape sequence for argument");
let number = text::int(10)
.from_str()
.unwrapped()
.map(Token::NumberArgument)
.labelled("Number argument");
let text_argument = none_of("\"\n\\")
.or(arg_escape)
.repeated()
.to_slice()
.delimited_by(just("\""), just("\""))
.map(Token::TextArgument)
.labelled("Text argument");
choice((number, text_argument))
};
let textual = { let textual = {
let escape = just('\\') let escape = just('\\')
.ignore_then(choice(( .ignore_then(just('}'))
just("\\`").to('`'.to_owned()), .labelled("Multi-line escape sequence");
just("\\]").to(']'.to_owned()),
)))
.labelled("escape sequence");
let text = none_of("]\n") let text = none_of("\n").repeated().to_slice();
.or(escape.clone())
let text_body = text
.delimited_by(just(':'), just('\n'))
.map(Token::TextBody)
.labelled("One line text body");
let text_tag = text
.then_ignore(just('\n'))
.map(Token::TextTag)
.labelled("Text tag");
let multiline_text_body = none_of("}\\")
.or(escape)
.repeated() .repeated()
.to_slice() .labelled("Body of multiline text");
.map(Token::Text);
let multiline_text = none_of("`").or(escape).repeated(); let mlms_n = just("{~")
.ignore_then(text::int(10).from_str().unwrapped())
.labelled("Minimum spaces number");
let mltext = multiline_text let mlmstext = mlms_n
.then(multiline_text_body.clone().to_slice())
.then_ignore(just("}"))
.map(|(n, t)| Token::MLMSText(n, t))
.labelled("Multi line text with min spaces");
let mltext = multiline_text_body
.clone() .clone()
.delimited_by(just('`'), just('`'))
.to_slice() .to_slice()
.delimited_by(just('{'), just('}'))
.map(Token::MLText) .map(Token::MLText)
.labelled("multiline text"); .labelled("Multiline text");
let mlmstext = multiline_text let rmltext = multiline_text_body
.delimited_by(just("`#"), just('`'))
.to_slice() .to_slice()
.delimited_by(just("{#"), just('}'))
.map(Token::RMLText) .map(Token::RMLText)
.labelled("raw multiline text"); .labelled("Raw multiline text");
choice((mltext, mlmstext, text)) choice((mlmstext, mltext, rmltext, text_body, text_tag))
}; };
let comment = just("#") let comment = none_of("\n")
.then(none_of("\n").repeated()) .repeated()
.to_slice() .to_slice()
.delimited_by(just('#'), just('\n'))
.map(Token::Comment); .map(Token::Comment);
let token = choice((symbol, number, textual, comment)); let token = choice((comment, symbol, tag, argument, textual));
token token
.map_with(|t, e| (t, e.span())) .map_with(|t, e| (t, e.span()))

View file

@ -7,23 +7,22 @@ pub type Spanned<T> = (T, Span);
pub enum Token<'src> { pub enum Token<'src> {
// Symbols // Symbols
/// ( /// (
LParen, // LParen,
/// ) /// )
RParen, // RParen,
/// {
LAngle,
/// }
RAngle,
/// [ /// [
LSquare, LSquare,
/// ] /// ]
RSquare, RSquare,
/// : /// :
Colon, // Colon,
// Values // Arguments
Number(u8), NumberArgument(u8),
Text(&'src str), TextArgument(&'src str),
// Body
TextBody(&'src str),
/// Multi Line text /// Multi Line text
MLText(&'src str), MLText(&'src str),
/// Multi Line with min spaces text /// Multi Line with min spaces text
@ -33,6 +32,7 @@ pub enum Token<'src> {
/// Special /// Special
Comment(&'src str), Comment(&'src str),
TextTag(&'src str),
// Tags // Tags
El, El,
@ -67,3 +67,9 @@ pub enum Token<'src> {
Pre, Pre,
Meta, Meta,
} }
/// An argument value: either a small number or a string slice borrowed for
/// the `'src` lifetime.
///
/// NOTE(review): nothing in the visible lexer constructs this type; it
/// presumably mirrors `Token::NumberArgument` / `Token::TextArgument` for a
/// later parsing stage — confirm against the callers.
#[derive(Clone, Debug, PartialEq)]
pub enum Argument<'src> {
    /// Numeric argument, stored as a `u8`.
    Number(u8),
    /// Textual argument, held as a `&'src str` slice.
    Argument(&'src str),
}

View file

@ -1,10 +0,0 @@
use dalet::parsers::gemtext::parse_gemtext;
/// Smoke test: the bundled `gemtext.gmi` fixture must parse without error.
#[test]
fn gem_text() {
    let source = include_str!("./gemtext.gmi");

    // Only success matters here; the parsed value is discarded immediately.
    let _ = parse_gemtext(source).unwrap();
}