mirror of
https://github.com/TxtDot/dalet-rs.git
synced 2024-11-22 08:46:23 +03:00
feat: gemtext parser, pre tag
This commit is contained in:
parent
856534c22f
commit
6265701499
9 changed files with 258 additions and 1 deletions
138
Cargo.lock
generated
138
Cargo.lock
generated
|
@ -8,6 +8,33 @@ version = "1.0.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ahash"
|
||||||
|
version = "0.8.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"once_cell",
|
||||||
|
"version_check",
|
||||||
|
"zerocopy",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "1.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "allocator-api2"
|
||||||
|
version = "0.2.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "anstream"
|
name = "anstream"
|
||||||
version = "0.6.15"
|
version = "0.6.15"
|
||||||
|
@ -94,6 +121,19 @@ version = "1.0.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "chumsky"
|
||||||
|
version = "1.0.0-alpha.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c7b80276986f86789dc56ca6542d53bba9cda3c66091ebbe7bd96fc1bdf20f1f"
|
||||||
|
dependencies = [
|
||||||
|
"hashbrown",
|
||||||
|
"regex-automata",
|
||||||
|
"serde",
|
||||||
|
"stacker",
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap"
|
name = "clap"
|
||||||
version = "4.5.13"
|
version = "4.5.13"
|
||||||
|
@ -154,6 +194,7 @@ name = "dalet"
|
||||||
version = "1.0.0-pre4"
|
version = "1.0.0-pre4"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bincode",
|
"bincode",
|
||||||
|
"chumsky",
|
||||||
"clap",
|
"clap",
|
||||||
"flate2",
|
"flate2",
|
||||||
"num_enum",
|
"num_enum",
|
||||||
|
@ -184,6 +225,10 @@ name = "hashbrown"
|
||||||
version = "0.14.5"
|
version = "0.14.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
|
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
|
||||||
|
dependencies = [
|
||||||
|
"ahash",
|
||||||
|
"allocator-api2",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heck"
|
name = "heck"
|
||||||
|
@ -267,6 +312,12 @@ dependencies = [
|
||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "once_cell"
|
||||||
|
version = "1.19.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "paste"
|
name = "paste"
|
||||||
version = "1.0.15"
|
version = "1.0.15"
|
||||||
|
@ -297,6 +348,15 @@ dependencies = [
|
||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "psm"
|
||||||
|
version = "0.1.21"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "quote"
|
name = "quote"
|
||||||
version = "1.0.36"
|
version = "1.0.36"
|
||||||
|
@ -306,6 +366,23 @@ dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-automata"
|
||||||
|
version = "0.3.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.7.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rmp"
|
name = "rmp"
|
||||||
version = "0.8.14"
|
version = "0.8.14"
|
||||||
|
@ -359,6 +436,19 @@ dependencies = [
|
||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "stacker"
|
||||||
|
version = "0.1.15"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"cfg-if",
|
||||||
|
"libc",
|
||||||
|
"psm",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "strsim"
|
name = "strsim"
|
||||||
version = "0.11.1"
|
version = "0.11.1"
|
||||||
|
@ -405,6 +495,34 @@ version = "0.2.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "version_check"
|
||||||
|
version = "0.9.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi"
|
||||||
|
version = "0.3.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-i686-pc-windows-gnu",
|
||||||
|
"winapi-x86_64-pc-windows-gnu",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-i686-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-x86_64-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-sys"
|
name = "windows-sys"
|
||||||
version = "0.52.0"
|
version = "0.52.0"
|
||||||
|
@ -487,6 +605,26 @@ dependencies = [
|
||||||
"memchr",
|
"memchr",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerocopy"
|
||||||
|
version = "0.7.35"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
|
||||||
|
dependencies = [
|
||||||
|
"zerocopy-derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerocopy-derive"
|
||||||
|
version = "0.7.35"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zstd"
|
name = "zstd"
|
||||||
version = "0.13.2"
|
version = "0.13.2"
|
||||||
|
|
|
@ -17,6 +17,7 @@ num_enum = "0.7.3"
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_repr = "0.1"
|
serde_repr = "0.1"
|
||||||
zstd = "0.13.2"
|
zstd = "0.13.2"
|
||||||
|
chumsky = { version = "=1.0.0-alpha.7", features = ["label"], optional = true }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
rmp-serde = { version = "1.3.0" }
|
rmp-serde = { version = "1.3.0" }
|
||||||
|
@ -25,6 +26,7 @@ flate2 = "1.0"
|
||||||
|
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["types", "daletpack"]
|
default = ["types", "daletpack", "parsers"]
|
||||||
|
parsers = ["dep:chumsky", "types"]
|
||||||
types = []
|
types = []
|
||||||
daletpack = ["types"]
|
daletpack = ["types"]
|
||||||
|
|
|
@ -36,6 +36,7 @@ pub enum Tag {
|
||||||
Bl(NotNullBody, AlignArgument),
|
Bl(NotNullBody, AlignArgument),
|
||||||
Carousel(Vec<Tag>),
|
Carousel(Vec<Tag>),
|
||||||
Code(String, TextOrNullArgument),
|
Code(String, TextOrNullArgument),
|
||||||
|
Pre(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait ToDaletl {
|
pub trait ToDaletl {
|
||||||
|
@ -78,6 +79,7 @@ impl ToDaletlTag for Tag {
|
||||||
Tag::Bl(b, a) => t_new(Tid::Bl, b.to_daletl_body(), a.to_daletl_argument()),
|
Tag::Bl(b, a) => t_new(Tid::Bl, b.to_daletl_body(), a.to_daletl_argument()),
|
||||||
Tag::Carousel(b) => t_new(Tid::Carousel, b.to_daletl_body(), NA),
|
Tag::Carousel(b) => t_new(Tid::Carousel, b.to_daletl_body(), NA),
|
||||||
Tag::Code(s, a) => t_new(Tid::Code, s.to_daletl_body(), a.to_daletl_argument()),
|
Tag::Code(s, a) => t_new(Tid::Code, s.to_daletl_body(), a.to_daletl_argument()),
|
||||||
|
Tag::Pre(s) => t_new(Tid::Pre, s.to_daletl_body(), NA),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -92,4 +92,5 @@ pub enum Tid {
|
||||||
Bl,
|
Bl,
|
||||||
Carousel,
|
Carousel,
|
||||||
Code,
|
Code,
|
||||||
|
Pre,
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,3 +6,6 @@ pub mod abstractions;
|
||||||
|
|
||||||
#[cfg(feature = "daletpack")]
|
#[cfg(feature = "daletpack")]
|
||||||
pub mod daletpack;
|
pub mod daletpack;
|
||||||
|
|
||||||
|
#[cfg(feature = "parsers")]
|
||||||
|
pub mod parsers;
|
||||||
|
|
68
src/parsers/gemtext.rs
Normal file
68
src/parsers/gemtext.rs
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
use crate::abstractions::{Body, HeadingLevel, NotNullBody, Tag};
|
||||||
|
|
||||||
|
/// Errors that can occur while parsing a Gemtext document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GemTextParseError {
    /// A link line (one starting with `=>`) could not be interpreted as a link.
    InvalidLink,
}

impl std::fmt::Display for GemTextParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            GemTextParseError::InvalidLink => f.write_str("invalid gemtext link line"),
        }
    }
}

// Implementing `Error` lets callers propagate this with `?` into
// `Box<dyn std::error::Error>` or other error-wrapping types.
impl std::error::Error for GemTextParseError {}
|
||||||
|
|
||||||
|
pub fn parse_gemtext(s: String) -> Result<Vec<Tag>, GemTextParseError> {
|
||||||
|
let mut page: Vec<Tag> = Vec::new();
|
||||||
|
let mut preformatted = false;
|
||||||
|
let mut preformatted_text = String::new();
|
||||||
|
|
||||||
|
let mut before_is_ordered_list = false;
|
||||||
|
let mut ordered_list: Vec<Tag> = Vec::new();
|
||||||
|
|
||||||
|
for line in s.lines() {
|
||||||
|
let mut line = line.trim().to_owned();
|
||||||
|
|
||||||
|
if before_is_ordered_list && !line.starts_with("* ") {
|
||||||
|
page.push(Tag::Ul(ordered_list.clone()));
|
||||||
|
before_is_ordered_list = false;
|
||||||
|
ordered_list.clear();
|
||||||
|
} else if preformatted && !line.starts_with("```") {
|
||||||
|
preformatted_text.push_str(&line);
|
||||||
|
preformatted_text.push('\n');
|
||||||
|
} else if line.starts_with("=>") {
|
||||||
|
let body = line.split_off(2);
|
||||||
|
let mut body = body.trim().splitn(2, " ");
|
||||||
|
|
||||||
|
let url = body.next().ok_or(GemTextParseError::InvalidLink)?.trim();
|
||||||
|
|
||||||
|
match body.next() {
|
||||||
|
Some(label) => page.push(Tag::Link(
|
||||||
|
Body::Text(label.trim().to_owned()),
|
||||||
|
url.to_owned(),
|
||||||
|
)),
|
||||||
|
None => page.push(Tag::Link(Body::Null, url.to_owned())),
|
||||||
|
};
|
||||||
|
} else if line.starts_with("# ") {
|
||||||
|
let body = line.split_off(2);
|
||||||
|
page.push(Tag::H(body.trim().to_owned(), HeadingLevel::One));
|
||||||
|
} else if line.starts_with("## ") {
|
||||||
|
let body = line.split_off(3);
|
||||||
|
page.push(Tag::H(body.trim().to_owned(), HeadingLevel::Two));
|
||||||
|
} else if line.starts_with("### ") {
|
||||||
|
let body = line.split_off(4);
|
||||||
|
page.push(Tag::H(body.trim().to_owned(), HeadingLevel::Three));
|
||||||
|
} else if line.starts_with("* ") {
|
||||||
|
before_is_ordered_list = true;
|
||||||
|
let body = line.split_off(2);
|
||||||
|
ordered_list.push(Tag::El(NotNullBody::Text(body)));
|
||||||
|
} else if line.starts_with("> ") {
|
||||||
|
let body = line.split_off(2);
|
||||||
|
page.push(Tag::Bq(NotNullBody::Text(body)));
|
||||||
|
} else if line.starts_with("```") {
|
||||||
|
if preformatted {
|
||||||
|
page.push(Tag::Pre(preformatted_text.clone()));
|
||||||
|
preformatted_text.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
preformatted = !preformatted;
|
||||||
|
} else if !line.is_empty() {
|
||||||
|
page.push(Tag::P(NotNullBody::Text(line)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(page)
|
||||||
|
}
|
1
src/parsers/mod.rs
Normal file
1
src/parsers/mod.rs
Normal file
|
@ -0,0 +1 @@
|
||||||
|
/// Gemtext parser that produces Dalet tags.
pub mod gemtext;
|
32
tests/gemtext.gmi
Normal file
32
tests/gemtext.gmi
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
=> https://example.com A cool website
|
||||||
|
=> gopher://example.com An even cooler gopherhole
|
||||||
|
=> gemini://example.com A supremely cool Gemini capsule
|
||||||
|
=> sftp://example.com
|
||||||
|
|
||||||
|
Hi
|
||||||
|
|
||||||
|
=>https://example.com A cool website
|
||||||
|
=>gopher://example.com An even cooler gopherhole
|
||||||
|
=> gemini://example.com A supremely cool Gemini capsule
|
||||||
|
=> sftp://example.com
|
||||||
|
|
||||||
|
# Heading
|
||||||
|
|
||||||
|
## Sub-heading
|
||||||
|
|
||||||
|
### Sub-sub-heading
|
||||||
|
|
||||||
|
* Mercury
|
||||||
|
* Gemini
|
||||||
|
* Apollo
|
||||||
|
|
||||||
|
> Gemtext supports blockquotes. The quoted content is written as a single long line, which begins with a single > character
|
||||||
|
|
||||||
|
```
|
||||||
|
preformatted
|
||||||
|
=> ()
|
||||||
|
# false heading
|
||||||
|
text
|
||||||
|
```
|
||||||
|
|
||||||
|
This is paragraph
|
10
tests/gemtext.rs
Normal file
10
tests/gemtext.rs
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
use dalet::parsers::gemtext::parse_gemtext;
|
||||||
|
|
||||||
|
/// Smoke test: the bundled Gemtext fixture must parse successfully and yield
/// at least one tag.
#[test]
fn gem_text() {
    let text = include_str!("./gemtext.gmi");

    let parsed = parse_gemtext(text.to_owned()).expect("the gemtext fixture should parse");

    // Only visible on failure or when running with `--nocapture`.
    println!("{:#?}", parsed);

    assert!(
        !parsed.is_empty(),
        "parsing a non-empty gemtext document should produce tags"
    );
}
|
Loading…
Add table
Reference in a new issue