From 1ff6ba9c88377b981867605fc8bb794725ad0362 Mon Sep 17 00:00:00 2001 From: Artemy Egorov Date: Tue, 6 Aug 2024 10:16:24 +0300 Subject: [PATCH] feat: decode daletpack --- src/daletpack/decode.rs | 172 +++++++++++++++++++++ src/daletpack/encode.rs | 10 +- src/daletpack/mod.rs | 3 + src/daletpack/types.rs | 41 ++++- src/daletpack/utils.rs | 8 +- src/traits/mod.rs | 2 +- src/traits/{from_typed.rs => to_daletl.rs} | 0 src/typed.rs | 6 + tests/bench.rs | 17 +- tests/gemtext.rs | 4 +- 10 files changed, 244 insertions(+), 19 deletions(-) create mode 100644 src/daletpack/decode.rs rename src/traits/{from_typed.rs => to_daletl.rs} (100%) diff --git a/src/daletpack/decode.rs b/src/daletpack/decode.rs new file mode 100644 index 0000000..9352b52 --- /dev/null +++ b/src/daletpack/decode.rs @@ -0,0 +1,172 @@ +use std::u32::MAX; + +use crate::daletl::{DlArgument, DlBody, DlPage, DlTag, DlTid}; + +use super::{utils, DaletPackDecodeError, TypeId}; + +pub struct Decoder<'a> { + data: Box + 'a>, +} + +impl<'a> Decoder<'a> { + pub fn new(data: &[u8]) -> Result { + let data = + utils::decompress_zstd(data).map_err(|_| DaletPackDecodeError::ZstdDecompressError)?; + Ok(Self { + data: Box::new(data.into_iter()), + }) + } + + pub fn decode(&mut self) -> Result { + let mut array: Vec = Vec::new(); + + for _ in 0..MAX { + let typeid = self.data.next(); + + match typeid { + Some(typeid) => match typeid.try_into()? { + TypeId::Text => array.push(DlTag::new( + DlTid::El, + self.read_text()?.into(), + DlArgument::Null, + )), + TypeId::Tags => array.push(DlTag::new( + DlTid::El, + self.read_tag_array()?.into(), + DlArgument::Null, + )), + TypeId::TagId => array.push(self.read_tag_with_id()?), + TypeId::TagIdBody => array.push(self.read_tag_with_id_body()?), + TypeId::TagIdArgument => array.push(self.read_tag_with_id_argument()?), + TypeId::TagIdBodyArgument => array.push(self.read_full_tag()?), + + _ => Err(DaletPackDecodeError::InvalidSchema)?, + }, + None => break, + } + } + + Ok(DlPage { data: array }) + } + + pub fn read_body(&mut self) -> Result { + let typeid: TypeId = self + .data + .next() + .ok_or(DaletPackDecodeError::InvalidSchema)? + .try_into()?; + + let value = match typeid { + TypeId::Text => DlBody::Text(self.read_text()?), + TypeId::Tags => DlBody::Tags(self.read_tag_array()?), + _ => Err(DaletPackDecodeError::InvalidArgument)?, + }; + + Ok(value) + } + + pub fn read_arg(&mut self) -> Result { + let typeid: TypeId = self + .data + .next() + .ok_or(DaletPackDecodeError::InvalidSchema)? + .try_into()?; + + let value = match typeid { + TypeId::Text => DlArgument::Text(self.read_text()?), + TypeId::Number => DlArgument::Number(self.read_number()?), + _ => Err(DaletPackDecodeError::InvalidArgument)?, + }; + + Ok(value) + } + + fn read_number(&mut self) -> Result { + self.data.next().ok_or(DaletPackDecodeError::InvalidSchema) + } + + fn read_text(&mut self) -> Result { + let mut str = String::new(); + + for _ in 0..MAX { + let val = self + .data + .next() + .ok_or(DaletPackDecodeError::InvalidTextSchema)?; + + if val == TypeId::TextEnd as u8 { + break; + } + + str.push(val as char); + } + + Ok(str) + } + + fn read_tag_array(&mut self) -> Result, DaletPackDecodeError> { + let mut array = Vec::new(); + + for _ in 0..MAX { + let typeid: TypeId = self + .data + .next() + .ok_or(DaletPackDecodeError::InvalidTagsSchema)? + .try_into()?; + + match typeid { + TypeId::Text => array.push(DlTag::new( + DlTid::El, + self.read_text()?.into(), + DlArgument::Null, + )), + TypeId::Tags => array.push(DlTag::new( + DlTid::El, + self.read_tag_array()?.into(), + DlArgument::Null, + )), + TypeId::TagId => array.push(self.read_tag_with_id()?), + TypeId::TagIdBody => array.push(self.read_tag_with_id_body()?), + TypeId::TagIdArgument => array.push(self.read_tag_with_id_argument()?), + TypeId::TagIdBodyArgument => array.push(self.read_full_tag()?), + + TypeId::TagsEnd => break, + _ => Err(DaletPackDecodeError::InvalidSchema)?, + } + } + + Ok(array) + } + + fn read_tag_with_id(&mut self) -> Result { + Ok(DlTag::new( + self.read_number()?.try_into()?, + DlBody::Null, + DlArgument::Null, + )) + } + + fn read_tag_with_id_body(&mut self) -> Result { + Ok(DlTag::new( + self.read_number()?.try_into()?, + self.read_body()?, + DlArgument::Null, + )) + } + + fn read_tag_with_id_argument(&mut self) -> Result { + Ok(DlTag::new( + self.read_number()?.try_into()?, + DlBody::Null, + self.read_arg()?, + )) + } + + fn read_full_tag(&mut self) -> Result { + Ok(DlTag::new( + self.read_number()?.try_into()?, + self.read_body()?, + self.read_arg()?, + )) + } +} diff --git a/src/daletpack/encode.rs b/src/daletpack/encode.rs index 44c52a6..51bec42 100644 --- a/src/daletpack/encode.rs +++ b/src/daletpack/encode.rs @@ -22,7 +22,7 @@ pub fn encode_no_compress(page: &DlPage) -> Result, DaletPackError> { } fn write_int(bv: &mut Vec, n: u8) { - bv.push(1); + bv.push(TypeId::Number as u8); bv.push(n); } @@ -33,9 +33,9 @@ fn write_str(bv: &mut Vec, string: &String) -> Result<(), DaletPackError> { return Err(DaletPackError::StrMaxSizeExceeded); } - bv.push(TypeId::Str as u8); + bv.push(TypeId::Text as u8); bv.extend_from_slice(string.as_bytes()); - bv.push(TypeId::StrEnd as u8); + bv.push(TypeId::TextEnd as u8); Ok(()) } @@ -45,13 +45,13 @@ fn write_array(bv: &mut Vec, arr: &Vec) -> Result<(), DaletPackError> return Err(DaletPackError::ArrMaxSizeExceeded); } - bv.push(TypeId::TagArray as u8); + bv.push(TypeId::Tags as u8); for tag in arr { write_tag(bv, tag)?; } - bv.push(TypeId::TagArrayEnd as u8); + bv.push(TypeId::TagsEnd as u8); Ok(()) } diff --git a/src/daletpack/mod.rs b/src/daletpack/mod.rs index ea2f18f..983e1e6 100644 --- a/src/daletpack/mod.rs +++ b/src/daletpack/mod.rs @@ -1,6 +1,9 @@ +mod decode; mod encode; + mod types; pub mod utils; +pub use decode::*; pub use encode::*; pub use types::*; diff --git a/src/daletpack/types.rs b/src/daletpack/types.rs index e87ea4d..ca675f6 100644 --- a/src/daletpack/types.rs +++ b/src/daletpack/types.rs @@ -1,4 +1,6 @@ -use num_enum::TryFromPrimitive; +use num_enum::{TryFromPrimitive, TryFromPrimitiveError}; + +use crate::daletl::DlTid; #[derive(Debug, Clone, PartialEq, Eq)] pub enum DaletPackError { @@ -6,19 +8,46 @@ pub enum DaletPackError { ArrMaxSizeExceeded, PageMaxSizeExceeded, ZstdCompressError, + ZstdDecompressError, WriteNullBody, WriteNullArgument, } +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DaletPackDecodeError { + ZstdDecompressError, + + InvalidSchema, + InvalidTextSchema, + InvalidTagsSchema, + + UnknownTypeId, + UnknownTagId, + + InvalidArgument, +} + +impl From> for DaletPackDecodeError { + fn from(_: TryFromPrimitiveError) -> Self { + DaletPackDecodeError::UnknownTypeId + } +} + +impl From> for DaletPackDecodeError { + fn from(_: TryFromPrimitiveError) -> Self { + DaletPackDecodeError::UnknownTagId + } +} + #[derive(Debug, Clone, PartialEq, Eq, TryFromPrimitive, Copy)] #[repr(u8)] pub enum TypeId { - StrEnd = 0, - Str, - Int8, - TagArray, - TagArrayEnd, + TextEnd = 0, + Text, + Number, + Tags, + TagsEnd, TagId, TagIdBody, TagIdArgument, diff --git a/src/daletpack/utils.rs b/src/daletpack/utils.rs index 3d1084a..0570dd8 100644 --- a/src/daletpack/utils.rs +++ b/src/daletpack/utils.rs @@ -1,3 +1,9 @@ -pub fn compress_zstd(data: &Vec) -> std::io::Result> { +use std::u32::MAX; + +pub fn compress_zstd(data: &[u8]) -> std::io::Result> { zstd::bulk::compress(data, 22) } + +pub fn decompress_zstd(data: &[u8]) -> std::io::Result> { + zstd::bulk::decompress(data, MAX as usize) +} diff --git a/src/traits/mod.rs b/src/traits/mod.rs index be81ebd..23dd098 100644 --- a/src/traits/mod.rs +++ b/src/traits/mod.rs @@ -1,3 +1,3 @@ mod from_daletl; -mod from_typed; mod is_null_daletl; +mod to_daletl; diff --git a/src/traits/from_typed.rs b/src/traits/to_daletl.rs similarity index 100% rename from src/traits/from_typed.rs rename to src/traits/to_daletl.rs diff --git a/src/typed.rs b/src/typed.rs index ffd5b38..eff65ef 100644 --- a/src/typed.rs +++ b/src/typed.rs @@ -6,6 +6,12 @@ pub struct Page { pub data: Vec, } +impl Page { + pub fn new(data: Vec) -> Self { + Self { data } + } +} + pub struct ConversionError; #[derive(Debug, Clone, PartialEq, Eq)] diff --git a/tests/bench.rs b/tests/bench.rs index 6b06ece..fc14c63 100644 --- a/tests/bench.rs +++ b/tests/bench.rs @@ -1,6 +1,6 @@ use dalet::{ daletpack::*, - typed::{Hl, Page, TNArg, Tag::*}, + typed::{Hl, TNArg, Tag::*}, }; use flate2::Compression; use std::io::Write; @@ -11,7 +11,7 @@ macro_rules! iprint { let start = std::time::Instant::now(); let result = $func; let elapsed = start.elapsed(); - println!("{} ({:#?}): {} bytes", $name, elapsed, result.len()); + println!("{} ({:#?}): {}", $name, elapsed, result.len()); result }}; @@ -32,7 +32,7 @@ pub fn compress_zlib(data: &Vec) -> std::io::Result> { #[test] fn bench() { let page = vec![ - H("I am heading".into(), Hl::One), + H("Heading 1".into(), Hl::One), H("Heading 2".into(), Hl::Two), P(vec![ El("Some ".into()), @@ -87,7 +87,7 @@ fn bench() { println!(); iprint!("Markdown zstd", utils::compress_zstd(&markdown).unwrap()); - iprint!("Daletpack zstd", utils::compress_zstd(&daletpack).unwrap()); + let daletpack = iprint!("Daletpack zstd", utils::compress_zstd(&daletpack).unwrap()); iprint!( "Messagepack zstd", utils::compress_zstd(&messagepack).unwrap() @@ -110,4 +110,13 @@ fn bench() { compress_deflate(&messagepack).unwrap() ); iprint!("Bincode deflate", compress_deflate(&bincode).unwrap()); + + println!(); + + let decoded = iprint!( + "Daletpack decode", + Decoder::new(&daletpack).unwrap().decode().unwrap().data + ); + + println!("{:#?}", decoded); } diff --git a/tests/gemtext.rs b/tests/gemtext.rs index ffb2bfe..bda2175 100644 --- a/tests/gemtext.rs +++ b/tests/gemtext.rs @@ -4,7 +4,7 @@ use dalet::parsers::gemtext::parse_gemtext; fn gem_text() { let text = include_str!("./gemtext.gmi"); - let parsed = parse_gemtext(&text).unwrap(); + let _ = parse_gemtext(&text).unwrap(); - println!("{:#?}", parsed); + // println!("{:#?}", parsed); }