From a2dfb567c8a4d22ee3da9a0ece4e67d4f3241ee2 Mon Sep 17 00:00:00 2001 From: Artemy Egorov Date: Wed, 9 Oct 2024 16:43:35 +0300 Subject: [PATCH] fix: decoder --- Cargo.lock | 46 -------- Cargo.toml | 4 +- examples/bench.rs | 48 +++----- src/daletpack/decode.rs | 253 +++++++++++++++++++--------------------- src/daletpack/encode.rs | 70 ++++------- src/daletpack/types.rs | 3 - src/daletpack/utils.rs | 14 ++- 7 files changed, 175 insertions(+), 263 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5dbbe69..c808883 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -121,8 +121,6 @@ version = "1.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e80e3b6a3ab07840e1cae9b0666a63970dc28e8ed5ffbcdacbfc760c281bfc1" dependencies = [ - "jobserver", - "libc", "shlex", ] @@ -214,7 +212,6 @@ dependencies = [ "rmp-serde", "serde", "serde_repr", - "zstd", ] [[package]] @@ -281,15 +278,6 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" -[[package]] -name = "jobserver" -version = "0.1.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" -dependencies = [ - "libc", -] - [[package]] name = "libc" version = "0.2.159" @@ -353,12 +341,6 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" -[[package]] -name = "pkg-config" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" - [[package]] name = "proc-macro-crate" version = "3.2.0" @@ -658,31 +640,3 @@ dependencies = [ "quote", "syn", ] - -[[package]] -name = "zstd" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" -version = "7.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" -dependencies = [ - "zstd-sys", -] - -[[package]] -name = "zstd-sys" -version = "2.0.13+zstd.1.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" -dependencies = [ - "cc", - "pkg-config", -] diff --git a/Cargo.toml b/Cargo.toml index 98ed793..9f2c135 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,8 @@ clap = { version = "4.5.13", features = ["derive"] } num_enum = "0.7.3" serde = { version = "1.0", features = ["derive"] } serde_repr = "0.1" -zstd = "0.13.2" +flate2 = "1.0" + enum-procs = "0.3.0" chumsky = { version = "=1.0.0-alpha.7", features = ["label"], optional = true } @@ -29,7 +30,6 @@ ariadne = { version = "0.4.1", optional = true } [dev-dependencies] rmp-serde = "1.3.0" bincode = "1.3.3" -flate2 = "1.0" [features] default = ["types", "daletpack", "parsers", "daleth"] diff --git a/examples/bench.rs b/examples/bench.rs index 2e97807..6b3c230 100644 --- a/examples/bench.rs +++ b/examples/bench.rs @@ -2,8 +2,6 @@ use dalet::{ daletpack::*, typed::{Hl, TNullArg, Tag::*}, }; -use flate2::Compression; -use std::io::Write; #[macro_export] macro_rules! iprint { @@ -21,34 +19,11 @@ macro_rules! iprint { macro_rules! bench { ($name:expr, $func:expr) => {{ let res = iprint!($name, $func); - iprint!( - $name.to_owned() + " zstd", - utils::compress_zstd(&res).unwrap() - ); - iprint!($name.to_owned() + " zlib", compress_zlib(&res).unwrap()); - iprint!( - $name.to_owned() + " deflate", - compress_deflate(&res).unwrap() - ); - - println!(); res }}; } -fn compress_deflate(data: &[u8]) -> std::io::Result> { - let mut c = flate2::write::DeflateEncoder::new(Vec::new(), Compression::default()); - c.write_all(data)?; - c.finish() -} - -fn compress_zlib(data: &[u8]) -> std::io::Result> { - let mut c = flate2::write::ZlibEncoder::new(Vec::new(), Compression::default()); - c.write_all(data)?; - c.finish() -} - fn main() { let page = vec![ H("Heading 1".into(), Hl::One), @@ -98,8 +73,23 @@ fn main() { let dalet_page = page.into(); - bench!("Markdown", include_str!("./bench.md").as_bytes().to_vec()); - bench!("Daletpack", encode_no_compress(&dalet_page).unwrap()); - bench!("Messagepack", rmp_serde::to_vec(&dalet_page).unwrap()); - bench!("Bincode", bincode::serialize(&dalet_page).unwrap()); + let encoded = bench!("Daletpack", encode(&dalet_page).unwrap()); + + assert_eq!( + Decoder::new(&encoded).unwrap().decode().unwrap(), + dalet_page + ); + + bench!( + "Markdown", + utils::compress(&include_str!("./bench.md").as_bytes().to_vec()).unwrap() + ); + bench!( + "Messagepack", + utils::compress(&rmp_serde::to_vec(&dalet_page).unwrap()).unwrap() + ); + bench!( + "Bincode", + utils::compress(&bincode::serialize(&dalet_page).unwrap()).unwrap() + ); } diff --git a/src/daletpack/decode.rs b/src/daletpack/decode.rs index 0de5f0f..a5d71b6 100644 --- a/src/daletpack/decode.rs +++ b/src/daletpack/decode.rs @@ -1,6 +1,6 @@ -use crate::daletl::{DlPage, DlTag}; +use crate::daletl::{DlArg, DlBody, DlPage, DlTag, DlTid}; -use super::{utils, DaletPackDecodeError}; +use super::{utils, DaletPackDecodeError, TypeId}; pub struct Decoder<'a> { data: Box + 'a>, @@ -9,162 +9,151 @@ pub struct Decoder<'a> { impl<'a> Decoder<'a> { pub fn new(data: &[u8]) -> Result { let data = - utils::decompress_zstd(data).map_err(|_| DaletPackDecodeError::ZstdDecompressError)?; + utils::decompress(data).map_err(|_| DaletPackDecodeError::ZstdDecompressError)?; Ok(Self { data: Box::new(data.into_iter()), }) } pub fn decode(&mut self) -> Result { - let array: Vec = Vec::new(); - - // for _ in 0..u32::MAX { - // let typeid = self.data.next(); - - // match typeid { - // Some(typeid) => match typeid.try_into()? { - // TypeId::Text => array.push(DlTag::new( - // DlTid::El, - // self.read_text()?.into(), - // DlArgument::Null, - // )), - // TypeId::Tags => array.push(DlTag::new( - // DlTid::El, - // self.read_tag_array()?.into(), - // DlArgument::Null, - // )), - // TypeId::TagId => array.push(self.read_tag_with_id()?), - // TypeId::TagIdBody => array.push(self.read_tag_with_id_body()?), - // TypeId::TagIdArgument => array.push(self.read_tag_with_id_argument()?), - // TypeId::TagIdBodyArgument => array.push(self.read_full_tag()?), - - // _ => Err(DaletPackDecodeError::InvalidSchema)?, - // }, - // None => break, - // } - // } - - Ok(DlPage { data: array }) + Ok(DlPage { + data: self.read_tags(false)?, + }) } - // pub fn read_body(&mut self) -> Result { - // let typeid: TypeId = self - // .data - // .next() - // .ok_or(DaletPackDecodeError::InvalidSchema)? - // .try_into()?; + fn read_tags(&mut self, with_end: bool) -> Result, DaletPackDecodeError> { + let mut data: Vec = Vec::new(); - // let value = match typeid { - // TypeId::Text => DlBody::Text(self.read_text()?), - // TypeId::Tags => DlBody::Tags(self.read_tag_array()?), - // _ => Err(DaletPackDecodeError::InvalidArgument)?, - // }; + for _ in 0..u32::MAX { + let typeid = self.data.next(); - // Ok(value) - // } + let tag = match typeid { + Some(typeid) => match typeid.try_into()? { + TypeId::JustId => self.tag_just_id()?, + TypeId::TextBody => self.tag_text_body()?, + TypeId::TagsBody => self.tag_tags_body()?, + TypeId::TextArg => self.tag_text_arg()?, + TypeId::NumberArg => self.tag_number_arg()?, + TypeId::TextText => self.tag_text_text()?, + TypeId::TagsText => self.tag_tags_text()?, + TypeId::TextNumber => self.tag_text_number()?, + TypeId::TagsNumber => self.tag_tags_number()?, - // pub fn read_arg(&mut self) -> Result { - // let typeid: TypeId = self - // .data - // .next() - // .ok_or(DaletPackDecodeError::InvalidSchema)? - // .try_into()?; + TypeId::EndOfBody => { + if with_end { + break; + } else { + Err(DaletPackDecodeError::InvalidSchema)? + } + } + }, + None => { + if with_end { + Err(DaletPackDecodeError::InvalidSchema)? + } else { + break; + } + } + }; - // let value = match typeid { - // TypeId::Text => DlArgument::Text(self.read_text()?), - // TypeId::Number => DlArgument::Number(self.read_number()?), - // _ => Err(DaletPackDecodeError::InvalidArgument)?, - // }; + data.push(tag); + } - // Ok(value) - // } + Ok(data) + } - // fn read_number(&mut self) -> Result { - // self.data.next().ok_or(DaletPackDecodeError::InvalidSchema) - // } + fn tag_tags_number(&mut self) -> Result { + Ok(DlTag::new( + self.read_tag_id()?, + self.read_tags(true)?.into(), + self.read_number()?.into(), + )) + } - // fn read_text(&mut self) -> Result { - // let mut str = String::new(); + fn tag_text_number(&mut self) -> Result { + Ok(DlTag::new( + self.read_tag_id()?, + self.read_text()?.into(), + self.read_number()?.into(), + )) + } - // for _ in 0..u32::MAX { - // let val = self - // .data - // .next() - // .ok_or(DaletPackDecodeError::InvalidTextSchema)?; + fn tag_tags_text(&mut self) -> Result { + Ok(DlTag::new( + self.read_tag_id()?, + self.read_tags(true)?.into(), + self.read_text()?.into(), + )) + } - // if val == TypeId::TextEnd as u8 { - // break; - // } + fn tag_text_text(&mut self) -> Result { + Ok(DlTag::new( + self.read_tag_id()?, + self.read_text()?.into(), + self.read_text()?.into(), + )) + } - // str.push(val as char); - // } + fn tag_number_arg(&mut self) -> Result { + Ok(DlTag::new( + self.read_tag_id()?, + DlBody::Null, + self.read_number()?.into(), + )) + } - // Ok(str) - // } + fn tag_text_arg(&mut self) -> Result { + Ok(DlTag::new( + self.read_tag_id()?, + DlBody::Null, + self.read_text()?.into(), + )) + } - // fn read_tag_array(&mut self) -> Result, DaletPackDecodeError> { - // let mut array = Vec::new(); + fn tag_tags_body(&mut self) -> Result { + Ok(DlTag::new( + self.read_tag_id()?, + self.read_tags(true)?.into(), + DlArg::Null, + )) + } - // for _ in 0..u32::MAX { - // let typeid: TypeId = self - // .data - // .next() - // .ok_or(DaletPackDecodeError::InvalidTagsSchema)? - // .try_into()?; + fn tag_text_body(&mut self) -> Result { + Ok(DlTag::new( + self.read_tag_id()?, + self.read_text()?.into(), + DlArg::Null, + )) + } - // match typeid { - // TypeId::Text => array.push(DlTag::new( - // DlTid::El, - // self.read_text()?.into(), - // DlArgument::Null, - // )), - // TypeId::Tags => array.push(DlTag::new( - // DlTid::El, - // self.read_tag_array()?.into(), - // DlArgument::Null, - // )), - // TypeId::TagId => array.push(self.read_tag_with_id()?), - // TypeId::TagIdBody => array.push(self.read_tag_with_id_body()?), - // TypeId::TagIdArgument => array.push(self.read_tag_with_id_argument()?), - // TypeId::TagIdBodyArgument => array.push(self.read_full_tag()?), + fn tag_just_id(&mut self) -> Result { + Ok(DlTag::new(self.read_tag_id()?, DlBody::Null, DlArg::Null)) + } - // TypeId::TagsEnd => break, - // _ => Err(DaletPackDecodeError::InvalidSchema)?, - // } - // } + fn read_tag_id(&mut self) -> Result { + Ok(self.read_number()?.try_into()?) + } - // Ok(array) - // } + fn read_number(&mut self) -> Result { + self.data.next().ok_or(DaletPackDecodeError::InvalidSchema) + } - // fn read_tag_with_id(&mut self) -> Result { - // Ok(DlTag::new( - // self.read_number()?.try_into()?, - // DlBody::Null, - // DlArgument::Null, - // )) - // } + fn read_text(&mut self) -> Result { + let mut str = String::new(); - // fn read_tag_with_id_body(&mut self) -> Result { - // Ok(DlTag::new( - // self.read_number()?.try_into()?, - // self.read_body()?, - // DlArgument::Null, - // )) - // } + for _ in 0..u32::MAX { + let val = self + .data + .next() + .ok_or(DaletPackDecodeError::InvalidTextSchema)?; - // fn read_tag_with_id_argument(&mut self) -> Result { - // Ok(DlTag::new( - // self.read_number()?.try_into()?, - // DlBody::Null, - // self.read_arg()?, - // )) - // } + if val == TypeId::EndOfBody as u8 { + break; + } - // fn read_full_tag(&mut self) -> Result { - // Ok(DlTag::new( - // self.read_number()?.try_into()?, - // self.read_body()?, - // self.read_arg()?, - // )) - // } + str.push(val as char); + } + + Ok(str) + } } diff --git a/src/daletpack/encode.rs b/src/daletpack/encode.rs index 2120c7b..b3be160 100644 --- a/src/daletpack/encode.rs +++ b/src/daletpack/encode.rs @@ -3,10 +3,10 @@ use crate::daletl::{DlArg, DlBody, DlPage, DlTag}; use super::{utils, DaletPackError, TypeId}; pub fn encode(page: &DlPage) -> Result, DaletPackError> { - utils::compress_zstd(&encode_no_compress(page)?).map_err(|_| DaletPackError::ZstdCompressError) + utils::compress(&encode_no_compress(page)?).map_err(|_| DaletPackError::ZstdCompressError) } -pub fn encode_no_compress(page: &DlPage) -> Result, DaletPackError> { +fn encode_no_compress(page: &DlPage) -> Result, DaletPackError> { if page.data.len() > 2usize.pow(32) { return Err(DaletPackError::PageMaxSizeExceeded); } @@ -20,6 +20,28 @@ pub fn encode_no_compress(page: &DlPage) -> Result, DaletPackError> { Ok(bv) } +fn write_tag(bv: &mut Vec, tag: &DlTag) -> Result<(), DaletPackError> { + let type_id = match (&tag.body, &tag.argument) { + (DlBody::Text(_), DlArg::Text(_)) => TypeId::TextText, + (DlBody::Text(_), DlArg::Number(_)) => TypeId::TextNumber, + (DlBody::Text(_), DlArg::Null) => TypeId::TextBody, + (DlBody::Tags(_), DlArg::Text(_)) => TypeId::TagsText, + (DlBody::Tags(_), DlArg::Number(_)) => TypeId::TagsNumber, + (DlBody::Tags(_), DlArg::Null) => TypeId::TagsBody, + (DlBody::Null, DlArg::Text(_)) => TypeId::TextArg, + (DlBody::Null, DlArg::Number(_)) => TypeId::NumberArg, + (DlBody::Null, DlArg::Null) => TypeId::JustId, + }; + + bv.push(type_id as u8); + + bv.push(tag.id as u8); + write_tag_body(bv, &tag.body)?; + write_tag_argument(bv, &tag.argument)?; + + Ok(()) +} + fn write_str(bv: &mut Vec, string: &String) -> Result<(), DaletPackError> { let size = string.len(); @@ -42,49 +64,7 @@ fn write_array(bv: &mut Vec, arr: &Vec) -> Result<(), DaletPackError> write_tag(bv, tag)?; } - if arr.len() != 1 { - bv.push(TypeId::EndOfBody as u8); - } - - Ok(()) -} - -fn write_tag(bv: &mut Vec, tag: &DlTag) -> Result<(), DaletPackError> { - let type_id = match (&tag.body, &tag.argument) { - (DlBody::Text(_), DlArg::Text(_)) => TypeId::TextText, - (DlBody::Text(_), DlArg::Number(_)) => TypeId::TextNumber, - (DlBody::Text(_), DlArg::Null) => TypeId::TextBody, - (DlBody::Tags(vec), DlArg::Text(_)) => { - if vec.len() == 1 { - TypeId::TagText - } else { - TypeId::TagsText - } - } - (DlBody::Tags(vec), DlArg::Number(_)) => { - if vec.len() == 1 { - TypeId::TagNumber - } else { - TypeId::TagsNumber - } - } - (DlBody::Tags(vec), DlArg::Null) => { - if vec.len() == 1 { - TypeId::TagBody - } else { - TypeId::TagsBody - } - } - (DlBody::Null, DlArg::Text(_)) => TypeId::TextArg, - (DlBody::Null, DlArg::Number(_)) => TypeId::NumberArg, - (DlBody::Null, DlArg::Null) => TypeId::JustId, - }; - - bv.push(type_id as u8); - bv.push(tag.id as u8); - - write_tag_body(bv, &tag.body)?; - write_tag_argument(bv, &tag.argument)?; + bv.push(TypeId::EndOfBody as u8); Ok(()) } diff --git a/src/daletpack/types.rs b/src/daletpack/types.rs index 34d4109..ded8bb0 100644 --- a/src/daletpack/types.rs +++ b/src/daletpack/types.rs @@ -46,18 +46,15 @@ pub enum TypeId { EndOfBody, TextBody, - TagBody, TagsBody, TextArg, NumberArg, TextText, - TagText, TagsText, TextNumber, - TagNumber, TagsNumber, JustId, diff --git a/src/daletpack/utils.rs b/src/daletpack/utils.rs index 1f8f257..2161c67 100644 --- a/src/daletpack/utils.rs +++ b/src/daletpack/utils.rs @@ -1,12 +1,14 @@ -use std::io::{self, Read}; -use zstd::stream::read::Decoder; +use flate2::{read::DeflateDecoder, write::DeflateEncoder, Compression}; +use std::io::{Read, Result, Write}; -pub fn compress_zstd(data: &[u8]) -> io::Result> { - zstd::bulk::compress(data, 22) +pub fn compress(data: &[u8]) -> Result> { + let mut c = DeflateEncoder::new(Vec::new(), Compression::best()); + c.write_all(data)?; + c.finish() } -pub fn decompress_zstd(data: &[u8]) -> io::Result> { - let mut decoder = Decoder::new(data)?; +pub fn decompress(data: &[u8]) -> Result> { + let mut decoder = DeflateDecoder::new(data); let mut decompressed = Vec::new(); decoder.read_to_end(&mut decompressed)?; Ok(decompressed)