fix: decoder

This commit is contained in:
Artemy Egorov 2024-10-09 16:43:35 +03:00
parent 573929beee
commit a2dfb567c8
7 changed files with 175 additions and 263 deletions

46
Cargo.lock generated
View file

@ -121,8 +121,6 @@ version = "1.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e80e3b6a3ab07840e1cae9b0666a63970dc28e8ed5ffbcdacbfc760c281bfc1" checksum = "2e80e3b6a3ab07840e1cae9b0666a63970dc28e8ed5ffbcdacbfc760c281bfc1"
dependencies = [ dependencies = [
"jobserver",
"libc",
"shlex", "shlex",
] ]
@ -214,7 +212,6 @@ dependencies = [
"rmp-serde", "rmp-serde",
"serde", "serde",
"serde_repr", "serde_repr",
"zstd",
] ]
[[package]] [[package]]
@ -281,15 +278,6 @@ version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "jobserver"
version = "0.1.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.159" version = "0.2.159"
@ -353,12 +341,6 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
[[package]]
name = "pkg-config"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
[[package]] [[package]]
name = "proc-macro-crate" name = "proc-macro-crate"
version = "3.2.0" version = "3.2.0"
@ -658,31 +640,3 @@ dependencies = [
"quote", "quote",
"syn", "syn",
] ]
[[package]]
name = "zstd"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "7.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059"
dependencies = [
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.13+zstd.1.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa"
dependencies = [
"cc",
"pkg-config",
]

View file

@ -20,7 +20,8 @@ clap = { version = "4.5.13", features = ["derive"] }
num_enum = "0.7.3" num_enum = "0.7.3"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_repr = "0.1" serde_repr = "0.1"
zstd = "0.13.2" flate2 = "1.0"
enum-procs = "0.3.0" enum-procs = "0.3.0"
chumsky = { version = "=1.0.0-alpha.7", features = ["label"], optional = true } chumsky = { version = "=1.0.0-alpha.7", features = ["label"], optional = true }
@ -29,7 +30,6 @@ ariadne = { version = "0.4.1", optional = true }
[dev-dependencies] [dev-dependencies]
rmp-serde = "1.3.0" rmp-serde = "1.3.0"
bincode = "1.3.3" bincode = "1.3.3"
flate2 = "1.0"
[features] [features]
default = ["types", "daletpack", "parsers", "daleth"] default = ["types", "daletpack", "parsers", "daleth"]

View file

@ -2,8 +2,6 @@ use dalet::{
daletpack::*, daletpack::*,
typed::{Hl, TNullArg, Tag::*}, typed::{Hl, TNullArg, Tag::*},
}; };
use flate2::Compression;
use std::io::Write;
#[macro_export] #[macro_export]
macro_rules! iprint { macro_rules! iprint {
@ -21,34 +19,11 @@ macro_rules! iprint {
macro_rules! bench { macro_rules! bench {
($name:expr, $func:expr) => {{ ($name:expr, $func:expr) => {{
let res = iprint!($name, $func); let res = iprint!($name, $func);
iprint!(
$name.to_owned() + " zstd",
utils::compress_zstd(&res).unwrap()
);
iprint!($name.to_owned() + " zlib", compress_zlib(&res).unwrap());
iprint!(
$name.to_owned() + " deflate",
compress_deflate(&res).unwrap()
);
println!();
res res
}}; }};
} }
/// Compresses `data` with flate2's `DeflateEncoder` at the default compression level.
///
/// Returns the compressed bytes, or the I/O error raised while feeding the
/// encoder or finishing the stream.
fn compress_deflate(data: &[u8]) -> std::io::Result<Vec<u8>> {
    let sink = Vec::new();
    let mut encoder = flate2::write::DeflateEncoder::new(sink, Compression::default());
    encoder.write_all(data)?;
    encoder.finish()
}
/// Compresses `data` with flate2's `ZlibEncoder` at the default compression level.
///
/// Returns the compressed bytes, or the I/O error raised while feeding the
/// encoder or finishing the stream.
fn compress_zlib(data: &[u8]) -> std::io::Result<Vec<u8>> {
    let sink = Vec::new();
    let mut encoder = flate2::write::ZlibEncoder::new(sink, Compression::default());
    encoder.write_all(data)?;
    encoder.finish()
}
fn main() { fn main() {
let page = vec![ let page = vec![
H("Heading 1".into(), Hl::One), H("Heading 1".into(), Hl::One),
@ -98,8 +73,23 @@ fn main() {
let dalet_page = page.into(); let dalet_page = page.into();
bench!("Markdown", include_str!("./bench.md").as_bytes().to_vec()); let encoded = bench!("Daletpack", encode(&dalet_page).unwrap());
bench!("Daletpack", encode_no_compress(&dalet_page).unwrap());
bench!("Messagepack", rmp_serde::to_vec(&dalet_page).unwrap()); assert_eq!(
bench!("Bincode", bincode::serialize(&dalet_page).unwrap()); Decoder::new(&encoded).unwrap().decode().unwrap(),
dalet_page
);
bench!(
"Markdown",
utils::compress(&include_str!("./bench.md").as_bytes().to_vec()).unwrap()
);
bench!(
"Messagepack",
utils::compress(&rmp_serde::to_vec(&dalet_page).unwrap()).unwrap()
);
bench!(
"Bincode",
utils::compress(&bincode::serialize(&dalet_page).unwrap()).unwrap()
);
} }

View file

@ -1,6 +1,6 @@
use crate::daletl::{DlPage, DlTag}; use crate::daletl::{DlArg, DlBody, DlPage, DlTag, DlTid};
use super::{utils, DaletPackDecodeError}; use super::{utils, DaletPackDecodeError, TypeId};
pub struct Decoder<'a> { pub struct Decoder<'a> {
data: Box<dyn Iterator<Item = u8> + 'a>, data: Box<dyn Iterator<Item = u8> + 'a>,
@ -9,162 +9,151 @@ pub struct Decoder<'a> {
impl<'a> Decoder<'a> { impl<'a> Decoder<'a> {
pub fn new(data: &[u8]) -> Result<Self, DaletPackDecodeError> { pub fn new(data: &[u8]) -> Result<Self, DaletPackDecodeError> {
let data = let data =
utils::decompress_zstd(data).map_err(|_| DaletPackDecodeError::ZstdDecompressError)?; utils::decompress(data).map_err(|_| DaletPackDecodeError::ZstdDecompressError)?;
Ok(Self { Ok(Self {
data: Box::new(data.into_iter()), data: Box::new(data.into_iter()),
}) })
} }
pub fn decode(&mut self) -> Result<DlPage, DaletPackDecodeError> { pub fn decode(&mut self) -> Result<DlPage, DaletPackDecodeError> {
let array: Vec<DlTag> = Vec::new(); Ok(DlPage {
data: self.read_tags(false)?,
// for _ in 0..u32::MAX { })
// let typeid = self.data.next();
// match typeid {
// Some(typeid) => match typeid.try_into()? {
// TypeId::Text => array.push(DlTag::new(
// DlTid::El,
// self.read_text()?.into(),
// DlArgument::Null,
// )),
// TypeId::Tags => array.push(DlTag::new(
// DlTid::El,
// self.read_tag_array()?.into(),
// DlArgument::Null,
// )),
// TypeId::TagId => array.push(self.read_tag_with_id()?),
// TypeId::TagIdBody => array.push(self.read_tag_with_id_body()?),
// TypeId::TagIdArgument => array.push(self.read_tag_with_id_argument()?),
// TypeId::TagIdBodyArgument => array.push(self.read_full_tag()?),
// _ => Err(DaletPackDecodeError::InvalidSchema)?,
// },
// None => break,
// }
// }
Ok(DlPage { data: array })
} }
// pub fn read_body(&mut self) -> Result<DlBody, DaletPackDecodeError> { fn read_tags(&mut self, with_end: bool) -> Result<Vec<DlTag>, DaletPackDecodeError> {
// let typeid: TypeId = self let mut data: Vec<DlTag> = Vec::new();
// .data
// .next()
// .ok_or(DaletPackDecodeError::InvalidSchema)?
// .try_into()?;
// let value = match typeid { for _ in 0..u32::MAX {
// TypeId::Text => DlBody::Text(self.read_text()?), let typeid = self.data.next();
// TypeId::Tags => DlBody::Tags(self.read_tag_array()?),
// _ => Err(DaletPackDecodeError::InvalidArgument)?,
// };
// Ok(value) let tag = match typeid {
// } Some(typeid) => match typeid.try_into()? {
TypeId::JustId => self.tag_just_id()?,
TypeId::TextBody => self.tag_text_body()?,
TypeId::TagsBody => self.tag_tags_body()?,
TypeId::TextArg => self.tag_text_arg()?,
TypeId::NumberArg => self.tag_number_arg()?,
TypeId::TextText => self.tag_text_text()?,
TypeId::TagsText => self.tag_tags_text()?,
TypeId::TextNumber => self.tag_text_number()?,
TypeId::TagsNumber => self.tag_tags_number()?,
// pub fn read_arg(&mut self) -> Result<DlArgument, DaletPackDecodeError> { TypeId::EndOfBody => {
// let typeid: TypeId = self if with_end {
// .data break;
// .next() } else {
// .ok_or(DaletPackDecodeError::InvalidSchema)? Err(DaletPackDecodeError::InvalidSchema)?
// .try_into()?; }
}
},
None => {
if with_end {
Err(DaletPackDecodeError::InvalidSchema)?
} else {
break;
}
}
};
// let value = match typeid { data.push(tag);
// TypeId::Text => DlArgument::Text(self.read_text()?), }
// TypeId::Number => DlArgument::Number(self.read_number()?),
// _ => Err(DaletPackDecodeError::InvalidArgument)?,
// };
// Ok(value) Ok(data)
// } }
// fn read_number(&mut self) -> Result<u8, DaletPackDecodeError> { fn tag_tags_number(&mut self) -> Result<DlTag, DaletPackDecodeError> {
// self.data.next().ok_or(DaletPackDecodeError::InvalidSchema) Ok(DlTag::new(
// } self.read_tag_id()?,
self.read_tags(true)?.into(),
self.read_number()?.into(),
))
}
// fn read_text(&mut self) -> Result<String, DaletPackDecodeError> { fn tag_text_number(&mut self) -> Result<DlTag, DaletPackDecodeError> {
// let mut str = String::new(); Ok(DlTag::new(
self.read_tag_id()?,
self.read_text()?.into(),
self.read_number()?.into(),
))
}
// for _ in 0..u32::MAX { fn tag_tags_text(&mut self) -> Result<DlTag, DaletPackDecodeError> {
// let val = self Ok(DlTag::new(
// .data self.read_tag_id()?,
// .next() self.read_tags(true)?.into(),
// .ok_or(DaletPackDecodeError::InvalidTextSchema)?; self.read_text()?.into(),
))
}
// if val == TypeId::TextEnd as u8 { fn tag_text_text(&mut self) -> Result<DlTag, DaletPackDecodeError> {
// break; Ok(DlTag::new(
// } self.read_tag_id()?,
self.read_text()?.into(),
self.read_text()?.into(),
))
}
// str.push(val as char); fn tag_number_arg(&mut self) -> Result<DlTag, DaletPackDecodeError> {
// } Ok(DlTag::new(
self.read_tag_id()?,
DlBody::Null,
self.read_number()?.into(),
))
}
// Ok(str) fn tag_text_arg(&mut self) -> Result<DlTag, DaletPackDecodeError> {
// } Ok(DlTag::new(
self.read_tag_id()?,
DlBody::Null,
self.read_text()?.into(),
))
}
// fn read_tag_array(&mut self) -> Result<Vec<DlTag>, DaletPackDecodeError> { fn tag_tags_body(&mut self) -> Result<DlTag, DaletPackDecodeError> {
// let mut array = Vec::new(); Ok(DlTag::new(
self.read_tag_id()?,
self.read_tags(true)?.into(),
DlArg::Null,
))
}
// for _ in 0..u32::MAX { fn tag_text_body(&mut self) -> Result<DlTag, DaletPackDecodeError> {
// let typeid: TypeId = self Ok(DlTag::new(
// .data self.read_tag_id()?,
// .next() self.read_text()?.into(),
// .ok_or(DaletPackDecodeError::InvalidTagsSchema)? DlArg::Null,
// .try_into()?; ))
}
// match typeid { fn tag_just_id(&mut self) -> Result<DlTag, DaletPackDecodeError> {
// TypeId::Text => array.push(DlTag::new( Ok(DlTag::new(self.read_tag_id()?, DlBody::Null, DlArg::Null))
// DlTid::El, }
// self.read_text()?.into(),
// DlArgument::Null,
// )),
// TypeId::Tags => array.push(DlTag::new(
// DlTid::El,
// self.read_tag_array()?.into(),
// DlArgument::Null,
// )),
// TypeId::TagId => array.push(self.read_tag_with_id()?),
// TypeId::TagIdBody => array.push(self.read_tag_with_id_body()?),
// TypeId::TagIdArgument => array.push(self.read_tag_with_id_argument()?),
// TypeId::TagIdBodyArgument => array.push(self.read_full_tag()?),
// TypeId::TagsEnd => break, fn read_tag_id(&mut self) -> Result<DlTid, DaletPackDecodeError> {
// _ => Err(DaletPackDecodeError::InvalidSchema)?, Ok(self.read_number()?.try_into()?)
// } }
// }
// Ok(array) fn read_number(&mut self) -> Result<u8, DaletPackDecodeError> {
// } self.data.next().ok_or(DaletPackDecodeError::InvalidSchema)
}
// fn read_tag_with_id(&mut self) -> Result<DlTag, DaletPackDecodeError> { fn read_text(&mut self) -> Result<String, DaletPackDecodeError> {
// Ok(DlTag::new( let mut str = String::new();
// self.read_number()?.try_into()?,
// DlBody::Null,
// DlArgument::Null,
// ))
// }
// fn read_tag_with_id_body(&mut self) -> Result<DlTag, DaletPackDecodeError> { for _ in 0..u32::MAX {
// Ok(DlTag::new( let val = self
// self.read_number()?.try_into()?, .data
// self.read_body()?, .next()
// DlArgument::Null, .ok_or(DaletPackDecodeError::InvalidTextSchema)?;
// ))
// }
// fn read_tag_with_id_argument(&mut self) -> Result<DlTag, DaletPackDecodeError> { if val == TypeId::EndOfBody as u8 {
// Ok(DlTag::new( break;
// self.read_number()?.try_into()?, }
// DlBody::Null,
// self.read_arg()?,
// ))
// }
// fn read_full_tag(&mut self) -> Result<DlTag, DaletPackDecodeError> { str.push(val as char);
// Ok(DlTag::new( }
// self.read_number()?.try_into()?,
// self.read_body()?, Ok(str)
// self.read_arg()?, }
// ))
// }
} }

View file

@ -3,10 +3,10 @@ use crate::daletl::{DlArg, DlBody, DlPage, DlTag};
use super::{utils, DaletPackError, TypeId}; use super::{utils, DaletPackError, TypeId};
pub fn encode(page: &DlPage) -> Result<Vec<u8>, DaletPackError> { pub fn encode(page: &DlPage) -> Result<Vec<u8>, DaletPackError> {
utils::compress_zstd(&encode_no_compress(page)?).map_err(|_| DaletPackError::ZstdCompressError) utils::compress(&encode_no_compress(page)?).map_err(|_| DaletPackError::ZstdCompressError)
} }
pub fn encode_no_compress(page: &DlPage) -> Result<Vec<u8>, DaletPackError> { fn encode_no_compress(page: &DlPage) -> Result<Vec<u8>, DaletPackError> {
if page.data.len() > 2usize.pow(32) { if page.data.len() > 2usize.pow(32) {
return Err(DaletPackError::PageMaxSizeExceeded); return Err(DaletPackError::PageMaxSizeExceeded);
} }
@ -20,6 +20,28 @@ pub fn encode_no_compress(page: &DlPage) -> Result<Vec<u8>, DaletPackError> {
Ok(bv) Ok(bv)
} }
/// Appends the encoded form of `tag` to `bv`.
///
/// Emits, in order: one `TypeId` byte selected from the shape of the tag's
/// body and argument, one byte holding the tag id, then the body and the
/// argument via `write_tag_body` and `write_tag_argument`.
///
/// # Errors
///
/// Propagates any error returned while writing the body or the argument.
fn write_tag(bv: &mut Vec<u8>, tag: &DlTag) -> Result<(), DaletPackError> {
    // Pick the type id by body kind first, then by argument kind.
    let kind = match &tag.body {
        DlBody::Text(_) => match &tag.argument {
            DlArg::Text(_) => TypeId::TextText,
            DlArg::Number(_) => TypeId::TextNumber,
            DlArg::Null => TypeId::TextBody,
        },
        DlBody::Tags(_) => match &tag.argument {
            DlArg::Text(_) => TypeId::TagsText,
            DlArg::Number(_) => TypeId::TagsNumber,
            DlArg::Null => TypeId::TagsBody,
        },
        DlBody::Null => match &tag.argument {
            DlArg::Text(_) => TypeId::TextArg,
            DlArg::Number(_) => TypeId::NumberArg,
            DlArg::Null => TypeId::JustId,
        },
    };

    // Header: type id byte followed by the tag id byte.
    bv.extend_from_slice(&[kind as u8, tag.id as u8]);

    write_tag_body(bv, &tag.body)?;
    write_tag_argument(bv, &tag.argument)?;

    Ok(())
}
fn write_str(bv: &mut Vec<u8>, string: &String) -> Result<(), DaletPackError> { fn write_str(bv: &mut Vec<u8>, string: &String) -> Result<(), DaletPackError> {
let size = string.len(); let size = string.len();
@ -42,49 +64,7 @@ fn write_array(bv: &mut Vec<u8>, arr: &Vec<DlTag>) -> Result<(), DaletPackError>
write_tag(bv, tag)?; write_tag(bv, tag)?;
} }
if arr.len() != 1 { bv.push(TypeId::EndOfBody as u8);
bv.push(TypeId::EndOfBody as u8);
}
Ok(())
}
fn write_tag(bv: &mut Vec<u8>, tag: &DlTag) -> Result<(), DaletPackError> {
let type_id = match (&tag.body, &tag.argument) {
(DlBody::Text(_), DlArg::Text(_)) => TypeId::TextText,
(DlBody::Text(_), DlArg::Number(_)) => TypeId::TextNumber,
(DlBody::Text(_), DlArg::Null) => TypeId::TextBody,
(DlBody::Tags(vec), DlArg::Text(_)) => {
if vec.len() == 1 {
TypeId::TagText
} else {
TypeId::TagsText
}
}
(DlBody::Tags(vec), DlArg::Number(_)) => {
if vec.len() == 1 {
TypeId::TagNumber
} else {
TypeId::TagsNumber
}
}
(DlBody::Tags(vec), DlArg::Null) => {
if vec.len() == 1 {
TypeId::TagBody
} else {
TypeId::TagsBody
}
}
(DlBody::Null, DlArg::Text(_)) => TypeId::TextArg,
(DlBody::Null, DlArg::Number(_)) => TypeId::NumberArg,
(DlBody::Null, DlArg::Null) => TypeId::JustId,
};
bv.push(type_id as u8);
bv.push(tag.id as u8);
write_tag_body(bv, &tag.body)?;
write_tag_argument(bv, &tag.argument)?;
Ok(()) Ok(())
} }

View file

@ -46,18 +46,15 @@ pub enum TypeId {
EndOfBody, EndOfBody,
TextBody, TextBody,
TagBody,
TagsBody, TagsBody,
TextArg, TextArg,
NumberArg, NumberArg,
TextText, TextText,
TagText,
TagsText, TagsText,
TextNumber, TextNumber,
TagNumber,
TagsNumber, TagsNumber,
JustId, JustId,

View file

@ -1,12 +1,14 @@
use std::io::{self, Read}; use flate2::{read::DeflateDecoder, write::DeflateEncoder, Compression};
use zstd::stream::read::Decoder; use std::io::{Read, Result, Write};
pub fn compress_zstd(data: &[u8]) -> io::Result<Vec<u8>> { pub fn compress(data: &[u8]) -> Result<Vec<u8>> {
zstd::bulk::compress(data, 22) let mut c = DeflateEncoder::new(Vec::new(), Compression::best());
c.write_all(data)?;
c.finish()
} }
pub fn decompress_zstd(data: &[u8]) -> io::Result<Vec<u8>> { pub fn decompress(data: &[u8]) -> Result<Vec<u8>> {
let mut decoder = Decoder::new(data)?; let mut decoder = DeflateDecoder::new(data);
let mut decompressed = Vec::new(); let mut decompressed = Vec::new();
decoder.read_to_end(&mut decompressed)?; decoder.read_to_end(&mut decompressed)?;
Ok(decompressed) Ok(decompressed)