fix: decoder

This commit is contained in:
Artemy Egorov 2024-10-09 16:43:35 +03:00
parent 573929beee
commit a2dfb567c8
7 changed files with 175 additions and 263 deletions

46
Cargo.lock generated
View file

@ -121,8 +121,6 @@ version = "1.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e80e3b6a3ab07840e1cae9b0666a63970dc28e8ed5ffbcdacbfc760c281bfc1"
dependencies = [
"jobserver",
"libc",
"shlex",
]
@ -214,7 +212,6 @@ dependencies = [
"rmp-serde",
"serde",
"serde_repr",
"zstd",
]
[[package]]
@ -281,15 +278,6 @@ version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "jobserver"
version = "0.1.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0"
dependencies = [
"libc",
]
[[package]]
name = "libc"
version = "0.2.159"
@ -353,12 +341,6 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
[[package]]
name = "pkg-config"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
[[package]]
name = "proc-macro-crate"
version = "3.2.0"
@ -658,31 +640,3 @@ dependencies = [
"quote",
"syn",
]
[[package]]
name = "zstd"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "7.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059"
dependencies = [
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.13+zstd.1.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa"
dependencies = [
"cc",
"pkg-config",
]

View file

@ -20,7 +20,8 @@ clap = { version = "4.5.13", features = ["derive"] }
num_enum = "0.7.3"
serde = { version = "1.0", features = ["derive"] }
serde_repr = "0.1"
zstd = "0.13.2"
flate2 = "1.0"
enum-procs = "0.3.0"
chumsky = { version = "=1.0.0-alpha.7", features = ["label"], optional = true }
@ -29,7 +30,6 @@ ariadne = { version = "0.4.1", optional = true }
[dev-dependencies]
rmp-serde = "1.3.0"
bincode = "1.3.3"
flate2 = "1.0"
[features]
default = ["types", "daletpack", "parsers", "daleth"]

View file

@ -2,8 +2,6 @@ use dalet::{
daletpack::*,
typed::{Hl, TNullArg, Tag::*},
};
use flate2::Compression;
use std::io::Write;
#[macro_export]
macro_rules! iprint {
@ -21,34 +19,11 @@ macro_rules! iprint {
// Benchmark helper. Evaluates `$func` through `iprint!` under the label `$name`, then feeds
// the result to three compressors (zstd, zlib, deflate) and reports each one through
// `iprint!` under a suffixed label. A blank line is printed afterwards and the macro
// evaluates to `res`, the uncompressed result.
// NOTE(review): `iprint!` is defined outside this view; presumably it prints the label with
// the byte count and yields the value it was given — confirm.
macro_rules! bench {
($name:expr, $func:expr) => {{
let res = iprint!($name, $func);
// zstd goes through the crate's own `utils` helper.
iprint!(
$name.to_owned() + " zstd",
utils::compress_zstd(&res).unwrap()
);
// zlib and deflate use the local flate2-based helpers of this file.
iprint!($name.to_owned() + " zlib", compress_zlib(&res).unwrap());
iprint!(
$name.to_owned() + " deflate",
compress_deflate(&res).unwrap()
);
println!();
// Hand the uncompressed bytes back so the caller can keep using them.
res
}};
}
/// Compresses `data` with flate2's `DeflateEncoder` at the default compression level.
///
/// # Errors
///
/// Returns the I/O error reported while writing the input or finishing the stream.
fn compress_deflate(data: &[u8]) -> std::io::Result<Vec<u8>> {
    let sink: Vec<u8> = Vec::new();
    let mut encoder = flate2::write::DeflateEncoder::new(sink, Compression::default());

    // `finish` consumes the encoder and yields the filled buffer; it only runs if the
    // write succeeded.
    encoder.write_all(data).and_then(|()| encoder.finish())
}
/// Compresses `data` with flate2's `ZlibEncoder` at the default compression level.
///
/// # Errors
///
/// Returns the I/O error reported while writing the input or finishing the stream.
fn compress_zlib(data: &[u8]) -> std::io::Result<Vec<u8>> {
    let sink: Vec<u8> = Vec::new();
    let mut encoder = flate2::write::ZlibEncoder::new(sink, Compression::default());

    // `finish` consumes the encoder and yields the filled buffer; it only runs if the
    // write succeeded.
    encoder.write_all(data).and_then(|()| encoder.finish())
}
fn main() {
let page = vec![
H("Heading 1".into(), Hl::One),
@ -98,8 +73,23 @@ fn main() {
let dalet_page = page.into();
bench!("Markdown", include_str!("./bench.md").as_bytes().to_vec());
bench!("Daletpack", encode_no_compress(&dalet_page).unwrap());
bench!("Messagepack", rmp_serde::to_vec(&dalet_page).unwrap());
bench!("Bincode", bincode::serialize(&dalet_page).unwrap());
let encoded = bench!("Daletpack", encode(&dalet_page).unwrap());
assert_eq!(
Decoder::new(&encoded).unwrap().decode().unwrap(),
dalet_page
);
bench!(
"Markdown",
utils::compress(&include_str!("./bench.md").as_bytes().to_vec()).unwrap()
);
bench!(
"Messagepack",
utils::compress(&rmp_serde::to_vec(&dalet_page).unwrap()).unwrap()
);
bench!(
"Bincode",
utils::compress(&bincode::serialize(&dalet_page).unwrap()).unwrap()
);
}

View file

@ -1,6 +1,6 @@
use crate::daletl::{DlPage, DlTag};
use crate::daletl::{DlArg, DlBody, DlPage, DlTag, DlTid};
use super::{utils, DaletPackDecodeError};
use super::{utils, DaletPackDecodeError, TypeId};
pub struct Decoder<'a> {
data: Box<dyn Iterator<Item = u8> + 'a>,
@ -9,162 +9,151 @@ pub struct Decoder<'a> {
impl<'a> Decoder<'a> {
pub fn new(data: &[u8]) -> Result<Self, DaletPackDecodeError> {
let data =
utils::decompress_zstd(data).map_err(|_| DaletPackDecodeError::ZstdDecompressError)?;
utils::decompress(data).map_err(|_| DaletPackDecodeError::ZstdDecompressError)?;
Ok(Self {
data: Box::new(data.into_iter()),
})
}
pub fn decode(&mut self) -> Result<DlPage, DaletPackDecodeError> {
let array: Vec<DlTag> = Vec::new();
// for _ in 0..u32::MAX {
// let typeid = self.data.next();
// match typeid {
// Some(typeid) => match typeid.try_into()? {
// TypeId::Text => array.push(DlTag::new(
// DlTid::El,
// self.read_text()?.into(),
// DlArgument::Null,
// )),
// TypeId::Tags => array.push(DlTag::new(
// DlTid::El,
// self.read_tag_array()?.into(),
// DlArgument::Null,
// )),
// TypeId::TagId => array.push(self.read_tag_with_id()?),
// TypeId::TagIdBody => array.push(self.read_tag_with_id_body()?),
// TypeId::TagIdArgument => array.push(self.read_tag_with_id_argument()?),
// TypeId::TagIdBodyArgument => array.push(self.read_full_tag()?),
// _ => Err(DaletPackDecodeError::InvalidSchema)?,
// },
// None => break,
// }
// }
Ok(DlPage { data: array })
Ok(DlPage {
data: self.read_tags(false)?,
})
}
// pub fn read_body(&mut self) -> Result<DlBody, DaletPackDecodeError> {
// let typeid: TypeId = self
// .data
// .next()
// .ok_or(DaletPackDecodeError::InvalidSchema)?
// .try_into()?;
fn read_tags(&mut self, with_end: bool) -> Result<Vec<DlTag>, DaletPackDecodeError> {
let mut data: Vec<DlTag> = Vec::new();
// let value = match typeid {
// TypeId::Text => DlBody::Text(self.read_text()?),
// TypeId::Tags => DlBody::Tags(self.read_tag_array()?),
// _ => Err(DaletPackDecodeError::InvalidArgument)?,
// };
for _ in 0..u32::MAX {
let typeid = self.data.next();
// Ok(value)
// }
let tag = match typeid {
Some(typeid) => match typeid.try_into()? {
TypeId::JustId => self.tag_just_id()?,
TypeId::TextBody => self.tag_text_body()?,
TypeId::TagsBody => self.tag_tags_body()?,
TypeId::TextArg => self.tag_text_arg()?,
TypeId::NumberArg => self.tag_number_arg()?,
TypeId::TextText => self.tag_text_text()?,
TypeId::TagsText => self.tag_tags_text()?,
TypeId::TextNumber => self.tag_text_number()?,
TypeId::TagsNumber => self.tag_tags_number()?,
// pub fn read_arg(&mut self) -> Result<DlArgument, DaletPackDecodeError> {
// let typeid: TypeId = self
// .data
// .next()
// .ok_or(DaletPackDecodeError::InvalidSchema)?
// .try_into()?;
TypeId::EndOfBody => {
if with_end {
break;
} else {
Err(DaletPackDecodeError::InvalidSchema)?
}
}
},
None => {
if with_end {
Err(DaletPackDecodeError::InvalidSchema)?
} else {
break;
}
}
};
// let value = match typeid {
// TypeId::Text => DlArgument::Text(self.read_text()?),
// TypeId::Number => DlArgument::Number(self.read_number()?),
// _ => Err(DaletPackDecodeError::InvalidArgument)?,
// };
data.push(tag);
}
// Ok(value)
// }
Ok(data)
}
// fn read_number(&mut self) -> Result<u8, DaletPackDecodeError> {
// self.data.next().ok_or(DaletPackDecodeError::InvalidSchema)
// }
fn tag_tags_number(&mut self) -> Result<DlTag, DaletPackDecodeError> {
Ok(DlTag::new(
self.read_tag_id()?,
self.read_tags(true)?.into(),
self.read_number()?.into(),
))
}
// fn read_text(&mut self) -> Result<String, DaletPackDecodeError> {
// let mut str = String::new();
fn tag_text_number(&mut self) -> Result<DlTag, DaletPackDecodeError> {
Ok(DlTag::new(
self.read_tag_id()?,
self.read_text()?.into(),
self.read_number()?.into(),
))
}
// for _ in 0..u32::MAX {
// let val = self
// .data
// .next()
// .ok_or(DaletPackDecodeError::InvalidTextSchema)?;
fn tag_tags_text(&mut self) -> Result<DlTag, DaletPackDecodeError> {
Ok(DlTag::new(
self.read_tag_id()?,
self.read_tags(true)?.into(),
self.read_text()?.into(),
))
}
// if val == TypeId::TextEnd as u8 {
// break;
// }
fn tag_text_text(&mut self) -> Result<DlTag, DaletPackDecodeError> {
Ok(DlTag::new(
self.read_tag_id()?,
self.read_text()?.into(),
self.read_text()?.into(),
))
}
// str.push(val as char);
// }
fn tag_number_arg(&mut self) -> Result<DlTag, DaletPackDecodeError> {
Ok(DlTag::new(
self.read_tag_id()?,
DlBody::Null,
self.read_number()?.into(),
))
}
// Ok(str)
// }
fn tag_text_arg(&mut self) -> Result<DlTag, DaletPackDecodeError> {
Ok(DlTag::new(
self.read_tag_id()?,
DlBody::Null,
self.read_text()?.into(),
))
}
// fn read_tag_array(&mut self) -> Result<Vec<DlTag>, DaletPackDecodeError> {
// let mut array = Vec::new();
fn tag_tags_body(&mut self) -> Result<DlTag, DaletPackDecodeError> {
Ok(DlTag::new(
self.read_tag_id()?,
self.read_tags(true)?.into(),
DlArg::Null,
))
}
// for _ in 0..u32::MAX {
// let typeid: TypeId = self
// .data
// .next()
// .ok_or(DaletPackDecodeError::InvalidTagsSchema)?
// .try_into()?;
fn tag_text_body(&mut self) -> Result<DlTag, DaletPackDecodeError> {
Ok(DlTag::new(
self.read_tag_id()?,
self.read_text()?.into(),
DlArg::Null,
))
}
// match typeid {
// TypeId::Text => array.push(DlTag::new(
// DlTid::El,
// self.read_text()?.into(),
// DlArgument::Null,
// )),
// TypeId::Tags => array.push(DlTag::new(
// DlTid::El,
// self.read_tag_array()?.into(),
// DlArgument::Null,
// )),
// TypeId::TagId => array.push(self.read_tag_with_id()?),
// TypeId::TagIdBody => array.push(self.read_tag_with_id_body()?),
// TypeId::TagIdArgument => array.push(self.read_tag_with_id_argument()?),
// TypeId::TagIdBodyArgument => array.push(self.read_full_tag()?),
fn tag_just_id(&mut self) -> Result<DlTag, DaletPackDecodeError> {
Ok(DlTag::new(self.read_tag_id()?, DlBody::Null, DlArg::Null))
}
// TypeId::TagsEnd => break,
// _ => Err(DaletPackDecodeError::InvalidSchema)?,
// }
// }
fn read_tag_id(&mut self) -> Result<DlTid, DaletPackDecodeError> {
Ok(self.read_number()?.try_into()?)
}
// Ok(array)
// }
fn read_number(&mut self) -> Result<u8, DaletPackDecodeError> {
self.data.next().ok_or(DaletPackDecodeError::InvalidSchema)
}
// fn read_tag_with_id(&mut self) -> Result<DlTag, DaletPackDecodeError> {
// Ok(DlTag::new(
// self.read_number()?.try_into()?,
// DlBody::Null,
// DlArgument::Null,
// ))
// }
fn read_text(&mut self) -> Result<String, DaletPackDecodeError> {
let mut str = String::new();
// fn read_tag_with_id_body(&mut self) -> Result<DlTag, DaletPackDecodeError> {
// Ok(DlTag::new(
// self.read_number()?.try_into()?,
// self.read_body()?,
// DlArgument::Null,
// ))
// }
for _ in 0..u32::MAX {
let val = self
.data
.next()
.ok_or(DaletPackDecodeError::InvalidTextSchema)?;
// fn read_tag_with_id_argument(&mut self) -> Result<DlTag, DaletPackDecodeError> {
// Ok(DlTag::new(
// self.read_number()?.try_into()?,
// DlBody::Null,
// self.read_arg()?,
// ))
// }
if val == TypeId::EndOfBody as u8 {
break;
}
// fn read_full_tag(&mut self) -> Result<DlTag, DaletPackDecodeError> {
// Ok(DlTag::new(
// self.read_number()?.try_into()?,
// self.read_body()?,
// self.read_arg()?,
// ))
// }
str.push(val as char);
}
Ok(str)
}
}

View file

@ -3,10 +3,10 @@ use crate::daletl::{DlArg, DlBody, DlPage, DlTag};
use super::{utils, DaletPackError, TypeId};
/// Encodes `page` into DaletPack bytes and compresses the result.
///
/// # Errors
///
/// Propagates any error from `encode_no_compress`. A compression failure is reported as
/// `DaletPackError::ZstdCompressError`.
pub fn encode(page: &DlPage) -> Result<Vec<u8>, DaletPackError> {
    // A single tail expression: `utils::compress` is the counterpart of the
    // `utils::decompress` call the decoder uses, so encoded pages round-trip.
    utils::compress(&encode_no_compress(page)?).map_err(|_| DaletPackError::ZstdCompressError)
}
pub fn encode_no_compress(page: &DlPage) -> Result<Vec<u8>, DaletPackError> {
fn encode_no_compress(page: &DlPage) -> Result<Vec<u8>, DaletPackError> {
if page.data.len() > 2usize.pow(32) {
return Err(DaletPackError::PageMaxSizeExceeded);
}
@ -20,6 +20,28 @@ pub fn encode_no_compress(page: &DlPage) -> Result<Vec<u8>, DaletPackError> {
Ok(bv)
}
/// Appends one tag to `bv`.
///
/// The output is a type-id byte derived from which of body/argument are present and what
/// kind they are, then the tag id byte, then the body and the argument as written by
/// `write_tag_body` and `write_tag_argument`.
///
/// # Errors
///
/// Propagates any error from the body or argument writers.
fn write_tag(bv: &mut Vec<u8>, tag: &DlTag) -> Result<(), DaletPackError> {
    let kind = match &tag.argument {
        DlArg::Null => match &tag.body {
            DlBody::Null => TypeId::JustId,
            DlBody::Text(_) => TypeId::TextBody,
            DlBody::Tags(_) => TypeId::TagsBody,
        },
        DlArg::Text(_) => match &tag.body {
            DlBody::Null => TypeId::TextArg,
            DlBody::Text(_) => TypeId::TextText,
            DlBody::Tags(_) => TypeId::TagsText,
        },
        DlArg::Number(_) => match &tag.body {
            DlBody::Null => TypeId::NumberArg,
            DlBody::Text(_) => TypeId::TextNumber,
            DlBody::Tags(_) => TypeId::TagsNumber,
        },
    };

    // Header: type id first, tag id second.
    bv.extend_from_slice(&[kind as u8, tag.id as u8]);

    write_tag_body(bv, &tag.body)?;
    write_tag_argument(bv, &tag.argument)?;

    Ok(())
}
fn write_str(bv: &mut Vec<u8>, string: &String) -> Result<(), DaletPackError> {
let size = string.len();
@ -42,49 +64,7 @@ fn write_array(bv: &mut Vec<u8>, arr: &Vec<DlTag>) -> Result<(), DaletPackError>
write_tag(bv, tag)?;
}
if arr.len() != 1 {
bv.push(TypeId::EndOfBody as u8);
}
Ok(())
}
// Appends one tag to `bv`: a type-id byte chosen from the (body, argument) shape, the tag id
// byte, then the body and the argument via `write_tag_body` / `write_tag_argument`.
// Errors from either writer are propagated.
fn write_tag(bv: &mut Vec<u8>, tag: &DlTag) -> Result<(), DaletPackError> {
let type_id = match (&tag.body, &tag.argument) {
(DlBody::Text(_), DlArg::Text(_)) => TypeId::TextText,
(DlBody::Text(_), DlArg::Number(_)) => TypeId::TextNumber,
(DlBody::Text(_), DlArg::Null) => TypeId::TextBody,
// For tag bodies, a vector of exactly one tag selects the singular `Tag*` id;
// any other length selects the plural `Tags*` id.
(DlBody::Tags(vec), DlArg::Text(_)) => {
if vec.len() == 1 {
TypeId::TagText
} else {
TypeId::TagsText
}
}
(DlBody::Tags(vec), DlArg::Number(_)) => {
if vec.len() == 1 {
TypeId::TagNumber
} else {
TypeId::TagsNumber
}
}
(DlBody::Tags(vec), DlArg::Null) => {
if vec.len() == 1 {
TypeId::TagBody
} else {
TypeId::TagsBody
}
}
(DlBody::Null, DlArg::Text(_)) => TypeId::TextArg,
(DlBody::Null, DlArg::Number(_)) => TypeId::NumberArg,
(DlBody::Null, DlArg::Null) => TypeId::JustId,
};
// Header bytes: type id, then tag id.
bv.push(type_id as u8);
bv.push(tag.id as u8);
write_tag_body(bv, &tag.body)?;
write_tag_argument(bv, &tag.argument)?;
Ok(())
}

View file

@ -46,18 +46,15 @@ pub enum TypeId {
EndOfBody,
TextBody,
TagBody,
TagsBody,
TextArg,
NumberArg,
TextText,
TagText,
TagsText,
TextNumber,
TagNumber,
TagsNumber,
JustId,

View file

@ -1,12 +1,14 @@
use std::io::{self, Read};
use zstd::stream::read::Decoder;
use flate2::{read::DeflateDecoder, write::DeflateEncoder, Compression};
use std::io::{Read, Result, Write};
pub fn compress_zstd(data: &[u8]) -> io::Result<Vec<u8>> {
zstd::bulk::compress(data, 22)
/// Compresses `data` with flate2's `DeflateEncoder` at `Compression::best()`.
///
/// # Errors
///
/// Returns the I/O error reported while writing the input or finishing the stream.
pub fn compress(data: &[u8]) -> Result<Vec<u8>> {
    let sink: Vec<u8> = Vec::new();
    let mut encoder = DeflateEncoder::new(sink, Compression::best());

    // `finish` consumes the encoder and yields the filled buffer; it only runs if the
    // write succeeded.
    encoder.write_all(data).and_then(|()| encoder.finish())
}
pub fn decompress_zstd(data: &[u8]) -> io::Result<Vec<u8>> {
let mut decoder = Decoder::new(data)?;
pub fn decompress(data: &[u8]) -> Result<Vec<u8>> {
let mut decoder = DeflateDecoder::new(data);
let mut decompressed = Vec::new();
decoder.read_to_end(&mut decompressed)?;
Ok(decompressed)