feat: decode daletpack

This commit is contained in:
Artemy Egorov 2024-08-06 10:16:24 +03:00
parent 91e185df0b
commit 1ff6ba9c88
10 changed files with 244 additions and 19 deletions

172
src/daletpack/decode.rs Normal file
View file

@ -0,0 +1,172 @@
use std::u32::MAX;
use crate::daletl::{DlArgument, DlBody, DlPage, DlTag, DlTid};
use super::{utils, DaletPackDecodeError, TypeId};
/// Streaming decoder for daletpack data.
///
/// Holds the bytes still to be decoded as a boxed byte iterator; every read
/// performed by the decoder advances this iterator.
pub struct Decoder<'a> {
    /// Remaining input bytes, consumed front to back while decoding.
    data: Box<dyn Iterator<Item = u8> + 'a>,
}
impl<'a> Decoder<'a> {
    /// Creates a decoder over a zstd-compressed daletpack buffer.
    ///
    /// The buffer is decompressed eagerly; the decoder then consumes the
    /// decompressed bytes one at a time.
    ///
    /// # Errors
    ///
    /// Returns [`DaletPackDecodeError::ZstdDecompressError`] if `data` cannot
    /// be decompressed.
    pub fn new(data: &[u8]) -> Result<Self, DaletPackDecodeError> {
        let data =
            utils::decompress_zstd(data).map_err(|_| DaletPackDecodeError::ZstdDecompressError)?;

        Ok(Self {
            data: Box::new(data.into_iter()),
        })
    }

    /// Decodes all remaining input into a [`DlPage`].
    ///
    /// Tags are read one after another until the input is exhausted.
    ///
    /// # Errors
    ///
    /// * [`DaletPackDecodeError::UnknownTypeId`] if a byte is not a known [`TypeId`].
    /// * [`DaletPackDecodeError::InvalidSchema`] if a type id is not allowed at tag
    ///   position or the input ends in the middle of a tag.
    /// * Any error produced while reading a nested text, tag array, body or argument.
    pub fn decode(&mut self) -> Result<DlPage, DaletPackDecodeError> {
        let mut tags: Vec<DlTag> = Vec::new();

        // Running out of bytes *between* two tags is the regular end of a page.
        while let Some(byte) = self.data.next() {
            let typeid: TypeId = byte.try_into()?;
            tags.push(self.read_tag(typeid)?);
        }

        Ok(DlPage { data: tags })
    }

    /// Reads a tag body: a type id followed by either a text or a tag array.
    ///
    /// # Errors
    ///
    /// * [`DaletPackDecodeError::InvalidSchema`] if the input ends before the type id.
    /// * [`DaletPackDecodeError::InvalidArgument`] if the type id is neither
    ///   `Text` nor `Tags`.
    pub fn read_body(&mut self) -> Result<DlBody, DaletPackDecodeError> {
        match self.next_type_id(DaletPackDecodeError::InvalidSchema)? {
            TypeId::Text => Ok(DlBody::Text(self.read_text()?)),
            TypeId::Tags => Ok(DlBody::Tags(self.read_tag_array()?)),
            _ => Err(DaletPackDecodeError::InvalidArgument),
        }
    }

    /// Reads a tag argument: a type id followed by either a text or a number.
    ///
    /// # Errors
    ///
    /// * [`DaletPackDecodeError::InvalidSchema`] if the input ends before the type id.
    /// * [`DaletPackDecodeError::InvalidArgument`] if the type id is neither
    ///   `Text` nor `Number`.
    pub fn read_arg(&mut self) -> Result<DlArgument, DaletPackDecodeError> {
        match self.next_type_id(DaletPackDecodeError::InvalidSchema)? {
            TypeId::Text => Ok(DlArgument::Text(self.read_text()?)),
            TypeId::Number => Ok(DlArgument::Number(self.read_number()?)),
            _ => Err(DaletPackDecodeError::InvalidArgument),
        }
    }

    /// Reads the next byte as a [`TypeId`], failing with `on_eof` if the input is exhausted.
    fn next_type_id(
        &mut self,
        on_eof: DaletPackDecodeError,
    ) -> Result<TypeId, DaletPackDecodeError> {
        let typeid: TypeId = self.data.next().ok_or(on_eof)?.try_into()?;
        Ok(typeid)
    }

    /// Reads a single raw byte, failing with `InvalidSchema` if the input is exhausted.
    fn read_number(&mut self) -> Result<u8, DaletPackDecodeError> {
        self.data.next().ok_or(DaletPackDecodeError::InvalidSchema)
    }

    /// Reads bytes up to (and consuming) the `TextEnd` terminator and decodes them as UTF-8.
    ///
    /// The encoder writes the raw UTF-8 bytes of the string, so the bytes must be
    /// decoded as one UTF-8 sequence; converting each byte to a `char` on its own
    /// would corrupt every non-ASCII character.
    ///
    /// # Errors
    ///
    /// Returns [`DaletPackDecodeError::InvalidTextSchema`] if the input ends before
    /// the terminator or the collected bytes are not valid UTF-8.
    fn read_text(&mut self) -> Result<String, DaletPackDecodeError> {
        let terminator = TypeId::TextEnd as u8;
        let mut bytes = Vec::new();

        loop {
            let byte = self
                .data
                .next()
                .ok_or(DaletPackDecodeError::InvalidTextSchema)?;
            if byte == terminator {
                break;
            }
            bytes.push(byte);
        }

        String::from_utf8(bytes).map_err(|_| DaletPackDecodeError::InvalidTextSchema)
    }

    /// Reads tags until the `TagsEnd` marker, which is consumed.
    ///
    /// NOTE(review): nested arrays recurse through [`Self::read_tag`] without a
    /// depth limit, so deeply nested input can exhaust the stack.
    ///
    /// # Errors
    ///
    /// * [`DaletPackDecodeError::InvalidTagsSchema`] if the input ends before `TagsEnd`.
    /// * Any error produced while reading one of the contained tags.
    fn read_tag_array(&mut self) -> Result<Vec<DlTag>, DaletPackDecodeError> {
        let mut tags = Vec::new();

        loop {
            let typeid = self.next_type_id(DaletPackDecodeError::InvalidTagsSchema)?;
            if typeid == TypeId::TagsEnd {
                return Ok(tags);
            }
            tags.push(self.read_tag(typeid)?);
        }
    }

    /// Reads the remainder of one tag whose leading type id has already been consumed.
    ///
    /// A bare text or tag array becomes an `El` tag with that body and no argument.
    ///
    /// # Errors
    ///
    /// Returns [`DaletPackDecodeError::InvalidSchema`] if `typeid` is not valid at
    /// tag position, or the error of the underlying reader.
    fn read_tag(&mut self, typeid: TypeId) -> Result<DlTag, DaletPackDecodeError> {
        let tag = match typeid {
            TypeId::Text => DlTag::new(DlTid::El, self.read_text()?.into(), DlArgument::Null),
            TypeId::Tags => {
                DlTag::new(DlTid::El, self.read_tag_array()?.into(), DlArgument::Null)
            }
            TypeId::TagId => self.read_tag_with_id()?,
            TypeId::TagIdBody => self.read_tag_with_id_body()?,
            TypeId::TagIdArgument => self.read_tag_with_id_argument()?,
            TypeId::TagIdBodyArgument => self.read_full_tag()?,
            _ => return Err(DaletPackDecodeError::InvalidSchema),
        };

        Ok(tag)
    }

    /// Reads one byte and converts it to a tag id.
    fn read_tag_id(&mut self) -> Result<DlTid, DaletPackDecodeError> {
        let id: DlTid = self.read_number()?.try_into()?;
        Ok(id)
    }

    /// Reads a tag that consists of an id only.
    fn read_tag_with_id(&mut self) -> Result<DlTag, DaletPackDecodeError> {
        let id = self.read_tag_id()?;
        Ok(DlTag::new(id, DlBody::Null, DlArgument::Null))
    }

    /// Reads a tag that consists of an id followed by a body.
    fn read_tag_with_id_body(&mut self) -> Result<DlTag, DaletPackDecodeError> {
        let id = self.read_tag_id()?;
        let body = self.read_body()?;
        Ok(DlTag::new(id, body, DlArgument::Null))
    }

    /// Reads a tag that consists of an id followed by an argument.
    fn read_tag_with_id_argument(&mut self) -> Result<DlTag, DaletPackDecodeError> {
        let id = self.read_tag_id()?;
        let argument = self.read_arg()?;
        Ok(DlTag::new(id, DlBody::Null, argument))
    }

    /// Reads a tag that consists of an id, then a body, then an argument.
    fn read_full_tag(&mut self) -> Result<DlTag, DaletPackDecodeError> {
        let id = self.read_tag_id()?;
        let body = self.read_body()?;
        let argument = self.read_arg()?;
        Ok(DlTag::new(id, body, argument))
    }
}

View file

@ -22,7 +22,7 @@ pub fn encode_no_compress(page: &DlPage) -> Result<Vec<u8>, DaletPackError> {
}
fn write_int(bv: &mut Vec<u8>, n: u8) {
bv.push(1);
bv.push(TypeId::Number as u8);
bv.push(n);
}
@ -33,9 +33,9 @@ fn write_str(bv: &mut Vec<u8>, string: &String) -> Result<(), DaletPackError> {
return Err(DaletPackError::StrMaxSizeExceeded);
}
bv.push(TypeId::Str as u8);
bv.push(TypeId::Text as u8);
bv.extend_from_slice(string.as_bytes());
bv.push(TypeId::StrEnd as u8);
bv.push(TypeId::TextEnd as u8);
Ok(())
}
@ -45,13 +45,13 @@ fn write_array(bv: &mut Vec<u8>, arr: &Vec<DlTag>) -> Result<(), DaletPackError>
return Err(DaletPackError::ArrMaxSizeExceeded);
}
bv.push(TypeId::TagArray as u8);
bv.push(TypeId::Tags as u8);
for tag in arr {
write_tag(bv, tag)?;
}
bv.push(TypeId::TagArrayEnd as u8);
bv.push(TypeId::TagsEnd as u8);
Ok(())
}

View file

@ -1,6 +1,9 @@
mod decode;
mod encode;
mod types;
pub mod utils;
pub use decode::*;
pub use encode::*;
pub use types::*;

View file

@ -1,4 +1,6 @@
use num_enum::TryFromPrimitive;
use num_enum::{TryFromPrimitive, TryFromPrimitiveError};
use crate::daletl::DlTid;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DaletPackError {
@ -6,19 +8,46 @@ pub enum DaletPackError {
ArrMaxSizeExceeded,
PageMaxSizeExceeded,
ZstdCompressError,
ZstdDecompressError,
WriteNullBody,
WriteNullArgument,
}
/// Errors that can occur while decoding daletpack data.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DaletPackDecodeError {
    /// The input could not be decompressed with zstd.
    ZstdDecompressError,
    /// The input ended unexpectedly or contained a type id that is not allowed
    /// at that position.
    InvalidSchema,
    /// A text value is malformed, e.g. the input ended before its terminator.
    InvalidTextSchema,
    /// A tag array is malformed, e.g. the input ended before its end marker.
    InvalidTagsSchema,
    /// A byte could not be converted to a known `TypeId`.
    UnknownTypeId,
    /// A value could not be converted to a known `DlTid`.
    UnknownTagId,
    /// A tag body or argument had a type that is not allowed there.
    InvalidArgument,
}

impl std::fmt::Display for DaletPackDecodeError {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::ZstdDecompressError => "zstd decompression failed",
            Self::InvalidSchema => "invalid daletpack schema",
            Self::InvalidTextSchema => "invalid text schema",
            Self::InvalidTagsSchema => "invalid tag array schema",
            Self::UnknownTypeId => "unknown type id",
            Self::UnknownTagId => "unknown tag id",
            Self::InvalidArgument => "invalid body or argument type",
        };
        f.write_str(message)
    }
}

// Lets callers use `?` into `Box<dyn Error>` and similar error containers.
impl std::error::Error for DaletPackDecodeError {}
impl From<TryFromPrimitiveError<TypeId>> for DaletPackDecodeError {
fn from(_: TryFromPrimitiveError<TypeId>) -> Self {
DaletPackDecodeError::UnknownTypeId
}
}
impl From<TryFromPrimitiveError<DlTid>> for DaletPackDecodeError {
fn from(_: TryFromPrimitiveError<DlTid>) -> Self {
DaletPackDecodeError::UnknownTagId
}
}
#[derive(Debug, Clone, PartialEq, Eq, TryFromPrimitive, Copy)]
#[repr(u8)]
pub enum TypeId {
StrEnd = 0,
Str,
Int8,
TagArray,
TagArrayEnd,
TextEnd = 0,
Text,
Number,
Tags,
TagsEnd,
TagId,
TagIdBody,
TagIdArgument,

View file

@ -1,3 +1,9 @@
pub fn compress_zstd(data: &Vec<u8>) -> std::io::Result<Vec<u8>> {
use std::u32::MAX;
/// Compresses `data` into a new buffer using zstd at compression level 22.
///
/// Any I/O error reported by zstd is returned unchanged.
pub fn compress_zstd(data: &[u8]) -> std::io::Result<Vec<u8>> {
    let level = 22;
    zstd::bulk::compress(data, level)
}
/// Decompresses zstd-compressed `data` into a new buffer.
///
/// Any I/O error reported by zstd is returned unchanged.
pub fn decompress_zstd(data: &[u8]) -> std::io::Result<Vec<u8>> {
    // Handed to zstd as the `capacity` argument, i.e. `u32::MAX` bytes.
    let capacity = MAX as usize;
    zstd::bulk::decompress(data, capacity)
}

View file

@ -1,3 +1,3 @@
mod from_daletl;
mod from_typed;
mod is_null_daletl;
mod to_daletl;

View file

@ -6,6 +6,12 @@ pub struct Page {
pub data: Vec<Tag>,
}
impl Page {
pub fn new(data: Vec<Tag>) -> Self {
Self { data }
}
}
pub struct ConversionError;
#[derive(Debug, Clone, PartialEq, Eq)]

View file

@ -1,6 +1,6 @@
use dalet::{
daletpack::*,
typed::{Hl, Page, TNArg, Tag::*},
typed::{Hl, TNArg, Tag::*},
};
use flate2::Compression;
use std::io::Write;
@ -11,7 +11,7 @@ macro_rules! iprint {
let start = std::time::Instant::now();
let result = $func;
let elapsed = start.elapsed();
println!("{} ({:#?}): {} bytes", $name, elapsed, result.len());
println!("{} ({:#?}): {}", $name, elapsed, result.len());
result
}};
@ -32,7 +32,7 @@ pub fn compress_zlib(data: &Vec<u8>) -> std::io::Result<Vec<u8>> {
#[test]
fn bench() {
let page = vec![
H("I am heading".into(), Hl::One),
H("Heading 1".into(), Hl::One),
H("Heading 2".into(), Hl::Two),
P(vec![
El("Some ".into()),
@ -87,7 +87,7 @@ fn bench() {
println!();
iprint!("Markdown zstd", utils::compress_zstd(&markdown).unwrap());
iprint!("Daletpack zstd", utils::compress_zstd(&daletpack).unwrap());
let daletpack = iprint!("Daletpack zstd", utils::compress_zstd(&daletpack).unwrap());
iprint!(
"Messagepack zstd",
utils::compress_zstd(&messagepack).unwrap()
@ -110,4 +110,13 @@ fn bench() {
compress_deflate(&messagepack).unwrap()
);
iprint!("Bincode deflate", compress_deflate(&bincode).unwrap());
println!();
let decoded = iprint!(
"Daletpack decode",
Decoder::new(&daletpack).unwrap().decode().unwrap().data
);
println!("{:#?}", decoded);
}

View file

@ -4,7 +4,7 @@ use dalet::parsers::gemtext::parse_gemtext;
fn gem_text() {
let text = include_str!("./gemtext.gmi");
let parsed = parse_gemtext(&text).unwrap();
let _ = parse_gemtext(&text).unwrap();
println!("{:#?}", parsed);
// println!("{:#?}", parsed);
}