mirror of
https://github.com/TxtDot/dalet-rs.git
synced 2024-11-05 17:33:58 +03:00
feat: decode daletpack
This commit is contained in:
parent
91e185df0b
commit
1ff6ba9c88
10 changed files with 244 additions and 19 deletions
172
src/daletpack/decode.rs
Normal file
172
src/daletpack/decode.rs
Normal file
|
@ -0,0 +1,172 @@
|
|||
use std::u32::MAX;
|
||||
|
||||
use crate::daletl::{DlArgument, DlBody, DlPage, DlTag, DlTid};
|
||||
|
||||
use super::{utils, DaletPackDecodeError, TypeId};
|
||||
|
||||
/// Decoder that reads DaletPack type ids and payload bytes from a byte iterator.
pub struct Decoder<'a> {
|
||||
/// Remaining input bytes; every read method pulls from this iterator with `next()`.
data: Box<dyn Iterator<Item = u8> + 'a>,
|
||||
}
|
||||
|
||||
impl<'a> Decoder<'a> {
|
||||
pub fn new(data: &[u8]) -> Result<Self, DaletPackDecodeError> {
|
||||
let data =
|
||||
utils::decompress_zstd(data).map_err(|_| DaletPackDecodeError::ZstdDecompressError)?;
|
||||
Ok(Self {
|
||||
data: Box::new(data.into_iter()),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn decode(&mut self) -> Result<DlPage, DaletPackDecodeError> {
|
||||
let mut array: Vec<DlTag> = Vec::new();
|
||||
|
||||
for _ in 0..MAX {
|
||||
let typeid = self.data.next();
|
||||
|
||||
match typeid {
|
||||
Some(typeid) => match typeid.try_into()? {
|
||||
TypeId::Text => array.push(DlTag::new(
|
||||
DlTid::El,
|
||||
self.read_text()?.into(),
|
||||
DlArgument::Null,
|
||||
)),
|
||||
TypeId::Tags => array.push(DlTag::new(
|
||||
DlTid::El,
|
||||
self.read_tag_array()?.into(),
|
||||
DlArgument::Null,
|
||||
)),
|
||||
TypeId::TagId => array.push(self.read_tag_with_id()?),
|
||||
TypeId::TagIdBody => array.push(self.read_tag_with_id_body()?),
|
||||
TypeId::TagIdArgument => array.push(self.read_tag_with_id_argument()?),
|
||||
TypeId::TagIdBodyArgument => array.push(self.read_full_tag()?),
|
||||
|
||||
_ => Err(DaletPackDecodeError::InvalidSchema)?,
|
||||
},
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
|
||||
Ok(DlPage { data: array })
|
||||
}
|
||||
|
||||
pub fn read_body(&mut self) -> Result<DlBody, DaletPackDecodeError> {
|
||||
let typeid: TypeId = self
|
||||
.data
|
||||
.next()
|
||||
.ok_or(DaletPackDecodeError::InvalidSchema)?
|
||||
.try_into()?;
|
||||
|
||||
let value = match typeid {
|
||||
TypeId::Text => DlBody::Text(self.read_text()?),
|
||||
TypeId::Tags => DlBody::Tags(self.read_tag_array()?),
|
||||
_ => Err(DaletPackDecodeError::InvalidArgument)?,
|
||||
};
|
||||
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
pub fn read_arg(&mut self) -> Result<DlArgument, DaletPackDecodeError> {
|
||||
let typeid: TypeId = self
|
||||
.data
|
||||
.next()
|
||||
.ok_or(DaletPackDecodeError::InvalidSchema)?
|
||||
.try_into()?;
|
||||
|
||||
let value = match typeid {
|
||||
TypeId::Text => DlArgument::Text(self.read_text()?),
|
||||
TypeId::Number => DlArgument::Number(self.read_number()?),
|
||||
_ => Err(DaletPackDecodeError::InvalidArgument)?,
|
||||
};
|
||||
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
fn read_number(&mut self) -> Result<u8, DaletPackDecodeError> {
|
||||
self.data.next().ok_or(DaletPackDecodeError::InvalidSchema)
|
||||
}
|
||||
|
||||
fn read_text(&mut self) -> Result<String, DaletPackDecodeError> {
|
||||
let mut str = String::new();
|
||||
|
||||
for _ in 0..MAX {
|
||||
let val = self
|
||||
.data
|
||||
.next()
|
||||
.ok_or(DaletPackDecodeError::InvalidTextSchema)?;
|
||||
|
||||
if val == TypeId::TextEnd as u8 {
|
||||
break;
|
||||
}
|
||||
|
||||
str.push(val as char);
|
||||
}
|
||||
|
||||
Ok(str)
|
||||
}
|
||||
|
||||
fn read_tag_array(&mut self) -> Result<Vec<DlTag>, DaletPackDecodeError> {
|
||||
let mut array = Vec::new();
|
||||
|
||||
for _ in 0..MAX {
|
||||
let typeid: TypeId = self
|
||||
.data
|
||||
.next()
|
||||
.ok_or(DaletPackDecodeError::InvalidTagsSchema)?
|
||||
.try_into()?;
|
||||
|
||||
match typeid {
|
||||
TypeId::Text => array.push(DlTag::new(
|
||||
DlTid::El,
|
||||
self.read_text()?.into(),
|
||||
DlArgument::Null,
|
||||
)),
|
||||
TypeId::Tags => array.push(DlTag::new(
|
||||
DlTid::El,
|
||||
self.read_tag_array()?.into(),
|
||||
DlArgument::Null,
|
||||
)),
|
||||
TypeId::TagId => array.push(self.read_tag_with_id()?),
|
||||
TypeId::TagIdBody => array.push(self.read_tag_with_id_body()?),
|
||||
TypeId::TagIdArgument => array.push(self.read_tag_with_id_argument()?),
|
||||
TypeId::TagIdBodyArgument => array.push(self.read_full_tag()?),
|
||||
|
||||
TypeId::TagsEnd => break,
|
||||
_ => Err(DaletPackDecodeError::InvalidSchema)?,
|
||||
}
|
||||
}
|
||||
|
||||
Ok(array)
|
||||
}
|
||||
|
||||
fn read_tag_with_id(&mut self) -> Result<DlTag, DaletPackDecodeError> {
|
||||
Ok(DlTag::new(
|
||||
self.read_number()?.try_into()?,
|
||||
DlBody::Null,
|
||||
DlArgument::Null,
|
||||
))
|
||||
}
|
||||
|
||||
fn read_tag_with_id_body(&mut self) -> Result<DlTag, DaletPackDecodeError> {
|
||||
Ok(DlTag::new(
|
||||
self.read_number()?.try_into()?,
|
||||
self.read_body()?,
|
||||
DlArgument::Null,
|
||||
))
|
||||
}
|
||||
|
||||
fn read_tag_with_id_argument(&mut self) -> Result<DlTag, DaletPackDecodeError> {
|
||||
Ok(DlTag::new(
|
||||
self.read_number()?.try_into()?,
|
||||
DlBody::Null,
|
||||
self.read_arg()?,
|
||||
))
|
||||
}
|
||||
|
||||
fn read_full_tag(&mut self) -> Result<DlTag, DaletPackDecodeError> {
|
||||
Ok(DlTag::new(
|
||||
self.read_number()?.try_into()?,
|
||||
self.read_body()?,
|
||||
self.read_arg()?,
|
||||
))
|
||||
}
|
||||
}
|
|
@ -22,7 +22,7 @@ pub fn encode_no_compress(page: &DlPage) -> Result<Vec<u8>, DaletPackError> {
|
|||
}
|
||||
|
||||
fn write_int(bv: &mut Vec<u8>, n: u8) {
|
||||
bv.push(1);
|
||||
bv.push(TypeId::Number as u8);
|
||||
bv.push(n);
|
||||
}
|
||||
|
||||
|
@ -33,9 +33,9 @@ fn write_str(bv: &mut Vec<u8>, string: &String) -> Result<(), DaletPackError> {
|
|||
return Err(DaletPackError::StrMaxSizeExceeded);
|
||||
}
|
||||
|
||||
bv.push(TypeId::Str as u8);
|
||||
bv.push(TypeId::Text as u8);
|
||||
bv.extend_from_slice(string.as_bytes());
|
||||
bv.push(TypeId::StrEnd as u8);
|
||||
bv.push(TypeId::TextEnd as u8);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -45,13 +45,13 @@ fn write_array(bv: &mut Vec<u8>, arr: &Vec<DlTag>) -> Result<(), DaletPackError>
|
|||
return Err(DaletPackError::ArrMaxSizeExceeded);
|
||||
}
|
||||
|
||||
bv.push(TypeId::TagArray as u8);
|
||||
bv.push(TypeId::Tags as u8);
|
||||
|
||||
for tag in arr {
|
||||
write_tag(bv, tag)?;
|
||||
}
|
||||
|
||||
bv.push(TypeId::TagArrayEnd as u8);
|
||||
bv.push(TypeId::TagsEnd as u8);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
mod decode;
|
||||
mod encode;
|
||||
|
||||
mod types;
|
||||
|
||||
pub mod utils;
|
||||
pub use decode::*;
|
||||
pub use encode::*;
|
||||
pub use types::*;
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
use num_enum::TryFromPrimitive;
|
||||
use num_enum::{TryFromPrimitive, TryFromPrimitiveError};
|
||||
|
||||
use crate::daletl::DlTid;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum DaletPackError {
|
||||
|
@ -6,19 +8,46 @@ pub enum DaletPackError {
|
|||
ArrMaxSizeExceeded,
|
||||
PageMaxSizeExceeded,
|
||||
ZstdCompressError,
|
||||
ZstdDecompressError,
|
||||
|
||||
WriteNullBody,
|
||||
WriteNullArgument,
|
||||
}
|
||||
|
||||
/// Errors reported while decoding DaletPack data.
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum DaletPackDecodeError {
|
||||
/// The compressed input could not be decompressed.
ZstdDecompressError,
|
||||
|
||||
/// The input ended where a type id or number byte was expected, or a type
/// id appeared where it cannot start a tag.
InvalidSchema,
|
||||
/// A text value is malformed, e.g. the input ended before its terminator.
InvalidTextSchema,
|
||||
/// The input ended before a tag array was terminated.
InvalidTagsSchema,
|
||||
|
||||
/// A byte could not be converted into a `TypeId`.
UnknownTypeId,
|
||||
/// A byte could not be converted into a `DlTid`.
UnknownTagId,
|
||||
|
||||
/// A body or argument started with a type id that is not allowed there.
InvalidArgument,
|
||||
}
|
||||
|
||||
/// Lets `?` turn a failed byte-to-`TypeId` conversion into a decode error.
impl From<TryFromPrimitiveError<TypeId>> for DaletPackDecodeError {
    /// Discards the conversion details and reports `UnknownTypeId`.
    fn from(_err: TryFromPrimitiveError<TypeId>) -> Self {
        Self::UnknownTypeId
    }
}
|
||||
|
||||
/// Lets `?` turn a failed byte-to-`DlTid` conversion into a decode error.
impl From<TryFromPrimitiveError<DlTid>> for DaletPackDecodeError {
    /// Discards the conversion details and reports `UnknownTagId`.
    fn from(_err: TryFromPrimitiveError<DlTid>) -> Self {
        Self::UnknownTagId
    }
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, TryFromPrimitive, Copy)]
|
||||
#[repr(u8)]
|
||||
pub enum TypeId {
|
||||
StrEnd = 0,
|
||||
Str,
|
||||
Int8,
|
||||
TagArray,
|
||||
TagArrayEnd,
|
||||
TextEnd = 0,
|
||||
Text,
|
||||
Number,
|
||||
Tags,
|
||||
TagsEnd,
|
||||
TagId,
|
||||
TagIdBody,
|
||||
TagIdArgument,
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
pub fn compress_zstd(data: &Vec<u8>) -> std::io::Result<Vec<u8>> {
|
||||
use std::u32::MAX;
|
||||
|
||||
pub fn compress_zstd(data: &[u8]) -> std::io::Result<Vec<u8>> {
|
||||
zstd::bulk::compress(data, 22)
|
||||
}
|
||||
|
||||
pub fn decompress_zstd(data: &[u8]) -> std::io::Result<Vec<u8>> {
|
||||
zstd::bulk::decompress(data, MAX as usize)
|
||||
}
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
mod from_daletl;
|
||||
mod from_typed;
|
||||
mod is_null_daletl;
|
||||
mod to_daletl;
|
||||
|
|
|
@ -6,6 +6,12 @@ pub struct Page {
|
|||
pub data: Vec<Tag>,
|
||||
}
|
||||
|
||||
impl Page {
|
||||
pub fn new(data: Vec<Tag>) -> Self {
|
||||
Self { data }
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ConversionError;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use dalet::{
|
||||
daletpack::*,
|
||||
typed::{Hl, Page, TNArg, Tag::*},
|
||||
typed::{Hl, TNArg, Tag::*},
|
||||
};
|
||||
use flate2::Compression;
|
||||
use std::io::Write;
|
||||
|
@ -11,7 +11,7 @@ macro_rules! iprint {
|
|||
let start = std::time::Instant::now();
|
||||
let result = $func;
|
||||
let elapsed = start.elapsed();
|
||||
println!("{} ({:#?}): {} bytes", $name, elapsed, result.len());
|
||||
println!("{} ({:#?}): {}", $name, elapsed, result.len());
|
||||
|
||||
result
|
||||
}};
|
||||
|
@ -32,7 +32,7 @@ pub fn compress_zlib(data: &Vec<u8>) -> std::io::Result<Vec<u8>> {
|
|||
#[test]
|
||||
fn bench() {
|
||||
let page = vec![
|
||||
H("I am heading".into(), Hl::One),
|
||||
H("Heading 1".into(), Hl::One),
|
||||
H("Heading 2".into(), Hl::Two),
|
||||
P(vec![
|
||||
El("Some ".into()),
|
||||
|
@ -87,7 +87,7 @@ fn bench() {
|
|||
println!();
|
||||
|
||||
iprint!("Markdown zstd", utils::compress_zstd(&markdown).unwrap());
|
||||
iprint!("Daletpack zstd", utils::compress_zstd(&daletpack).unwrap());
|
||||
let daletpack = iprint!("Daletpack zstd", utils::compress_zstd(&daletpack).unwrap());
|
||||
iprint!(
|
||||
"Messagepack zstd",
|
||||
utils::compress_zstd(&messagepack).unwrap()
|
||||
|
@ -110,4 +110,13 @@ fn bench() {
|
|||
compress_deflate(&messagepack).unwrap()
|
||||
);
|
||||
iprint!("Bincode deflate", compress_deflate(&bincode).unwrap());
|
||||
|
||||
println!();
|
||||
|
||||
let decoded = iprint!(
|
||||
"Daletpack decode",
|
||||
Decoder::new(&daletpack).unwrap().decode().unwrap().data
|
||||
);
|
||||
|
||||
println!("{:#?}", decoded);
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ use dalet::parsers::gemtext::parse_gemtext;
|
|||
fn gem_text() {
|
||||
let text = include_str!("./gemtext.gmi");
|
||||
|
||||
let parsed = parse_gemtext(&text).unwrap();
|
||||
let _ = parse_gemtext(&text).unwrap();
|
||||
|
||||
println!("{:#?}", parsed);
|
||||
// println!("{:#?}", parsed);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue