diff --git a/libs/rust/Cargo.lock b/libs/rust/Cargo.lock index fbf98dd..beaa4a3 100644 --- a/libs/rust/Cargo.lock +++ b/libs/rust/Cargo.lock @@ -3,25 +3,68 @@ version = 3 [[package]] -name = "bitvec" -version = "1.0.1" +name = "adler" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" dependencies = [ - "funty", - "radium", - "tap", - "wyz", + "serde", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cc" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc" +dependencies = [ + "jobserver", + "libc", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", ] [[package]] name = "dalet" version = "1.0.0-pre4" dependencies = [ - "bitvec", + "bincode", + "flate2", "num_enum", + "rmp-serde", "serde", "serde_repr", + "zstd", ] [[package]] @@ -31,10 
+74,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] -name = "funty" -version = "2.0.0" +name = "flate2" +version = "1.0.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +dependencies = [ + "crc32fast", + "miniz_oxide", +] [[package]] name = "hashbrown" @@ -52,12 +99,45 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + [[package]] name = "memchr" version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "miniz_oxide" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +dependencies = [ + "adler", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "num_enum" version = "0.7.3" @@ -79,6 +159,18 @@ dependencies = [ "syn", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = 
"pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + [[package]] name = "proc-macro-crate" version = "3.1.0" @@ -107,10 +199,26 @@ dependencies = [ ] [[package]] -name = "radium" -version = "0.7.0" +name = "rmp" +version = "0.8.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +checksum = "228ed7c16fa39782c3b3468e974aec2795e9089153cd08ee2e9aefb3613334c4" +dependencies = [ + "byteorder", + "num-traits", + "paste", +] + +[[package]] +name = "rmp-serde" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52e599a477cf9840e92f2cde9a7189e67b42c57532749bf90aea6ec10facd4db" +dependencies = [ + "byteorder", + "rmp", + "serde", +] [[package]] name = "serde" @@ -154,12 +262,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "tap" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" - [[package]] name = "toml_datetime" version = "0.6.8" @@ -193,10 +295,29 @@ dependencies = [ ] [[package]] -name = "wyz" -version = "0.5.1" +name = "zstd" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" dependencies = [ - "tap", + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa556e971e7b568dc775c136fc9de8c779b1c2fc3a63defaafadffdbd3181afa" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.12+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13"
+dependencies = [
+ "cc",
+ "pkg-config",
 ]
diff --git a/libs/rust/Cargo.toml b/libs/rust/Cargo.toml
index 7ff425b..3bea1be 100644
--- a/libs/rust/Cargo.toml
+++ b/libs/rust/Cargo.toml
@@ -12,10 +12,16 @@ keywords = ["dalet"]
 categories = ["compression", "compilers", "encoding"]
 
 [dependencies]
-bitvec = "1.0.1"
 num_enum = "0.7.3"
 serde = { version = "1.0", features = ["derive"] }
 serde_repr = "0.1"
+zstd = "0.13.2"
+
+[dev-dependencies]
+rmp-serde = { version = "1.3.0" }
+bincode = { version = "1.3.3" }
+flate2 = "1.0"
+
 
 [features]
 default = ["types", "daletpack"]
diff --git a/libs/rust/src/daletl.rs b/libs/rust/src/daletl.rs
index 1430a34..0f0fedf 100644
--- a/libs/rust/src/daletl.rs
+++ b/libs/rust/src/daletl.rs
@@ -59,7 +59,7 @@ impl IsNull for Argument {
     }
 }
 
-#[derive(Serialize_repr, Deserialize_repr, Debug, Clone, PartialEq, Eq, TryFromPrimitive)]
+#[derive(Serialize_repr, Deserialize_repr, Debug, Clone, PartialEq, Eq, TryFromPrimitive, Copy)]
 #[repr(u8)]
 /// Tag Id
 pub enum Tid {
diff --git a/libs/rust/src/daletpack/encode.rs b/libs/rust/src/daletpack/encode.rs
index d087dc6..0e5ff16 100644
--- a/libs/rust/src/daletpack/encode.rs
+++ b/libs/rust/src/daletpack/encode.rs
@@ -1,127 +1,150 @@
-use bitvec::{
-    bits,
-    order::Msb0,
-    prelude::{BitVec, Lsb0},
-    view::{AsBits, BitView},
-};
-
 use crate::daletl::{Argument, Body, IsNull, Tag};
 
-use super::utils::*;
-use super::DaletPackError;
+use super::{DaletPackError, TypeId};
 
-pub fn encode(root: Vec<Tag>) -> Result<Vec<u8>, DaletPackError> {
-    if root.len() > 2usize.pow(32) {
+/// Largest element count and string byte length the format allows (2^32).
+/// Held as `u64` so the bound itself cannot overflow on 32-bit targets.
+const MAX_LEN: u64 = 1 << 32;
+
+/// Serializes `root` into daletpack bytes by writing each tag record in order.
+///
+/// # Errors
+///
+/// Returns [`DaletPackError::RootMaxSizeExceeded`] when `root` holds more than
+/// 2^32 tags, and otherwise the first error raised while writing a tag.
+pub fn encode(root: &[Tag]) -> Result<Vec<u8>, DaletPackError> {
+    if root.len() as u64 > MAX_LEN {
         return Err(DaletPackError::RootMaxSizeExceeded);
     }
 
-    let mut bv: BitVec = BitVec::new();
+    let mut bv: Vec<u8> = Vec::new();
 
     for tag in root {
         write_tag(&mut bv, tag)?;
     }
 
-    bv.set_uninitialized(false);
-    Ok(bv.into_vec())
+    // Ok(zstd::bulk::compress(&bv, 200).map_err(|_| DaletPackError::ZstdCompressError)?)
+    Ok(bv)
 }
 
-fn write_int(bv: &mut BitVec, n: u8) {
-    if n < 16 {
-        write_4bit(bv, 0);
-        write_4bit(bv, n);
-    } else {
-        write_4bit(bv, 1);
-        bv.extend_from_raw_slice(&[n]);
-    }
+/// Appends an integer record: the [`TypeId::Int8`] marker followed by `n`.
+fn write_int(bv: &mut Vec<u8>, n: u8) {
+    bv.push(TypeId::Int8 as u8);
+    bv.push(n);
 }
 
-fn write_str(bv: &mut BitVec, string: String) -> Result<(), DaletPackError> {
+/// Appends `string` as a length-prefixed string record.
+///
+/// The record is a `Str8`, `Str16` or `Str32` [`TypeId`] marker chosen by byte
+/// length, then `len - 1` as a big-endian integer of that width, then the
+/// raw UTF-8 bytes.
+///
+/// # Errors
+///
+/// * [`DaletPackError::StrEmpty`] when `string` is empty, because `len - 1`
+///   cannot be stored for a zero-length string.
+/// * [`DaletPackError::StrMaxSizeExceeded`] when it is longer than 2^32 bytes.
+fn write_str(bv: &mut Vec<u8>, string: &str) -> Result<(), DaletPackError> {
     let size = string.len();
 
-    if size > 2usize.pow(32) {
+    if size as u64 > MAX_LEN {
         return Err(DaletPackError::StrMaxSizeExceeded);
     }
 
-    if size <= 8 {
-        write_4bit(bv, 2);
-        write_3bit(bv, (size - 1) as u8);
-    } else if size <= 16 {
-        write_4bit(bv, 3);
-        write_4bit(bv, (size - 1) as u8);
-    } else if size <= 256 {
-        write_4bit(bv, 4);
-        bv.extend_from_raw_slice(&[(size - 1) as u8]);
+    // The prefix stores `size - 1`, which would underflow for an empty string.
+    let stored = size.checked_sub(1).ok_or(DaletPackError::StrEmpty)?;
+
+    // Each branch bounds `stored`, so the narrowing casts below are lossless.
+    if size <= 256 {
+        bv.push(TypeId::Str8 as u8);
+        bv.push(stored as u8);
     } else if size <= 65536 {
-        write_4bit(bv, 5);
-        bv.extend_from_bitslice(&((size - 1) as u16).view_bits::());
+        bv.push(TypeId::Str16 as u8);
+        bv.extend((stored as u16).to_be_bytes());
     } else {
-        write_4bit(bv, 6);
-        bv.extend_from_bitslice(&((size - 1) as u32).view_bits::());
+        bv.push(TypeId::Str32 as u8);
+        bv.extend((stored as u32).to_be_bytes());
     }
 
-    bv.extend_from_bitslice(&string.as_bits::());
+    bv.extend_from_slice(string.as_bytes());
 
     Ok(())
 }
 
-fn write_array(bv: &mut BitVec, arr: Vec<Tag>) -> Result<(), DaletPackError> {
-    if arr.len() > 2usize.pow(32) {
+/// Appends `arr` as a tag array: a [`TypeId::TagArray`] marker, each tag in
+/// order, then a [`TypeId::TagArrayEnd`] marker.
+///
+/// # Errors
+///
+/// Returns [`DaletPackError::ArrMaxSizeExceeded`] when `arr` holds more than
+/// 2^32 tags, and otherwise the first error raised while writing a tag.
+fn write_array(bv: &mut Vec<u8>, arr: &[Tag]) -> Result<(), DaletPackError> {
+    if arr.len() as u64 > MAX_LEN {
         return Err(DaletPackError::ArrMaxSizeExceeded);
     }
 
-    write_4bit(bv, 7);
+    bv.push(TypeId::TagArray as u8);
 
     for tag in arr {
         write_tag(bv, tag)?;
     }
 
-    bv.extend_from_bitslice(&bits![1, 0]);
+    bv.push(TypeId::TagArrayEnd as u8);
 
     Ok(())
 }
 
-fn write_tag(bv: &mut BitVec, tag: Tag) -> Result<(), DaletPackError> {
+/// Appends one tag record: a [`TypeId`] marker saying which of body and
+/// argument are present, the tag id byte, then the body and/or argument.
+fn write_tag(bv: &mut Vec<u8>, tag: &Tag) -> Result<(), DaletPackError> {
     if tag.body.is_null() && tag.argument.is_null() {
-        write_4bit(bv, 15);
-        write_tag_id(bv, tag.id as u8);
+        bv.push(TypeId::TagId as u8);
+        bv.push(tag.id as u8);
     } else if tag.argument.is_null() {
-        write_4bit(bv, 13);
-        write_tag_id(bv, tag.id as u8);
-        write_tag_body(bv, tag.body)?;
+        bv.push(TypeId::TagIdBody as u8);
+        bv.push(tag.id as u8);
+        write_tag_body(bv, &tag.body)?;
     } else if tag.body.is_null() {
-        write_4bit(bv, 14);
-        write_tag_id(bv, tag.id as u8);
-        write_tag_argument(bv, tag.argument)?;
+        bv.push(TypeId::TagIdArgument as u8);
+        bv.push(tag.id as u8);
+        write_tag_argument(bv, &tag.argument)?;
     } else {
-        write_4bit(bv, 15);
-        write_tag_id(bv, tag.id as u8);
-        write_tag_body(bv, tag.body)?;
-        write_tag_argument(bv, tag.argument)?;
+        bv.push(TypeId::TagIdBodyArgument as u8);
+        bv.push(tag.id as u8);
+        write_tag_body(bv, &tag.body)?;
+        write_tag_argument(bv, &tag.argument)?;
     }
 
     Ok(())
 }
 
-fn write_tag_id(bv: &mut BitVec, n: u8) {
-    bv.extend_from_bitslice(&n.view_bits::()[3..=7]);
-}
-
-fn write_tag_body(bv: &mut BitVec, body: Body) -> Result<(), DaletPackError> {
+/// Appends a tag body: text as a string record, nested tags as a tag array.
+///
+/// # Panics
+///
+/// Panics on [`Body::Null`]; `write_tag` checks `is_null()` before calling.
+fn write_tag_body(bv: &mut Vec<u8>, body: &Body) -> Result<(), DaletPackError> {
     match body {
         Body::Text(s) => write_str(bv, s)?,
         Body::Tags(tags) => write_array(bv, tags)?,
-        Body::Null => unreachable!("This function cannot be called with this value"),
+        Body::Null => unreachable!("Tag cannot be called with this value"),
     };
 
     Ok(())
 }
 
-fn write_tag_argument(bv: &mut BitVec, argument: Argument) -> Result<(), DaletPackError> {
+/// Appends a tag argument: text as a string record, a number as an integer
+/// record.
+///
+/// # Panics
+///
+/// Panics on [`Argument::Null`]; `write_tag` checks `is_null()` before calling.
+fn write_tag_argument(bv: &mut Vec<u8>, argument: &Argument) -> Result<(), DaletPackError> {
     match argument {
         Argument::Text(s) => write_str(bv, s)?,
-        Argument::Number(n) => write_int(bv, n),
-        Argument::Null => unreachable!("This function cannot be called with this value"),
+        Argument::Number(n) => write_int(bv, *n),
+        Argument::Null => unreachable!("Tag cannot be called with this value"),
     };
 
     Ok(())
 }
diff --git a/libs/rust/src/daletpack/types.rs b/libs/rust/src/daletpack/types.rs
index 371a638..38a243f 100644
--- a/libs/rust/src/daletpack/types.rs
+++ b/libs/rust/src/daletpack/types.rs
@@ -1,6 +1,42 @@
+use num_enum::TryFromPrimitive;
+
+/// Errors returned by the daletpack encoder.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum DaletPackError {
+    /// A string is longer than 2^32 bytes.
     StrMaxSizeExceeded,
+    /// A tag array holds more than 2^32 tags.
     ArrMaxSizeExceeded,
+    /// The root tag list holds more than 2^32 tags.
     RootMaxSizeExceeded,
+    /// zstd compression of the encoded bytes failed.
+    ZstdCompressError,
+    /// A string is empty; lengths are stored as `len - 1`, so it has no encoding.
+    StrEmpty,
+}
+
+/// Marker byte that opens every daletpack record and identifies its kind.
+#[derive(Debug, Clone, PartialEq, Eq, TryFromPrimitive, Copy)]
+#[repr(u8)]
+pub enum TypeId {
+    /// One-byte integer.
+    Int8 = 1,
+    /// String whose `len - 1` fits in one byte.
+    Str8 = 4,
+    /// String whose `len - 1` fits in two bytes.
+    Str16,
+    /// String whose `len - 1` fits in four bytes.
+    Str32,
+    /// Start of a tag array.
+    TagArray,
+    /// End of a tag array.
+    TagArrayEnd,
+    /// Tag with neither body nor argument.
+    TagId,
+    /// Tag with a body only.
+    TagIdBody,
+    /// Tag with an argument only.
+    TagIdArgument,
+    /// Tag with both a body and an argument.
+    TagIdBodyArgument,
 }
diff --git a/libs/rust/src/daletpack/utils.rs b/libs/rust/src/daletpack/utils.rs
index 4154b89..75bd95e 100644
--- a/libs/rust/src/daletpack/utils.rs
+++ b/libs/rust/src/daletpack/utils.rs
@@ -1,9 +1,8 @@
-use bitvec::{order::Msb0, prelude::BitVec, view::BitView};
-
-pub fn write_3bit(bv: &mut BitVec, n: u8) {
-    bv.extend_from_bitslice(&n.view_bits::()[5..=7]);
-}
-
-pub fn write_4bit(bv: &mut BitVec, n: u8) {
-    bv.extend_from_bitslice(&n.view_bits::()[4..=7]);
+/// Compresses `data` with zstd at compression level 5.
+///
+/// # Errors
+///
+/// Returns the I/O error reported by `zstd::bulk::compress`.
+pub fn compress_zstd(data: &[u8]) -> std::io::Result<Vec<u8>> {
+    zstd::bulk::compress(data, 5)
 }
diff --git a/libs/rust/src/main.rs b/libs/rust/src/main.rs
index 490674e..f328e4d 100644
--- a/libs/rust/src/main.rs
+++ b/libs/rust/src/main.rs
@@ -1,21 +1 @@
-use std::fs;
-
-use dalet::{
-    abstractions::{HeadingLevel, Tag, ToDaletl},
-    daletpack::*,
-};
-
-fn main() {
-    let dalet_page: Vec<Tag> = vec![Tag::H("I am heading".to_owned(), HeadingLevel::One)];
-
-    let data = encode(dalet_page.to_daletl()).unwrap();
-
-    println!("{:#?}", data);
-    println!("{}", data.len());
-
-    let bits: Vec<_> = data.iter().map(|n| format!("{:b}", n)).collect();
-
-    println!("{}", bits.join(""));
-    // 11010000100111011010010010010000110000101101101001000011010011001010110000101100101101001011011111001111111111
-    fs::write("./test.daletpack", data).unwrap();
-}
+fn main() {}
diff --git a/libs/rust/tests/bench.md b/libs/rust/tests/bench.md
new file mode 100644
index 0000000..7f12a17
--- /dev/null
+++ b/libs/rust/tests/bench.md
@@ -0,0 +1,20 @@
+# Heading 1
+## Heading 2
+**Some bold and *italic* ~~text~~**
+
+`Hello world`
+
+- abc
+- def
+  - 
defabc
+  - defdef
+- xyz
+
+Lorem ipsum [![](https://my-picture)](https://some-link) dolor sit amet consequetur adipiscing elit
+
+|col1|col2|col3|
+|:--:|----|---:|
+|Never gonna|give you|up|
+|Never gonna|let you|down|
+|Never gonna|run around|and desert you|
+|**abc**|![def](https://some-picture)|*xyz*|
diff --git a/libs/rust/tests/bench.rs b/libs/rust/tests/bench.rs
new file mode 100644
index 0000000..946c1aa
--- /dev/null
+++ b/libs/rust/tests/bench.rs
@@ -0,0 +1,82 @@
+use dalet::{
+    abstractions::{HeadingLevel, Tag, ToDaletl},
+    daletpack::*,
+};
+use flate2::Compression;
+use std::io::Write;
+
+/// Evaluates `$func` once, prints `$name`, the elapsed time and the byte length
+/// of the result, and yields the result.
+#[macro_export]
+macro_rules! iprint {
+    ($name:expr, $func:expr) => {{
+        let start = std::time::Instant::now();
+        let result = $func;
+        let elapsed = start.elapsed();
+        println!("{} ({:#?}): {} bytes", $name, elapsed, result.len());
+
+        result
+    }};
+}
+
+/// Compresses `data` with raw DEFLATE at flate2's default compression level.
+pub fn compress_deflate(data: &[u8]) -> std::io::Result<Vec<u8>> {
+    let mut c = flate2::write::DeflateEncoder::new(Vec::new(), Compression::default());
+    // `write` may accept only part of the buffer; `write_all` retries until done.
+    c.write_all(data)?;
+    c.finish()
+}
+
+/// Compresses `data` into a zlib stream at flate2's default compression level.
+pub fn compress_zlib(data: &[u8]) -> std::io::Result<Vec<u8>> {
+    let mut c = flate2::write::ZlibEncoder::new(Vec::new(), Compression::default());
+    // `write` may accept only part of the buffer; `write_all` retries until done.
+    c.write_all(data)?;
+    c.finish()
+}
+
+/// Prints the encoded size of a 500-heading page for daletpack, MessagePack and
+/// bincode, both raw and after zstd, zlib and deflate compression.
+#[test]
+fn bench() {
+    let mut page: Vec<Tag> = vec![
+        // Tag::H("I am heading".to_owned(), HeadingLevel::One),
+        // Tag::H("Heading 2".to_owned(), HeadingLevel::Two),
+    ];
+
+    for i in 0..500 {
+        page.push(Tag::H(format!("{}. Heading", i), HeadingLevel::One))
+    }
+
+    let dalet_page = page.to_daletl();
+
+    let daletpack = iprint!("Daletpack", encode(&dalet_page).unwrap());
+    let messagepack = iprint!("Messagepack", rmp_serde::to_vec(&dalet_page).unwrap());
+    let bincode = iprint!("Bincode", bincode::serialize(&dalet_page).unwrap());
+
+    println!();
+
+    iprint!("Daletpack zstd", utils::compress_zstd(&daletpack).unwrap());
+    iprint!(
+        "Messagepack zstd",
+        utils::compress_zstd(&messagepack).unwrap()
+    );
+    iprint!("Bincode zstd", utils::compress_zstd(&bincode).unwrap());
+
+    println!();
+
+    iprint!("Daletpack Zlib", compress_zlib(&daletpack).unwrap());
+    iprint!("Messagepack Zlib", compress_zlib(&messagepack).unwrap());
+    iprint!("Bincode Zlib", compress_zlib(&bincode).unwrap());
+
+    println!();
+
+    iprint!("Daletpack deflate", compress_deflate(&daletpack).unwrap());
+    iprint!(
+        "Messagepack deflate",
+        compress_deflate(&messagepack).unwrap()
+    );
+    iprint!("Bincode deflate", compress_deflate(&bincode).unwrap());
+
+    // fs::write("./test.daletpack", daletpack).unwrap();
+}