From 4273bc851a510388a6bd56b406820c1007bb323b Mon Sep 17 00:00:00 2001 From: Artemy Egorov Date: Sat, 27 Jul 2024 17:44:34 +0300 Subject: [PATCH] doc: add daletpack formats overview --- README.md | 2 +- libs/typescript/src/daletl/main.ts | 4 -- libs/typescript/src/daletl/normalizers.ts | 11 +++--- libs/typescript/src/daletl/tags/heading.ts | 8 ++-- libs/typescript/src/daletl/types.ts | 5 ++- specification/daletl.md | 20 ++-------- specification/daletpack.md | 46 ++++++++++++++++++++++ specification/main.md | 5 ++- 8 files changed, 66 insertions(+), 35 deletions(-) create mode 100644 specification/daletpack.md diff --git a/README.md b/README.md index ffe5aff..c53c518 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Markup language ecosystem сombining small file size, big number of possibilitie [Specification](./specification/main.md). > [!WARNING] -> Specification is not complete. +> Specification is not complete and very unstable. ## Concept diff --git a/libs/typescript/src/daletl/main.ts b/libs/typescript/src/daletl/main.ts index 07d3e23..fefaee9 100644 --- a/libs/typescript/src/daletl/main.ts +++ b/libs/typescript/src/daletl/main.ts @@ -16,10 +16,6 @@ export function parseTag(raw_tag: RawTag): Tag { return new El(raw_tag); } - if (typeof raw_tag === "number") { - return TagNormalizers[raw_tag]([raw_tag, null]); - } - if (Array.isArray(raw_tag)) { if (Array.isArray(raw_tag[0])) { raw_tag = raw_tag as RawTag[]; diff --git a/libs/typescript/src/daletl/normalizers.ts b/libs/typescript/src/daletl/normalizers.ts index d773bb9..007216f 100644 --- a/libs/typescript/src/daletl/normalizers.ts +++ b/libs/typescript/src/daletl/normalizers.ts @@ -4,13 +4,12 @@ import Heading from "./tags/heading"; import { RawTagAsArray } from "./types"; import { z } from "zod"; +const textOrTag = z.custom((b) => b !== null); +const text = z.string(); + const TagNormalizers = [ - n( - z.custom((b) => b !== null), - z.any(), - El - ), - n(z.string(), z.number().int().min(1).max(6), 
Heading), + n(textOrTag, z.any(), El), + n(text, z.number().int().min(1).max(6).nullable(), Heading), ]; export { TagNormalizers }; diff --git a/libs/typescript/src/daletl/tags/heading.ts b/libs/typescript/src/daletl/tags/heading.ts index 34d9c95..fea1b21 100644 --- a/libs/typescript/src/daletl/tags/heading.ts +++ b/libs/typescript/src/daletl/tags/heading.ts @@ -2,14 +2,14 @@ import { chtml } from "../../utils"; import { Tag } from "../types"; export default class Heading extends Tag { - constructor(body: string, argument: number) { - super(1, body, argument); + constructor(body: string, argument?: number | null) { + super(1, body, argument || null); } toHtml(classes?: boolean): string { return chtml( - `h${this.argument}`, - `h hl${this.argument}`, + `h${this.argument || 1}`, + `h hl${this.argument || 1}`, classes, this.body ); diff --git a/libs/typescript/src/daletl/types.ts b/libs/typescript/src/daletl/types.ts index 9950b70..a5baeaa 100644 --- a/libs/typescript/src/daletl/types.ts +++ b/libs/typescript/src/daletl/types.ts @@ -38,7 +38,10 @@ export abstract class Tag { encode(): Uint8Array { return encode(this.raw); } - abstract toHtml(classes?: boolean): string; + // eslint-disable-next-line @typescript-eslint/no-unused-vars + toHtml(classes?: boolean): string { + return ""; + } } export type Body = string | Tag[] | null; diff --git a/specification/daletl.md b/specification/daletl.md index a95a99f..83bd4ac 100644 --- a/specification/daletl.md +++ b/specification/daletl.md @@ -2,7 +2,7 @@ ## Data format -Daletl must be serialized as [MessagePack](https://github.com/msgpack/msgpack/blob/master/spec.md). All data transfer between server and client is done in this format. +Daletl must be serialized as [DaletPack](./daletpack.md). All data transfer between server and client is done in this format. ### Root @@ -20,10 +20,10 @@ Each tag may be one of four types: #### Data Representation -##### As array of 2-3 elements +##### As array of 1-3 elements 1. Tag id -2. 
Tag body +2. Tag body (optional if argument is null) 3. Tag argument (optional) Tag id is integer number. @@ -50,20 +50,6 @@ Argument can be number or string. ] ``` -##### As number - -Number becomes tag. - -```json5 -1 -``` - -equals to - -```json5 -[1, null] -``` - ##### As string String becomes element tag. diff --git a/specification/daletpack.md b/specification/daletpack.md new file mode 100644 index 0000000..ee14b97 --- /dev/null +++ b/specification/daletpack.md @@ -0,0 +1,46 @@ +# DaletPack specification for Dalet v1.0-preview + +DaletPack is a binary data format for Dalet that is used for minimizing the size of transmitted daletl data. DaletPack is designed specifically to transfer as little data as possible: it compresses the declaration of tag types into the smallest possible volume. Nothing unnecessary is transferred. + +All data must be compressed in [Zstandard](https://datatracker.ietf.org/doc/html/rfc8878) format. + +## Types (12) + +- **Null** (1) +- **Integer** (1) +- **String** (6) +- **Array** (3) +- **Tags** (3) + - **Tag only with id** (1) + - **Tag with id and body** (1) + - **Tag with id, body and argument** (1) + +## Limitations + +- a value of integer (5 bits) must be between -15 and 15 +- maximum byte size of a String object is (2^32)-1 +- string must be encoded in UTF-8 +- maximum number of elements of an Array object is (2^32)-1 + +## Formats + +### Overview + +| name | id | id-bits | +| ------------------------ | --- | ------- | +| null | 0 | 0000 | +| int | 1 | 0001 | +| str 3 | 2 | 0010 | +| str 4 | 3 | 0011 | +| str 6 | 4 | 0100 | +| str 8 | 5 | 0101 | +| str 16 | 6 | 0110 | +| str 32 | 7 | 0111 | +| arr 2 | 8 | 1000 | +| arr 4 | 9 | 1001 | +| arr 8 | 10 | 1010 | +| arr 16 | 11 | 1011 | +| arr 32 | 12 | 1100 | +| tag (id) | 13 | 1101 | +| tag (id, body) | 14 | 1110 | +| tag (id, body, argument) | 15 | 1111 | diff --git a/specification/main.md b/specification/main.md index 452eebd..228fc65 100644 --- a/specification/main.md +++ 
b/specification/main.md @@ -7,6 +7,7 @@ - [Tags](./tags.md) - [Daletl](./daletl.md) - [Daleth](./daleth.md) +- [DaletPack](./daletpack.md) ## Languages @@ -16,13 +17,13 @@ ### Daletl -**Daletl** is a low-level language for machines. It is used in data transmission, processing and generation. It is specifically optimized to transfer as little data as possible using MessagePack. +**Daletl** is a low-level language for machines. It is used in data transmission, processing and generation. It is specifically optimized to transfer as little data as possible using DaletPack. ## Stages ### Stage 1 (optional) -In the first stage, the daleth language is parsed and converted to daletl. All tags becomes an array of properties `[tag_id, body, argument]`, so that they take up less space in json or messagepack, for example. **This stage is not for data transmission**. +In the first stage, the daleth language is parsed and converted to daletl. All tags become an array of properties `[tag_id, body, argument]`, so that they take up less space in the transmitted data, for example. **This stage is not for data transmission**. ### Stage 2