From fd5c69d7abcd604d381dea3a1250d3cc00b75617 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=B8=96=E7=95=8C?=
Date: Mon, 19 Dec 2022 12:43:33 +0800
Subject: [PATCH] Merge abx decoder

---
 common/abx/internal/constraints.go |  40 +++++
 common/abx/reader.go               | 252 +++++++++++++++++++++++++++++
 2 files changed, 292 insertions(+)
 create mode 100644 common/abx/internal/constraints.go
 create mode 100644 common/abx/reader.go

diff --git a/common/abx/internal/constraints.go b/common/abx/internal/constraints.go
new file mode 100644
index 0000000..fb2573a
--- /dev/null
+++ b/common/abx/internal/constraints.go
@@ -0,0 +1,40 @@
+package internal
+
+// ProtocolMagicVersion0 is the four-byte signature "ABX\x00" that the reader
+// requires at the very start of a document.
+var ProtocolMagicVersion0 = []byte{0x41, 0x42, 0x58, 0x00}
+
+const (
+	// Token kinds, carried in the low four bits of an event byte.
+	StartDocument         = 0
+	EndDocument           = 1
+	StartTag              = 2
+	EndTag                = 3
+	TEXT                  = 4
+	CDSECT                = 5
+	EntityRef             = 6
+	IgnorableWhitespace   = 7
+	ProcessingInstruction = 8
+	COMMENT               = 9
+	DOCDECL               = 10
+	ATTRIBUTE             = 15
+
+	// Value types, carried in the high four bits of an attribute event byte.
+	TypeNull           = 1 << 4
+	TypeString         = 2 << 4
+	TypeStringInterned = 3 << 4
+	TypeBytesHex       = 4 << 4
+	TypeBytesBase64    = 5 << 4
+	TypeInt            = 6 << 4
+	TypeIntHex         = 7 << 4
+	TypeLong           = 8 << 4
+	TypeLongHex        = 9 << 4
+	TypeFloat          = 10 << 4
+	TypeDouble         = 11 << 4
+	TypeBooleanTrue    = 12 << 4
+	TypeBooleanFalse   = 13 << 4
+
+	// MaxUnsignedShort is the largest uint16. Used as an interned-string
+	// reference it marks a string that follows inline in the stream.
+	MaxUnsignedShort = 65535
+)
diff --git a/common/abx/reader.go b/common/abx/reader.go
new file mode 100644
index 0000000..429642e
--- /dev/null
+++ b/common/abx/reader.go
@@ -0,0 +1,252 @@
+package abx
+
+import (
+	"bytes"
+	"encoding/base64"
+	"encoding/binary"
+	"encoding/hex"
+	"encoding/xml"
+	"io"
+	"strconv"
+
+	. "github.com/sagernet/sing/common/abx/internal"
+	E "github.com/sagernet/sing/common/exceptions"
+)
+
+var _ xml.TokenReader = (*Reader)(nil)
+
+// Reader decodes an in-memory ABX document into encoding/xml tokens.
+// Create one with NewReader; the zero value has no input to read from.
+type Reader struct {
+	reader *bytes.Reader
+	// stringRefs holds the interned strings seen so far, in order of first
+	// appearance, so later events can refer to them by index.
+	stringRefs []string
+}
+
+// NewReader returns a token reader over content. It reports false, with a nil
+// reader, when content does not start with ProtocolMagicVersion0.
+func NewReader(content []byte) (xml.TokenReader, bool) {
+	if !bytes.HasPrefix(content, ProtocolMagicVersion0) {
+		return nil, false
+	}
+	// Events begin right after the magic. If the magic were left in the
+	// stream, Token would decode its first byte (0x41, low nibble 1) as
+	// EndDocument and report io.EOF before any real event.
+	body := content[len(ProtocolMagicVersion0):]
+	return &Reader{reader: bytes.NewReader(body)}, true
+}
+
+// Token implements xml.TokenReader. It returns the next start element, end
+// element, character data or comment, and io.EOF once an EndDocument event
+// or the end of the input is reached.
+//
+// Events with no token to report (StartDocument, ProcessingInstruction,
+// DOCDECL, IgnorableWhitespace and EntityRef) are consumed and skipped, so
+// Token never returns a nil token together with a nil error.
+func (r *Reader) Token() (xml.Token, error) {
+	for {
+		event, err := r.reader.ReadByte()
+		if err != nil {
+			return nil, err
+		}
+		tokenType := event & 0x0f
+		switch tokenType {
+		case StartDocument:
+			// Carries no payload; nothing to report.
+			continue
+		case EndDocument:
+			return nil, io.EOF
+		case StartTag:
+			name, err := r.readInternedUTF()
+			if err != nil {
+				return nil, err
+			}
+			attrs, err := r.pullAttributes()
+			if err != nil {
+				return nil, err
+			}
+			return xml.StartElement{Name: xml.Name{Local: name}, Attr: attrs}, nil
+		case EndTag:
+			name, err := r.readInternedUTF()
+			if err != nil {
+				return nil, err
+			}
+			return xml.EndElement{Name: xml.Name{Local: name}}, nil
+		case TEXT, CDSECT:
+			// encoding/xml has no CDATA token; its own decoder also hands
+			// CDATA content back as CharData, so the text is kept as-is.
+			data, err := r.readUTF()
+			if err != nil {
+				return nil, err
+			}
+			return xml.CharData(data), nil
+		case COMMENT:
+			data, err := r.readUTF()
+			if err != nil {
+				return nil, err
+			}
+			return xml.Comment(data), nil
+		case ProcessingInstruction, DOCDECL, IgnorableWhitespace, EntityRef:
+			// The payload must still be read to stay in sync with the stream.
+			if _, err := r.readUTF(); err != nil {
+				return nil, err
+			}
+			continue
+		case ATTRIBUTE:
+			return nil, E.New("unexpected attribute")
+		default:
+			return nil, E.New("unknown token type ", tokenType, " with type ", event&0xf0)
+		}
+	}
+}
+
+// pullAttributes reads the run of ATTRIBUTE events that follows a StartTag
+// and returns them in stream order. The first non-attribute event byte is
+// pushed back so the next Token call sees it.
+func (r *Reader) pullAttributes() ([]xml.Attr, error) {
+	var attrs []xml.Attr
+	for {
+		event, err := r.reader.ReadByte()
+		if err != nil {
+			// Input ended right after the tag. Hand back what was read; the
+			// next Token call reports the end of input.
+			return attrs, nil
+		}
+		if event&0x0f != ATTRIBUTE {
+			if err = r.reader.UnreadByte(); err != nil {
+				return nil, err
+			}
+			return attrs, nil
+		}
+		attr, err := r.readAttribute(event & 0xf0)
+		if err != nil {
+			return nil, err
+		}
+		attrs = append(attrs, attr)
+	}
+}
+
+// readAttribute decodes the name and value of a single attribute whose event
+// byte carried valueType in its high four bits. Every value is rendered as
+// text because xml.Attr only holds strings.
+func (r *Reader) readAttribute(valueType byte) (xml.Attr, error) {
+	name, err := r.readInternedUTF()
+	if err != nil {
+		return xml.Attr{}, err
+	}
+	var value string
+	switch valueType {
+	case TypeNull:
+		// No payload; the value stays empty.
+	case TypeBooleanTrue:
+		value = "true"
+	case TypeBooleanFalse:
+		value = "false"
+	case TypeString:
+		value, err = r.readUTF()
+	case TypeStringInterned:
+		value, err = r.readInternedUTF()
+	case TypeBytesHex:
+		var data []byte
+		if data, err = r.readBytes(); err == nil {
+			value = hex.EncodeToString(data)
+		}
+	case TypeBytesBase64:
+		var data []byte
+		if data, err = r.readBytes(); err == nil {
+			value = base64.StdEncoding.EncodeToString(data)
+		}
+	case TypeInt:
+		var data int32
+		if err = binary.Read(r.reader, binary.BigEndian, &data); err == nil {
+			value = strconv.FormatInt(int64(data), 10)
+		}
+	case TypeIntHex:
+		var data int32
+		if err = binary.Read(r.reader, binary.BigEndian, &data); err == nil {
+			value = "0x" + strconv.FormatInt(int64(data), 16)
+		}
+	case TypeLong:
+		var data int64
+		if err = binary.Read(r.reader, binary.BigEndian, &data); err == nil {
+			value = strconv.FormatInt(data, 10)
+		}
+	case TypeLongHex:
+		var data int64
+		if err = binary.Read(r.reader, binary.BigEndian, &data); err == nil {
+			value = "0x" + strconv.FormatInt(data, 16)
+		}
+	case TypeFloat:
+		var data float32
+		if err = binary.Read(r.reader, binary.BigEndian, &data); err == nil {
+			value = strconv.FormatFloat(float64(data), 'g', -1, 32)
+		}
+	case TypeDouble:
+		var data float64
+		if err = binary.Read(r.reader, binary.BigEndian, &data); err == nil {
+			value = strconv.FormatFloat(data, 'g', -1, 64)
+		}
+	default:
+		return xml.Attr{}, E.New("unexpected attribute type, ", valueType)
+	}
+	if err != nil {
+		return xml.Attr{}, err
+	}
+	return xml.Attr{Name: xml.Name{Local: name}, Value: value}, nil
+}
+
+// readUnsignedShort reads a big-endian 16-bit unsigned integer.
+func (r *Reader) readUnsignedShort() (uint16, error) {
+	var value uint16
+	err := binary.Read(r.reader, binary.BigEndian, &value)
+	return value, err
+}
+
+// readInternedUTF reads a string that may be given by reference. A reference
+// of MaxUnsignedShort means the string follows inline and is added to the
+// intern table; any other reference is an index into that table.
+func (r *Reader) readInternedUTF() (string, error) {
+	ref, err := r.readUnsignedShort()
+	if err != nil {
+		return "", err
+	}
+	if ref != MaxUnsignedShort {
+		if int(ref) >= len(r.stringRefs) {
+			return "", E.New("invalid interned reference: ", ref, ", exists: ", len(r.stringRefs))
+		}
+		return r.stringRefs[ref], nil
+	}
+	utf, err := r.readUTF()
+	if err != nil {
+		return "", err
+	}
+	// Index MaxUnsignedShort is the inline marker and can never be used as a
+	// reference, so the table stops growing once it holds that many entries.
+	if len(r.stringRefs) < MaxUnsignedShort {
+		r.stringRefs = append(r.stringRefs, utf)
+	}
+	return utf, nil
+}
+
+// readUTF reads a length-prefixed byte string and returns it as a Go string.
+func (r *Reader) readUTF() (string, error) {
+	data, err := r.readBytes()
+	if err != nil {
+		return "", err
+	}
+	return string(data), nil
+}
+
+// readBytes reads a 16-bit big-endian length followed by that many bytes.
+func (r *Reader) readBytes() ([]byte, error) {
+	length, err := r.readUnsignedShort()
+	if err != nil {
+		return nil, err
+	}
+	data := make([]byte, length)
+	if _, err = io.ReadFull(r.reader, data); err != nil {
+		return nil, err
+	}
+	return data, nil
+}