Switch from glide to dep. Check in vendor/

This commit is contained in:
Ryan Boehning 2018-01-11 13:38:50 -08:00
parent 9a3cd91cd7
commit f44e11fa65
498 changed files with 74787 additions and 32 deletions

25
vendor/github.com/aead/chacha20/.gitignore generated vendored Normal file
View file

@ -0,0 +1,25 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
.vscode
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof

19
vendor/github.com/aead/chacha20/.travis.yml generated vendored Normal file
View file

@ -0,0 +1,19 @@
language: go
go:
- 1.5.3
- 1.6
- 1.7
- 1.8
- master
env:
- TRAVIS_GOARCH=amd64
- TRAVIS_GOARCH=386
before_install:
- export GOARCH=$TRAVIS_GOARCH
branches:
only:
- master

21
vendor/github.com/aead/chacha20/LICENSE generated vendored Normal file
View file

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2016 Andreas Auernhammer
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

79
vendor/github.com/aead/chacha20/README.md generated vendored Normal file
View file

@ -0,0 +1,79 @@
[![Godoc Reference](https://godoc.org/github.com/aead/chacha20?status.svg)](https://godoc.org/github.com/aead/chacha20)
## The ChaCha20 stream cipher
ChaCha is a stream cipher family created by Daniel J. Bernstein.
The most common ChaCha cipher is ChaCha20 (20 rounds). ChaCha20 is standardized in [RFC 7539](https://tools.ietf.org/html/rfc7539 "RFC 7539").
This package provides implementations of three ChaCha versions:
- ChaCha20 with a 64 bit nonce (can en/decrypt up to 2^64 * 64 bytes for one key-nonce combination)
- ChaCha20 with a 96 bit nonce (can en/decrypt up to 2^32 * 64 bytes ~ 256 GB for one key-nonce combination)
- XChaCha20 with a 192 bit nonce (can en/decrypt up to 2^64 * 64 bytes for one key-nonce combination)
Furthermore the chacha subpackage implements ChaCha20/12 and ChaCha20/8.
These versions use 12 or 8 rounds instead of 20.
But it's recommended to use ChaCha20 (with 20 rounds) - it will be fast enough for almost all purposes.
### Installation
Install in your GOPATH: `go get -u github.com/aead/chacha20`
### Requirements
All go versions >= 1.5.3 are supported.
Please notice, that the amd64 AVX2 asm implementation requires go1.7 or newer.
### Performance
#### AMD64
Hardware: Intel i7-6500U 2.50GHz x 2
System: Linux Ubuntu 16.04 - kernel: 4.4.0-62-generic
Go version: 1.8.0
```
AVX2
name speed cpb
ChaCha20_64-4 573MB/s ± 0% 4.16
ChaCha20_1K-4 2.19GB/s ± 0% 1.06
XChaCha20_64-4 261MB/s ± 0% 9.13
XChaCha20_1K-4 1.69GB/s ± 4% 1.37
XORKeyStream64-4 474MB/s ± 2% 5.02
XORKeyStream1K-4 2.09GB/s ± 1% 1.11
XChaCha20_XORKeyStream64-4 262MB/s ± 0% 9.09
XChaCha20_XORKeyStream1K-4 1.71GB/s ± 1% 1.36
SSSE3
name speed cpb
ChaCha20_64-4 583MB/s ± 0% 4.08
ChaCha20_1K-4 1.15GB/s ± 1% 2.02
XChaCha20_64-4 267MB/s ± 0% 8.92
XChaCha20_1K-4 984MB/s ± 5% 2.42
XORKeyStream64-4 492MB/s ± 1% 4.84
XORKeyStream1K-4 1.10GB/s ± 5% 2.11
XChaCha20_XORKeyStream64-4 266MB/s ± 0% 8.96
XChaCha20_XORKeyStream1K-4 1.00GB/s ± 2% 2.32
```
#### 386
Hardware: Intel i7-6500U 2.50GHz x 2
System: Linux Ubuntu 16.04 - kernel: 4.4.0-62-generic
Go version: 1.8.0
```
SSSE3
name                        speed cpb
ChaCha20_64-4               570MB/s ± 0% 4.18
ChaCha20_1K-4               650MB/s ± 0% 3.66
XChaCha20_64-4              223MB/s ± 0% 10.69
XChaCha20_1K-4              584MB/s ± 1% 4.08
XORKeyStream64-4            392MB/s ± 1% 6.08
XORKeyStream1K-4            629MB/s ± 1% 3.79
XChaCha20_XORKeyStream64-4  222MB/s ± 0% 10.73
XChaCha20_XORKeyStream1K-4  585MB/s ± 0% 4.07
SSE2
name speed cpb
ChaCha20_64-4 509MB/s ± 0% 4.68
ChaCha20_1K-4 553MB/s ± 2% 4.31
XChaCha20_64-4 201MB/s ± 0% 11.86
XChaCha20_1K-4 498MB/s ± 4% 4.78
XORKeyStream64-4 359MB/s ± 1% 6.64
XORKeyStream1K-4 545MB/s ± 0% 4.37
XChaCha20_XORKeyStream64-4 201MB/s ± 1% 11.86
XChaCha20_XORKeyStream1K-4 507MB/s ± 0% 4.70
```

176
vendor/github.com/aead/chacha20/chacha/chacha.go generated vendored Normal file
View file

@ -0,0 +1,176 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// Package chacha implements some low-level functions of the
// ChaCha cipher family.
package chacha // import "github.com/aead/chacha20/chacha"
import (
"encoding/binary"
"errors"
)
const (
// NonceSize is the size of the ChaCha20 nonce in bytes.
NonceSize = 8
// INonceSize is the size of the IETF-ChaCha20 nonce in bytes.
INonceSize = 12
// XNonceSize is the size of the XChaCha20 nonce in bytes.
XNonceSize = 24
// KeySize is the size of the key in bytes.
KeySize = 32
)
var (
useSSE2 bool
useSSSE3 bool
useAVX2 bool
)
var (
errKeySize = errors.New("chacha20/chacha: bad key length")
errInvalidNonce = errors.New("chacha20/chacha: bad nonce length")
)
func setup(state *[64]byte, nonce, key []byte) (err error) {
if len(key) != KeySize {
err = errKeySize
return
}
var Nonce [16]byte
switch len(nonce) {
case NonceSize:
copy(Nonce[8:], nonce)
initialize(state, key, &Nonce)
case INonceSize:
copy(Nonce[4:], nonce)
initialize(state, key, &Nonce)
case XNonceSize:
var tmpKey [32]byte
var hNonce [16]byte
copy(hNonce[:], nonce[:16])
copy(tmpKey[:], key)
hChaCha20(&tmpKey, &hNonce, &tmpKey)
copy(Nonce[8:], nonce[16:])
initialize(state, tmpKey[:], &Nonce)
// BUG(aead): A "good" compiler will remove this (optimizations)
// But using the provided key instead of tmpKey,
// will change the key (-> probably confuses users)
for i := range tmpKey {
tmpKey[i] = 0
}
default:
err = errInvalidNonce
}
return
}
// XORKeyStream crypts bytes from src to dst using the given nonce and key.
// The length of the nonce determinds the version of ChaCha20:
// - NonceSize: ChaCha20/r with a 64 bit nonce and a 2^64 * 64 byte period.
// - INonceSize: ChaCha20/r as defined in RFC 7539 and a 2^32 * 64 byte period.
// - XNonceSize: XChaCha20/r with a 192 bit nonce and a 2^64 * 64 byte period.
// The rounds argument specifies the number of rounds performed for keystream
// generation - valid values are 8, 12 or 20. The src and dst may be the same slice
// but otherwise should not overlap. If len(dst) < len(src) this function panics.
// If the nonce is neither 64, 96 nor 192 bits long, this function panics.
func XORKeyStream(dst, src, nonce, key []byte, rounds int) {
if rounds != 20 && rounds != 12 && rounds != 8 {
panic("chacha20/chacha: bad number of rounds")
}
if len(dst) < len(src) {
panic("chacha20/chacha: dst buffer is to small")
}
if len(nonce) == INonceSize && uint64(len(src)) > (1<<38) {
panic("chacha20/chacha: src is too large")
}
var block, state [64]byte
if err := setup(&state, nonce, key); err != nil {
panic(err)
}
xorKeyStream(dst, src, &block, &state, rounds)
}
// Cipher implements ChaCha20/r (XChaCha20/r) for a given number of rounds r.
type Cipher struct {
state, block [64]byte
off int
rounds int // 20 for ChaCha20
noncesize int
}
// NewCipher returns a new *chacha.Cipher implementing the ChaCha20/r or XChaCha20/r
// (r = 8, 12 or 20) stream cipher. The nonce must be unique for one key for all time.
// The length of the nonce determinds the version of ChaCha20:
// - NonceSize: ChaCha20/r with a 64 bit nonce and a 2^64 * 64 byte period.
// - INonceSize: ChaCha20/r as defined in RFC 7539 and a 2^32 * 64 byte period.
// - XNonceSize: XChaCha20/r with a 192 bit nonce and a 2^64 * 64 byte period.
// If the nonce is neither 64, 96 nor 192 bits long, a non-nil error is returned.
func NewCipher(nonce, key []byte, rounds int) (*Cipher, error) {
if rounds != 20 && rounds != 12 && rounds != 8 {
panic("chacha20/chacha: bad number of rounds")
}
c := new(Cipher)
if err := setup(&(c.state), nonce, key); err != nil {
return nil, err
}
c.rounds = rounds
if len(nonce) == INonceSize {
c.noncesize = INonceSize
} else {
c.noncesize = NonceSize
}
return c, nil
}
// XORKeyStream crypts bytes from src to dst. Src and dst may be the same slice
// but otherwise should not overlap. If len(dst) < len(src) the function panics.
func (c *Cipher) XORKeyStream(dst, src []byte) {
if len(dst) < len(src) {
panic("chacha20/chacha: dst buffer is to small")
}
if c.off > 0 {
n := len(c.block[c.off:])
if len(src) <= n {
for i, v := range src {
dst[i] = v ^ c.block[c.off]
c.off++
}
if c.off == 64 {
c.off = 0
}
return
}
for i, v := range c.block[c.off:] {
dst[i] = src[i] ^ v
}
src = src[n:]
dst = dst[n:]
c.off = 0
}
c.off += xorKeyStream(dst, src, &(c.block), &(c.state), c.rounds)
}
// SetCounter skips ctr * 64 byte blocks. SetCounter(0) resets the cipher.
// This function always skips the unused keystream of the current 64 byte block.
func (c *Cipher) SetCounter(ctr uint64) {
if c.noncesize == INonceSize {
binary.LittleEndian.PutUint32(c.state[48:], uint32(ctr))
} else {
binary.LittleEndian.PutUint64(c.state[48:], ctr)
}
c.off = 0
}

View file

@ -0,0 +1,542 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// +build go1.7,amd64,!gccgo,!appengine,!nacl
#include "textflag.h"
DATA ·sigma_AVX<>+0x00(SB)/4, $0x61707865
DATA ·sigma_AVX<>+0x04(SB)/4, $0x3320646e
DATA ·sigma_AVX<>+0x08(SB)/4, $0x79622d32
DATA ·sigma_AVX<>+0x0C(SB)/4, $0x6b206574
GLOBL ·sigma_AVX<>(SB), (NOPTR+RODATA), $16
DATA ·one_AVX<>+0x00(SB)/8, $1
DATA ·one_AVX<>+0x08(SB)/8, $0
GLOBL ·one_AVX<>(SB), (NOPTR+RODATA), $16
DATA ·one_AVX2<>+0x00(SB)/8, $0
DATA ·one_AVX2<>+0x08(SB)/8, $0
DATA ·one_AVX2<>+0x10(SB)/8, $1
DATA ·one_AVX2<>+0x18(SB)/8, $0
GLOBL ·one_AVX2<>(SB), (NOPTR+RODATA), $32
DATA ·two_AVX2<>+0x00(SB)/8, $2
DATA ·two_AVX2<>+0x08(SB)/8, $0
DATA ·two_AVX2<>+0x10(SB)/8, $2
DATA ·two_AVX2<>+0x18(SB)/8, $0
GLOBL ·two_AVX2<>(SB), (NOPTR+RODATA), $32
DATA ·rol16_AVX2<>+0x00(SB)/8, $0x0504070601000302
DATA ·rol16_AVX2<>+0x08(SB)/8, $0x0D0C0F0E09080B0A
DATA ·rol16_AVX2<>+0x10(SB)/8, $0x0504070601000302
DATA ·rol16_AVX2<>+0x18(SB)/8, $0x0D0C0F0E09080B0A
GLOBL ·rol16_AVX2<>(SB), (NOPTR+RODATA), $32
DATA ·rol8_AVX2<>+0x00(SB)/8, $0x0605040702010003
DATA ·rol8_AVX2<>+0x08(SB)/8, $0x0E0D0C0F0A09080B
DATA ·rol8_AVX2<>+0x10(SB)/8, $0x0605040702010003
DATA ·rol8_AVX2<>+0x18(SB)/8, $0x0E0D0C0F0A09080B
GLOBL ·rol8_AVX2<>(SB), (NOPTR+RODATA), $32
#define ROTL(n, t, v) \
VPSLLD $n, v, t; \
VPSRLD $(32-n), v, v; \
VPXOR v, t, v
#define CHACHA_QROUND(v0, v1, v2, v3, t, c16, c8) \
VPADDD v0, v1, v0; \
VPXOR v3, v0, v3; \
VPSHUFB c16, v3, v3; \
VPADDD v2, v3, v2; \
VPXOR v1, v2, v1; \
ROTL(12, t, v1); \
VPADDD v0, v1, v0; \
VPXOR v3, v0, v3; \
VPSHUFB c8, v3, v3; \
VPADDD v2, v3, v2; \
VPXOR v1, v2, v1; \
ROTL(7, t, v1)
#define CHACHA_SHUFFLE(v1, v2, v3) \
VPSHUFD $0x39, v1, v1; \
VPSHUFD $0x4E, v2, v2; \
VPSHUFD $-109, v3, v3
#define XOR_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \
VMOVDQU (0+off)(src), t0; \
VPERM2I128 $32, v1, v0, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (0+off)(dst); \
VMOVDQU (32+off)(src), t0; \
VPERM2I128 $32, v3, v2, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (32+off)(dst); \
VMOVDQU (64+off)(src), t0; \
VPERM2I128 $49, v1, v0, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (64+off)(dst); \
VMOVDQU (96+off)(src), t0; \
VPERM2I128 $49, v3, v2, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (96+off)(dst)
#define XOR_UPPER_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \
VMOVDQU (0+off)(src), t0; \
VPERM2I128 $32, v1, v0, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (0+off)(dst); \
VMOVDQU (32+off)(src), t0; \
VPERM2I128 $32, v3, v2, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (32+off)(dst); \
#define EXTRACT_LOWER(dst, v0, v1, v2, v3, t0) \
VPERM2I128 $49, v1, v0, t0; \
VMOVDQU t0, 0(dst); \
VPERM2I128 $49, v3, v2, t0; \
VMOVDQU t0, 32(dst)
#define XOR_AVX(dst, src, off, v0, v1, v2, v3, t0) \
VPXOR 0+off(src), v0, t0; \
VMOVDQU t0, 0+off(dst); \
VPXOR 16+off(src), v1, t0; \
VMOVDQU t0, 16+off(dst); \
VPXOR 32+off(src), v2, t0; \
VMOVDQU t0, 32+off(dst); \
VPXOR 48+off(src), v3, t0; \
VMOVDQU t0, 48+off(dst)
#define TWO 0(SP)
#define C16 32(SP)
#define C8 64(SP)
#define STATE_0 96(SP)
#define STATE_1 128(SP)
#define STATE_2 160(SP)
#define STATE_3 192(SP)
#define TMP_0 224(SP)
#define TMP_1 256(SP)
// func xorKeyStreamAVX(dst, src []byte, block, state *[64]byte, rounds int) int
TEXT ·xorKeyStreamAVX2(SB), 4, $320-80
MOVQ dst_base+0(FP), DI
MOVQ src_base+24(FP), SI
MOVQ src_len+32(FP), CX
MOVQ block+48(FP), BX
MOVQ state+56(FP), AX
MOVQ rounds+64(FP), DX
MOVQ SP, R8
ADDQ $32, SP
ANDQ $-32, SP
VMOVDQU 0(AX), Y2
VMOVDQU 32(AX), Y3
VPERM2I128 $0x22, Y2, Y0, Y0
VPERM2I128 $0x33, Y2, Y1, Y1
VPERM2I128 $0x22, Y3, Y2, Y2
VPERM2I128 $0x33, Y3, Y3, Y3
TESTQ CX, CX
JZ done
VMOVDQU ·one_AVX2<>(SB), Y4
VPADDD Y4, Y3, Y3
VMOVDQA Y0, STATE_0
VMOVDQA Y1, STATE_1
VMOVDQA Y2, STATE_2
VMOVDQA Y3, STATE_3
VMOVDQU ·rol16_AVX2<>(SB), Y4
VMOVDQU ·rol8_AVX2<>(SB), Y5
VMOVDQU ·two_AVX2<>(SB), Y6
VMOVDQA Y4, Y14
VMOVDQA Y5, Y15
VMOVDQA Y4, C16
VMOVDQA Y5, C8
VMOVDQA Y6, TWO
CMPQ CX, $64
JBE between_0_and_64
CMPQ CX, $192
JBE between_64_and_192
CMPQ CX, $320
JBE between_192_and_320
CMPQ CX, $448
JBE between_320_and_448
at_least_512:
VMOVDQA Y0, Y4
VMOVDQA Y1, Y5
VMOVDQA Y2, Y6
VPADDQ TWO, Y3, Y7
VMOVDQA Y0, Y8
VMOVDQA Y1, Y9
VMOVDQA Y2, Y10
VPADDQ TWO, Y7, Y11
VMOVDQA Y0, Y12
VMOVDQA Y1, Y13
VMOVDQA Y2, Y14
VPADDQ TWO, Y11, Y15
MOVQ DX, R9
chacha_loop_512:
VMOVDQA Y8, TMP_0
CHACHA_QROUND(Y0, Y1, Y2, Y3, Y8, C16, C8)
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y8, C16, C8)
VMOVDQA TMP_0, Y8
VMOVDQA Y0, TMP_0
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y0, C16, C8)
CHACHA_QROUND(Y12, Y13, Y14, Y15, Y0, C16, C8)
CHACHA_SHUFFLE(Y1, Y2, Y3)
CHACHA_SHUFFLE(Y5, Y6, Y7)
CHACHA_SHUFFLE(Y9, Y10, Y11)
CHACHA_SHUFFLE(Y13, Y14, Y15)
CHACHA_QROUND(Y12, Y13, Y14, Y15, Y0, C16, C8)
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y0, C16, C8)
VMOVDQA TMP_0, Y0
VMOVDQA Y8, TMP_0
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y8, C16, C8)
CHACHA_QROUND(Y0, Y1, Y2, Y3, Y8, C16, C8)
VMOVDQA TMP_0, Y8
CHACHA_SHUFFLE(Y3, Y2, Y1)
CHACHA_SHUFFLE(Y7, Y6, Y5)
CHACHA_SHUFFLE(Y11, Y10, Y9)
CHACHA_SHUFFLE(Y15, Y14, Y13)
SUBQ $2, R9
JA chacha_loop_512
VMOVDQA Y12, TMP_0
VMOVDQA Y13, TMP_1
VPADDD STATE_0, Y0, Y0
VPADDD STATE_1, Y1, Y1
VPADDD STATE_2, Y2, Y2
VPADDD STATE_3, Y3, Y3
XOR_AVX2(DI, SI, 0, Y0, Y1, Y2, Y3, Y12, Y13)
VMOVDQA STATE_0, Y0
VMOVDQA STATE_1, Y1
VMOVDQA STATE_2, Y2
VMOVDQA STATE_3, Y3
VPADDQ TWO, Y3, Y3
VPADDD Y0, Y4, Y4
VPADDD Y1, Y5, Y5
VPADDD Y2, Y6, Y6
VPADDD Y3, Y7, Y7
XOR_AVX2(DI, SI, 128, Y4, Y5, Y6, Y7, Y12, Y13)
VPADDQ TWO, Y3, Y3
VPADDD Y0, Y8, Y8
VPADDD Y1, Y9, Y9
VPADDD Y2, Y10, Y10
VPADDD Y3, Y11, Y11
XOR_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13)
VPADDQ TWO, Y3, Y3
VPADDD TMP_0, Y0, Y12
VPADDD TMP_1, Y1, Y13
VPADDD Y2, Y14, Y14
VPADDD Y3, Y15, Y15
VPADDQ TWO, Y3, Y3
CMPQ CX, $512
JB less_than_512
XOR_AVX2(DI, SI, 384, Y12, Y13, Y14, Y15, Y4, Y5)
VMOVDQA Y3, STATE_3
ADDQ $512, SI
ADDQ $512, DI
SUBQ $512, CX
CMPQ CX, $448
JA at_least_512
TESTQ CX, CX
JZ done
VMOVDQA C16, Y14
VMOVDQA C8, Y15
CMPQ CX, $64
JBE between_0_and_64
CMPQ CX, $192
JBE between_64_and_192
CMPQ CX, $320
JBE between_192_and_320
JMP between_320_and_448
less_than_512:
XOR_UPPER_AVX2(DI, SI, 384, Y12, Y13, Y14, Y15, Y4, Y5)
EXTRACT_LOWER(BX, Y12, Y13, Y14, Y15, Y4)
ADDQ $448, SI
ADDQ $448, DI
SUBQ $448, CX
JMP finalize
between_320_and_448:
VMOVDQA Y0, Y4
VMOVDQA Y1, Y5
VMOVDQA Y2, Y6
VPADDQ TWO, Y3, Y7
VMOVDQA Y0, Y8
VMOVDQA Y1, Y9
VMOVDQA Y2, Y10
VPADDQ TWO, Y7, Y11
MOVQ DX, R9
chacha_loop_384:
CHACHA_QROUND(Y0, Y1, Y2, Y3, Y13, Y14, Y15)
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
CHACHA_SHUFFLE(Y1, Y2, Y3)
CHACHA_SHUFFLE(Y5, Y6, Y7)
CHACHA_SHUFFLE(Y9, Y10, Y11)
CHACHA_QROUND(Y0, Y1, Y2, Y3, Y13, Y14, Y15)
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
CHACHA_SHUFFLE(Y3, Y2, Y1)
CHACHA_SHUFFLE(Y7, Y6, Y5)
CHACHA_SHUFFLE(Y11, Y10, Y9)
SUBQ $2, R9
JA chacha_loop_384
VPADDD STATE_0, Y0, Y0
VPADDD STATE_1, Y1, Y1
VPADDD STATE_2, Y2, Y2
VPADDD STATE_3, Y3, Y3
XOR_AVX2(DI, SI, 0, Y0, Y1, Y2, Y3, Y12, Y13)
VMOVDQA STATE_0, Y0
VMOVDQA STATE_1, Y1
VMOVDQA STATE_2, Y2
VMOVDQA STATE_3, Y3
VPADDQ TWO, Y3, Y3
VPADDD Y0, Y4, Y4
VPADDD Y1, Y5, Y5
VPADDD Y2, Y6, Y6
VPADDD Y3, Y7, Y7
XOR_AVX2(DI, SI, 128, Y4, Y5, Y6, Y7, Y12, Y13)
VPADDQ TWO, Y3, Y3
VPADDD Y0, Y8, Y8
VPADDD Y1, Y9, Y9
VPADDD Y2, Y10, Y10
VPADDD Y3, Y11, Y11
VPADDQ TWO, Y3, Y3
CMPQ CX, $384
JB less_than_384
XOR_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13)
SUBQ $384, CX
TESTQ CX, CX
JE done
ADDQ $384, SI
ADDQ $384, DI
JMP between_0_and_64
less_than_384:
XOR_UPPER_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13)
EXTRACT_LOWER(BX, Y8, Y9, Y10, Y11, Y12)
ADDQ $320, SI
ADDQ $320, DI
SUBQ $320, CX
JMP finalize
between_192_and_320:
VMOVDQA Y0, Y4
VMOVDQA Y1, Y5
VMOVDQA Y2, Y6
VMOVDQA Y3, Y7
VMOVDQA Y0, Y8
VMOVDQA Y1, Y9
VMOVDQA Y2, Y10
VPADDQ TWO, Y3, Y11
MOVQ DX, R9
chacha_loop_256:
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
CHACHA_SHUFFLE(Y5, Y6, Y7)
CHACHA_SHUFFLE(Y9, Y10, Y11)
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
CHACHA_SHUFFLE(Y7, Y6, Y5)
CHACHA_SHUFFLE(Y11, Y10, Y9)
SUBQ $2, R9
JA chacha_loop_256
VPADDD Y0, Y4, Y4
VPADDD Y1, Y5, Y5
VPADDD Y2, Y6, Y6
VPADDD Y3, Y7, Y7
VPADDQ TWO, Y3, Y3
XOR_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13)
VPADDD Y0, Y8, Y8
VPADDD Y1, Y9, Y9
VPADDD Y2, Y10, Y10
VPADDD Y3, Y11, Y11
VPADDQ TWO, Y3, Y3
CMPQ CX, $256
JB less_than_256
XOR_AVX2(DI, SI, 128, Y8, Y9, Y10, Y11, Y12, Y13)
SUBQ $256, CX
TESTQ CX, CX
JE done
ADDQ $256, SI
ADDQ $256, DI
JMP between_0_and_64
less_than_256:
XOR_UPPER_AVX2(DI, SI, 128, Y8, Y9, Y10, Y11, Y12, Y13)
EXTRACT_LOWER(BX, Y8, Y9, Y10, Y11, Y12)
ADDQ $192, SI
ADDQ $192, DI
SUBQ $192, CX
JMP finalize
between_64_and_192:
VMOVDQA Y0, Y4
VMOVDQA Y1, Y5
VMOVDQA Y2, Y6
VMOVDQA Y3, Y7
MOVQ DX, R9
chacha_loop_128:
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_SHUFFLE(Y5, Y6, Y7)
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_SHUFFLE(Y7, Y6, Y5)
SUBQ $2, R9
JA chacha_loop_128
VPADDD Y0, Y4, Y4
VPADDD Y1, Y5, Y5
VPADDD Y2, Y6, Y6
VPADDD Y3, Y7, Y7
VPADDQ TWO, Y3, Y3
CMPQ CX, $128
JB less_than_128
XOR_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13)
SUBQ $128, CX
TESTQ CX, CX
JE done
ADDQ $128, SI
ADDQ $128, DI
JMP between_0_and_64
less_than_128:
XOR_UPPER_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13)
EXTRACT_LOWER(BX, Y4, Y5, Y6, Y7, Y13)
ADDQ $64, SI
ADDQ $64, DI
SUBQ $64, CX
JMP finalize
between_0_and_64:
VMOVDQA X0, X4
VMOVDQA X1, X5
VMOVDQA X2, X6
VMOVDQA X3, X7
MOVQ DX, R9
chacha_loop_64:
CHACHA_QROUND(X4, X5, X6, X7, X13, X14, X15)
CHACHA_SHUFFLE(X5, X6, X7)
CHACHA_QROUND(X4, X5, X6, X7, X13, X14, X15)
CHACHA_SHUFFLE(X7, X6, X5)
SUBQ $2, R9
JA chacha_loop_64
VPADDD X0, X4, X4
VPADDD X1, X5, X5
VPADDD X2, X6, X6
VPADDD X3, X7, X7
VMOVDQU ·one_AVX<>(SB), X0
VPADDQ X0, X3, X3
CMPQ CX, $64
JB less_than_64
XOR_AVX(DI, SI, 0, X4, X5, X6, X7, X13)
SUBQ $64, CX
JMP done
less_than_64:
VMOVDQU X4, 0(BX)
VMOVDQU X5, 16(BX)
VMOVDQU X6, 32(BX)
VMOVDQU X7, 48(BX)
finalize:
XORQ R11, R11
XORQ R12, R12
MOVQ CX, BP
xor_loop:
MOVB 0(SI), R11
MOVB 0(BX), R12
XORQ R11, R12
MOVB R12, 0(DI)
INCQ SI
INCQ BX
INCQ DI
DECQ BP
JA xor_loop
done:
VMOVDQU X3, 48(AX)
VZEROUPPER
MOVQ R8, SP
MOVQ CX, ret+72(FP)
RET
// func hChaCha20AVX(out *[32]byte, nonce *[16]byte, key *[32]byte)
TEXT ·hChaCha20AVX(SB), 4, $0-24
MOVQ out+0(FP), DI
MOVQ nonce+8(FP), AX
MOVQ key+16(FP), BX
VMOVDQU ·sigma_AVX<>(SB), X0
VMOVDQU 0(BX), X1
VMOVDQU 16(BX), X2
VMOVDQU 0(AX), X3
VMOVDQU ·rol16_AVX2<>(SB), X5
VMOVDQU ·rol8_AVX2<>(SB), X6
MOVQ $20, CX
chacha_loop:
CHACHA_QROUND(X0, X1, X2, X3, X4, X5, X6)
CHACHA_SHUFFLE(X1, X2, X3)
CHACHA_QROUND(X0, X1, X2, X3, X4, X5, X6)
CHACHA_SHUFFLE(X3, X2, X1)
SUBQ $2, CX
JNZ chacha_loop
VMOVDQU X0, 0(DI)
VMOVDQU X3, 16(DI)
VZEROUPPER
RET
// func supportsAVX2() bool
TEXT ·supportsAVX2(SB), 4, $0-1
MOVQ runtime·support_avx(SB), AX
MOVQ runtime·support_avx2(SB), BX
ANDQ AX, BX
MOVB BX, ret+0(FP)
RET

67
vendor/github.com/aead/chacha20/chacha/chacha_386.go generated vendored Normal file
View file

@ -0,0 +1,67 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// +build 386,!gccgo,!appengine,!nacl
package chacha
import "encoding/binary"
func init() {
useSSE2 = supportsSSE2()
useSSSE3 = supportsSSSE3()
useAVX2 = false
}
func initialize(state *[64]byte, key []byte, nonce *[16]byte) {
binary.LittleEndian.PutUint32(state[0:], sigma[0])
binary.LittleEndian.PutUint32(state[4:], sigma[1])
binary.LittleEndian.PutUint32(state[8:], sigma[2])
binary.LittleEndian.PutUint32(state[12:], sigma[3])
copy(state[16:], key[:])
copy(state[48:], nonce[:])
}
// This function is implemented in chacha_386.s
//go:noescape
func supportsSSE2() bool
// This function is implemented in chacha_386.s
//go:noescape
func supportsSSSE3() bool
// This function is implemented in chacha_386.s
//go:noescape
func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
// This function is implemented in chacha_386.s
//go:noescape
func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
// This function is implemented in chacha_386.s
//go:noescape
func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
// This function is implemented in chacha_386.s
//go:noescape
func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
if useSSSE3 {
hChaCha20SSSE3(out, nonce, key)
} else if useSSE2 {
hChaCha20SSE2(out, nonce, key)
} else {
hChaCha20Generic(out, nonce, key)
}
}
func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
if useSSSE3 {
return xorKeyStreamSSSE3(dst, src, block, state, rounds)
} else if useSSE2 {
return xorKeyStreamSSE2(dst, src, block, state, rounds)
}
return xorKeyStreamGeneric(dst, src, block, state, rounds)
}

311
vendor/github.com/aead/chacha20/chacha/chacha_386.s generated vendored Normal file
View file

@ -0,0 +1,311 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// +build 386,!gccgo,!appengine,!nacl
#include "textflag.h"
DATA ·sigma<>+0x00(SB)/4, $0x61707865
DATA ·sigma<>+0x04(SB)/4, $0x3320646e
DATA ·sigma<>+0x08(SB)/4, $0x79622d32
DATA ·sigma<>+0x0C(SB)/4, $0x6b206574
GLOBL ·sigma<>(SB), (NOPTR+RODATA), $16
DATA ·one<>+0x00(SB)/8, $1
DATA ·one<>+0x08(SB)/8, $0
GLOBL ·one<>(SB), (NOPTR+RODATA), $16
DATA ·rol16<>+0x00(SB)/8, $0x0504070601000302
DATA ·rol16<>+0x08(SB)/8, $0x0D0C0F0E09080B0A
GLOBL ·rol16<>(SB), (NOPTR+RODATA), $16
DATA ·rol8<>+0x00(SB)/8, $0x0605040702010003
DATA ·rol8<>+0x08(SB)/8, $0x0E0D0C0F0A09080B
GLOBL ·rol8<>(SB), (NOPTR+RODATA), $16
#define ROTL_SSE2(n, t, v) \
MOVO v, t; \
PSLLL $n, t; \
PSRLL $(32-n), v; \
PXOR t, v
#define CHACHA_QROUND_SSE2(v0, v1, v2, v3, t0) \
PADDL v1, v0; \
PXOR v0, v3; \
ROTL_SSE2(16, t0, v3); \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE2(12, t0, v1); \
PADDL v1, v0; \
PXOR v0, v3; \
ROTL_SSE2(8, t0, v3); \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE2(7, t0, v1)
#define CHACHA_QROUND_SSSE3(v0, v1, v2, v3, t0, r16, r8) \
PADDL v1, v0; \
PXOR v0, v3; \
PSHUFB r16, v3; \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE2(12, t0, v1); \
PADDL v1, v0; \
PXOR v0, v3; \
PSHUFB r8, v3; \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE2(7, t0, v1)
#define CHACHA_SHUFFLE(v1, v2, v3) \
PSHUFL $0x39, v1, v1; \
PSHUFL $0x4E, v2, v2; \
PSHUFL $0x93, v3, v3
#define XOR(dst, src, off, v0, v1, v2, v3, t0) \
MOVOU 0+off(src), t0; \
PXOR v0, t0; \
MOVOU t0, 0+off(dst); \
MOVOU 16+off(src), t0; \
PXOR v1, t0; \
MOVOU t0, 16+off(dst); \
MOVOU 32+off(src), t0; \
PXOR v2, t0; \
MOVOU t0, 32+off(dst); \
MOVOU 48+off(src), t0; \
PXOR v3, t0; \
MOVOU t0, 48+off(dst)
#define FINALIZE(dst, src, block, len, t0, t1) \
XORL t0, t0; \
XORL t1, t1; \
finalize: \
MOVB 0(src), t0; \
MOVB 0(block), t1; \
XORL t0, t1; \
MOVB t1, 0(dst); \
INCL src; \
INCL block; \
INCL dst; \
DECL len; \
JA finalize \
// func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
TEXT ·xorKeyStreamSSE2(SB), 4, $0-40
MOVL dst_base+0(FP), DI
MOVL src_base+12(FP), SI
MOVL src_len+16(FP), CX
MOVL state+28(FP), AX
MOVL rounds+32(FP), DX
MOVOU 0(AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
TESTL CX, CX
JZ done
at_least_64:
MOVO X0, X4
MOVO X1, X5
MOVO X2, X6
MOVO X3, X7
MOVL DX, BX
chacha_loop:
CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
CHACHA_SHUFFLE(X5, X6, X7)
CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
CHACHA_SHUFFLE(X7, X6, X5)
SUBL $2, BX
JA chacha_loop
MOVOU 0(AX), X0
PADDL X0, X4
PADDL X1, X5
PADDL X2, X6
PADDL X3, X7
MOVOU ·one<>(SB), X0
PADDQ X0, X3
CMPL CX, $64
JB less_than_64
XOR(DI, SI, 0, X4, X5, X6, X7, X0)
MOVOU 0(AX), X0
ADDL $64, SI
ADDL $64, DI
SUBL $64, CX
JNZ at_least_64
less_than_64:
MOVL CX, BP
TESTL BP, BP
JZ done
MOVL block+24(FP), BX
MOVOU X4, 0(BX)
MOVOU X5, 16(BX)
MOVOU X6, 32(BX)
MOVOU X7, 48(BX)
FINALIZE(DI, SI, BX, BP, AX, DX)
done:
MOVL state+28(FP), AX
MOVOU X3, 48(AX)
MOVL CX, ret+36(FP)
RET
// func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
TEXT ·xorKeyStreamSSSE3(SB), 4, $64-40
MOVL dst_base+0(FP), DI
MOVL src_base+12(FP), SI
MOVL src_len+16(FP), CX
MOVL state+28(FP), AX
MOVL rounds+32(FP), DX
MOVOU 48(AX), X3
TESTL CX, CX
JZ done
MOVL SP, BP
ADDL $16, SP
ANDL $-16, SP
MOVOU ·one<>(SB), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVO X0, 0(SP)
MOVO X1, 16(SP)
MOVO X2, 32(SP)
MOVOU 0(AX), X0
MOVOU ·rol16<>(SB), X1
MOVOU ·rol8<>(SB), X2
at_least_64:
MOVO X0, X4
MOVO 16(SP), X5
MOVO 32(SP), X6
MOVO X3, X7
MOVL DX, BX
chacha_loop:
CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X0, X1, X2)
CHACHA_SHUFFLE(X5, X6, X7)
CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X0, X1, X2)
CHACHA_SHUFFLE(X7, X6, X5)
SUBL $2, BX
JA chacha_loop
MOVOU 0(AX), X0
PADDL X0, X4
PADDL 16(SP), X5
PADDL 32(SP), X6
PADDL X3, X7
PADDQ 0(SP), X3
CMPL CX, $64
JB less_than_64
XOR(DI, SI, 0, X4, X5, X6, X7, X0)
MOVOU 0(AX), X0
ADDL $64, SI
ADDL $64, DI
SUBL $64, CX
JNZ at_least_64
less_than_64:
MOVL BP, SP
MOVL CX, BP
TESTL BP, BP
JE done
MOVL block+24(FP), BX
MOVOU X4, 0(BX)
MOVOU X5, 16(BX)
MOVOU X6, 32(BX)
MOVOU X7, 48(BX)
FINALIZE(DI, SI, BX, BP, AX, DX)
done:
MOVL state+28(FP), AX
MOVOU X3, 48(AX)
MOVL CX, ret+36(FP)
RET
// func supportsSSE2() bool
TEXT ·supportsSSE2(SB), NOSPLIT, $0-1
XORL AX, AX
INCL AX
CPUID
SHRL $26, DX
ANDL $1, DX
MOVB DX, ret+0(FP)
RET
// func supportsSSSE3() bool
TEXT ·supportsSSSE3(SB), NOSPLIT, $0-1
XORL AX, AX
INCL AX
CPUID
SHRL $9, CX
ANDL $1, CX
MOVB CX, ret+0(FP)
RET
// func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
TEXT ·hChaCha20SSE2(SB), 4, $0-12
MOVL out+0(FP), DI
MOVL nonce+4(FP), AX
MOVL key+8(FP), BX
MOVOU ·sigma<>(SB), X0
MOVOU 0(BX), X1
MOVOU 16(BX), X2
MOVOU 0(AX), X3
MOVL $20, CX
chacha_loop:
CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
CHACHA_SHUFFLE(X1, X2, X3)
CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
CHACHA_SHUFFLE(X3, X2, X1)
SUBL $2, CX
JNZ chacha_loop
MOVOU X0, 0(DI)
MOVOU X3, 16(DI)
RET
// func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
TEXT ·hChaCha20SSSE3(SB), 4, $0-12
MOVL out+0(FP), DI
MOVL nonce+4(FP), AX
MOVL key+8(FP), BX
MOVOU ·sigma<>(SB), X0
MOVOU 0(BX), X1
MOVOU 16(BX), X2
MOVOU 0(AX), X3
MOVOU ·rol16<>(SB), X5
MOVOU ·rol8<>(SB), X6
MOVL $20, CX
chacha_loop:
CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
CHACHA_SHUFFLE(X1, X2, X3)
CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
CHACHA_SHUFFLE(X3, X2, X1)
SUBL $2, CX
JNZ chacha_loop
MOVOU X0, 0(DI)
MOVOU X3, 16(DI)
RET

788
vendor/github.com/aead/chacha20/chacha/chacha_amd64.s generated vendored Normal file
View file

@ -0,0 +1,788 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// +build amd64,!gccgo,!appengine,!nacl
#include "textflag.h"
DATA ·sigma<>+0x00(SB)/4, $0x61707865
DATA ·sigma<>+0x04(SB)/4, $0x3320646e
DATA ·sigma<>+0x08(SB)/4, $0x79622d32
DATA ·sigma<>+0x0C(SB)/4, $0x6b206574
GLOBL ·sigma<>(SB), (NOPTR+RODATA), $16
DATA ·one<>+0x00(SB)/8, $1
DATA ·one<>+0x08(SB)/8, $0
GLOBL ·one<>(SB), (NOPTR+RODATA), $16
DATA ·rol16<>+0x00(SB)/8, $0x0504070601000302
DATA ·rol16<>+0x08(SB)/8, $0x0D0C0F0E09080B0A
GLOBL ·rol16<>(SB), (NOPTR+RODATA), $16
DATA ·rol8<>+0x00(SB)/8, $0x0605040702010003
DATA ·rol8<>+0x08(SB)/8, $0x0E0D0C0F0A09080B
GLOBL ·rol8<>(SB), (NOPTR+RODATA), $16
#define ROTL_SSE2(n, t, v) \
MOVO v, t; \
PSLLL $n, t; \
PSRLL $(32-n), v; \
PXOR t, v
#define CHACHA_QROUND_SSE2(v0, v1, v2, v3, t0) \
PADDL v1, v0; \
PXOR v0, v3; \
ROTL_SSE2(16, t0, v3); \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE2(12, t0, v1); \
PADDL v1, v0; \
PXOR v0, v3; \
ROTL_SSE2(8, t0, v3); \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE2(7, t0, v1)
#define CHACHA_QROUND_SSSE3(v0, v1, v2, v3, t0, r16, r8) \
PADDL v1, v0; \
PXOR v0, v3; \
PSHUFB r16, v3; \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE2(12, t0, v1); \
PADDL v1, v0; \
PXOR v0, v3; \
PSHUFB r8, v3; \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE2(7, t0, v1)
#define CHACHA_SHUFFLE(v1, v2, v3) \
PSHUFL $0x39, v1, v1; \
PSHUFL $0x4E, v2, v2; \
PSHUFL $0x93, v3, v3
#define XOR(dst, src, off, v0, v1, v2, v3, t0) \
MOVOU 0+off(src), t0; \
PXOR v0, t0; \
MOVOU t0, 0+off(dst); \
MOVOU 16+off(src), t0; \
PXOR v1, t0; \
MOVOU t0, 16+off(dst); \
MOVOU 32+off(src), t0; \
PXOR v2, t0; \
MOVOU t0, 32+off(dst); \
MOVOU 48+off(src), t0; \
PXOR v3, t0; \
MOVOU t0, 48+off(dst)
// func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
TEXT ·xorKeyStreamSSE2(SB), 4, $112-80
MOVQ dst_base+0(FP), DI
MOVQ src_base+24(FP), SI
MOVQ src_len+32(FP), CX
MOVQ block+48(FP), BX
MOVQ state+56(FP), AX
MOVQ rounds+64(FP), DX
MOVQ SP, R9
ADDQ $16, SP
ANDQ $-16, SP
MOVOU 0(AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU ·one<>(SB), X15
TESTQ CX, CX
JZ done
CMPQ CX, $64
JBE between_0_and_64
CMPQ CX, $128
JBE between_64_and_128
MOVO X0, 0(SP)
MOVO X1, 16(SP)
MOVO X2, 32(SP)
MOVO X3, 48(SP)
MOVO X15, 64(SP)
CMPQ CX, $192
JBE between_128_and_192
MOVQ $192, R14
at_least_256:
MOVO X0, X4
MOVO X1, X5
MOVO X2, X6
MOVO X3, X7
PADDQ 64(SP), X7
MOVO X0, X12
MOVO X1, X13
MOVO X2, X14
MOVO X7, X15
PADDQ 64(SP), X15
MOVO X0, X8
MOVO X1, X9
MOVO X2, X10
MOVO X15, X11
PADDQ 64(SP), X11
MOVQ DX, R8
chacha_loop_256:
MOVO X8, 80(SP)
CHACHA_QROUND_SSE2(X0, X1, X2, X3, X8)
CHACHA_QROUND_SSE2(X4, X5, X6, X7, X8)
MOVO 80(SP), X8
MOVO X0, 80(SP)
CHACHA_QROUND_SSE2(X12, X13, X14, X15, X0)
CHACHA_QROUND_SSE2(X8, X9, X10, X11, X0)
MOVO 80(SP), X0
CHACHA_SHUFFLE(X1, X2, X3)
CHACHA_SHUFFLE(X5, X6, X7)
CHACHA_SHUFFLE(X13, X14, X15)
CHACHA_SHUFFLE(X9, X10, X11)
MOVO X8, 80(SP)
CHACHA_QROUND_SSE2(X0, X1, X2, X3, X8)
CHACHA_QROUND_SSE2(X4, X5, X6, X7, X8)
MOVO 80(SP), X8
MOVO X0, 80(SP)
CHACHA_QROUND_SSE2(X12, X13, X14, X15, X0)
CHACHA_QROUND_SSE2(X8, X9, X10, X11, X0)
MOVO 80(SP), X0
CHACHA_SHUFFLE(X3, X2, X1)
CHACHA_SHUFFLE(X7, X6, X5)
CHACHA_SHUFFLE(X15, X14, X13)
CHACHA_SHUFFLE(X11, X10, X9)
SUBQ $2, R8
JA chacha_loop_256
MOVO X8, 80(SP)
PADDL 0(SP), X0
PADDL 16(SP), X1
PADDL 32(SP), X2
PADDL 48(SP), X3
XOR(DI, SI, 0, X0, X1, X2, X3, X8)
MOVO 0(SP), X0
MOVO 16(SP), X1
MOVO 32(SP), X2
MOVO 48(SP), X3
PADDQ 64(SP), X3
PADDL X0, X4
PADDL X1, X5
PADDL X2, X6
PADDL X3, X7
PADDQ 64(SP), X3
XOR(DI, SI, 64, X4, X5, X6, X7, X8)
MOVO 64(SP), X5
MOVO 80(SP), X8
PADDL X0, X12
PADDL X1, X13
PADDL X2, X14
PADDL X3, X15
PADDQ X5, X3
XOR(DI, SI, 128, X12, X13, X14, X15, X4)
PADDL X0, X8
PADDL X1, X9
PADDL X2, X10
PADDL X3, X11
PADDQ X5, X3
CMPQ CX, $256
JB less_than_64
XOR(DI, SI, 192, X8, X9, X10, X11, X4)
MOVO X3, 48(SP)
ADDQ $256, SI
ADDQ $256, DI
SUBQ $256, CX
CMPQ CX, $192
JA at_least_256
TESTQ CX, CX
JZ done
MOVO 64(SP), X15
CMPQ CX, $64
JBE between_0_and_64
CMPQ CX, $128
JBE between_64_and_128
between_128_and_192:
MOVQ $128, R14
MOVO X0, X4
MOVO X1, X5
MOVO X2, X6
MOVO X3, X7
PADDQ X15, X7
MOVO X0, X8
MOVO X1, X9
MOVO X2, X10
MOVO X7, X11
PADDQ X15, X11
MOVQ DX, R8
chacha_loop_192:
CHACHA_QROUND_SSE2(X0, X1, X2, X3, X12)
CHACHA_QROUND_SSE2(X4, X5, X6, X7, X12)
CHACHA_QROUND_SSE2(X8, X9, X10, X11, X12)
CHACHA_SHUFFLE(X1, X2, X3)
CHACHA_SHUFFLE(X5, X6, X7)
CHACHA_SHUFFLE(X9, X10, X11)
CHACHA_QROUND_SSE2(X0, X1, X2, X3, X12)
CHACHA_QROUND_SSE2(X4, X5, X6, X7, X12)
CHACHA_QROUND_SSE2(X8, X9, X10, X11, X12)
CHACHA_SHUFFLE(X3, X2, X1)
CHACHA_SHUFFLE(X7, X6, X5)
CHACHA_SHUFFLE(X11, X10, X9)
SUBQ $2, R8
JA chacha_loop_192
PADDL 0(SP), X0
PADDL 16(SP), X1
PADDL 32(SP), X2
PADDL 48(SP), X3
XOR(DI, SI, 0, X0, X1, X2, X3, X12)
MOVO 0(SP), X0
MOVO 16(SP), X1
MOVO 32(SP), X2
MOVO 48(SP), X3
PADDQ X15, X3
PADDL X0, X4
PADDL X1, X5
PADDL X2, X6
PADDL X3, X7
PADDQ X15, X3
XOR(DI, SI, 64, X4, X5, X6, X7, X12)
PADDL X0, X8
PADDL X1, X9
PADDL X2, X10
PADDL X3, X11
PADDQ X15, X3
CMPQ CX, $192
JB less_than_64
XOR(DI, SI, 128, X8, X9, X10, X11, X12)
SUBQ $192, CX
JMP done
between_64_and_128:
MOVQ $64, R14
MOVO X0, X4
MOVO X1, X5
MOVO X2, X6
MOVO X3, X7
MOVO X0, X8
MOVO X1, X9
MOVO X2, X10
MOVO X3, X11
PADDQ X15, X11
MOVQ DX, R8
chacha_loop_128:
CHACHA_QROUND_SSE2(X4, X5, X6, X7, X12)
CHACHA_QROUND_SSE2(X8, X9, X10, X11, X12)
CHACHA_SHUFFLE(X5, X6, X7)
CHACHA_SHUFFLE(X9, X10, X11)
CHACHA_QROUND_SSE2(X4, X5, X6, X7, X12)
CHACHA_QROUND_SSE2(X8, X9, X10, X11, X12)
CHACHA_SHUFFLE(X7, X6, X5)
CHACHA_SHUFFLE(X11, X10, X9)
SUBQ $2, R8
JA chacha_loop_128
PADDL X0, X4
PADDL X1, X5
PADDL X2, X6
PADDL X3, X7
PADDQ X15, X3
PADDL X0, X8
PADDL X1, X9
PADDL X2, X10
PADDL X3, X11
PADDQ X15, X3
XOR(DI, SI, 0, X4, X5, X6, X7, X12)
CMPQ CX, $128
JB less_than_64
XOR(DI, SI, 64, X8, X9, X10, X11, X12)
SUBQ $128, CX
JMP done
between_0_and_64:
MOVQ $0, R14
MOVO X0, X8
MOVO X1, X9
MOVO X2, X10
MOVO X3, X11
MOVQ DX, R8
chacha_loop_64:
CHACHA_QROUND_SSE2(X8, X9, X10, X11, X12)
CHACHA_SHUFFLE(X9, X10, X11)
CHACHA_QROUND_SSE2(X8, X9, X10, X11, X12)
CHACHA_SHUFFLE(X11, X10, X9)
SUBQ $2, R8
JA chacha_loop_64
PADDL X0, X8
PADDL X1, X9
PADDL X2, X10
PADDL X3, X11
PADDQ X15, X3
CMPQ CX, $64
JB less_than_64
XOR(DI, SI, 0, X8, X9, X10, X11, X12)
SUBQ $64, CX
JMP done
less_than_64:
// R14 contains the num of bytes already xor'd
ADDQ R14, SI
ADDQ R14, DI
SUBQ R14, CX
MOVOU X8, 0(BX)
MOVOU X9, 16(BX)
MOVOU X10, 32(BX)
MOVOU X11, 48(BX)
XORQ R11, R11
XORQ R12, R12
MOVQ CX, BP
xor_loop:
MOVB 0(SI), R11
MOVB 0(BX), R12
XORQ R11, R12
MOVB R12, 0(DI)
INCQ SI
INCQ BX
INCQ DI
DECQ BP
JA xor_loop
done:
MOVOU X3, 48(AX)
MOVQ R9, SP
MOVQ CX, ret+72(FP)
RET
// func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
TEXT ·xorKeyStreamSSSE3(SB), 4, $144-80
MOVQ dst_base+0(FP), DI
MOVQ src_base+24(FP), SI
MOVQ src_len+32(FP), CX
MOVQ block+48(FP), BX
MOVQ state+56(FP), AX
MOVQ rounds+64(FP), DX
MOVQ SP, R9
ADDQ $16, SP
ANDQ $-16, SP
MOVOU 0(AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU ·rol16<>(SB), X13
MOVOU ·rol8<>(SB), X14
MOVOU ·one<>(SB), X15
TESTQ CX, CX
JZ done
CMPQ CX, $64
JBE between_0_and_64
CMPQ CX, $128
JBE between_64_and_128
MOVO X0, 0(SP)
MOVO X1, 16(SP)
MOVO X2, 32(SP)
MOVO X3, 48(SP)
MOVO X15, 64(SP)
CMPQ CX, $192
JBE between_128_and_192
MOVO X13, 96(SP)
MOVO X14, 112(SP)
MOVQ $192, R14
at_least_256:
MOVO X0, X4
MOVO X1, X5
MOVO X2, X6
MOVO X3, X7
PADDQ 64(SP), X7
MOVO X0, X12
MOVO X1, X13
MOVO X2, X14
MOVO X7, X15
PADDQ 64(SP), X15
MOVO X0, X8
MOVO X1, X9
MOVO X2, X10
MOVO X15, X11
PADDQ 64(SP), X11
MOVQ DX, R8
chacha_loop_256:
MOVO X8, 80(SP)
CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X8, 96(SP), 112(SP))
CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X8, 96(SP), 112(SP))
MOVO 80(SP), X8
MOVO X0, 80(SP)
CHACHA_QROUND_SSSE3(X12, X13, X14, X15, X0, 96(SP), 112(SP))
CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X0, 96(SP), 112(SP))
MOVO 80(SP), X0
CHACHA_SHUFFLE(X1, X2, X3)
CHACHA_SHUFFLE(X5, X6, X7)
CHACHA_SHUFFLE(X13, X14, X15)
CHACHA_SHUFFLE(X9, X10, X11)
MOVO X8, 80(SP)
CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X8, 96(SP), 112(SP))
CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X8, 96(SP), 112(SP))
MOVO 80(SP), X8
MOVO X0, 80(SP)
CHACHA_QROUND_SSSE3(X12, X13, X14, X15, X0, 96(SP), 112(SP))
CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X0, 96(SP), 112(SP))
MOVO 80(SP), X0
CHACHA_SHUFFLE(X3, X2, X1)
CHACHA_SHUFFLE(X7, X6, X5)
CHACHA_SHUFFLE(X15, X14, X13)
CHACHA_SHUFFLE(X11, X10, X9)
SUBQ $2, R8
JA chacha_loop_256
MOVO X8, 80(SP)
PADDL 0(SP), X0
PADDL 16(SP), X1
PADDL 32(SP), X2
PADDL 48(SP), X3
XOR(DI, SI, 0, X0, X1, X2, X3, X8)
MOVO 0(SP), X0
MOVO 16(SP), X1
MOVO 32(SP), X2
MOVO 48(SP), X3
PADDQ 64(SP), X3
PADDL X0, X4
PADDL X1, X5
PADDL X2, X6
PADDL X3, X7
PADDQ 64(SP), X3
XOR(DI, SI, 64, X4, X5, X6, X7, X8)
MOVO 64(SP), X5
MOVO 80(SP), X8
PADDL X0, X12
PADDL X1, X13
PADDL X2, X14
PADDL X3, X15
PADDQ X5, X3
XOR(DI, SI, 128, X12, X13, X14, X15, X4)
PADDL X0, X8
PADDL X1, X9
PADDL X2, X10
PADDL X3, X11
PADDQ X5, X3
CMPQ CX, $256
JB less_than_64
XOR(DI, SI, 192, X8, X9, X10, X11, X4)
MOVO X3, 48(SP)
ADDQ $256, SI
ADDQ $256, DI
SUBQ $256, CX
CMPQ CX, $192
JA at_least_256
TESTQ CX, CX
JZ done
MOVOU ·rol16<>(SB), X13
MOVOU ·rol8<>(SB), X14
MOVO 64(SP), X15
CMPQ CX, $64
JBE between_0_and_64
CMPQ CX, $128
JBE between_64_and_128
between_128_and_192:
MOVQ $128, R14
MOVO X0, X4
MOVO X1, X5
MOVO X2, X6
MOVO X3, X7
PADDQ X15, X7
MOVO X0, X8
MOVO X1, X9
MOVO X2, X10
MOVO X7, X11
PADDQ X15, X11
MOVQ DX, R8
chacha_loop_192:
CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X12, X13, X14)
CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X12, X13, X14)
CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X12, X13, X14)
CHACHA_SHUFFLE(X1, X2, X3)
CHACHA_SHUFFLE(X5, X6, X7)
CHACHA_SHUFFLE(X9, X10, X11)
CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X12, X13, X14)
CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X12, X13, X14)
CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X12, X13, X14)
CHACHA_SHUFFLE(X3, X2, X1)
CHACHA_SHUFFLE(X7, X6, X5)
CHACHA_SHUFFLE(X11, X10, X9)
SUBQ $2, R8
JA chacha_loop_192
PADDL 0(SP), X0
PADDL 16(SP), X1
PADDL 32(SP), X2
PADDL 48(SP), X3
XOR(DI, SI, 0, X0, X1, X2, X3, X12)
MOVO 0(SP), X0
MOVO 16(SP), X1
MOVO 32(SP), X2
MOVO 48(SP), X3
PADDQ X15, X3
PADDL X0, X4
PADDL X1, X5
PADDL X2, X6
PADDL X3, X7
PADDQ X15, X3
XOR(DI, SI, 64, X4, X5, X6, X7, X12)
PADDL X0, X8
PADDL X1, X9
PADDL X2, X10
PADDL X3, X11
PADDQ X15, X3
CMPQ CX, $192
JB less_than_64
XOR(DI, SI, 128, X8, X9, X10, X11, X12)
SUBQ $192, CX
JMP done
between_64_and_128:
MOVQ $64, R14
MOVO X0, X4
MOVO X1, X5
MOVO X2, X6
MOVO X3, X7
MOVO X0, X8
MOVO X1, X9
MOVO X2, X10
MOVO X3, X11
PADDQ X15, X11
MOVQ DX, R8
chacha_loop_128:
CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X12, X13, X14)
CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X12, X13, X14)
CHACHA_SHUFFLE(X5, X6, X7)
CHACHA_SHUFFLE(X9, X10, X11)
CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X12, X13, X14)
CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X12, X13, X14)
CHACHA_SHUFFLE(X7, X6, X5)
CHACHA_SHUFFLE(X11, X10, X9)
SUBQ $2, R8
JA chacha_loop_128
PADDL X0, X4
PADDL X1, X5
PADDL X2, X6
PADDL X3, X7
PADDQ X15, X3
PADDL X0, X8
PADDL X1, X9
PADDL X2, X10
PADDL X3, X11
PADDQ X15, X3
XOR(DI, SI, 0, X4, X5, X6, X7, X12)
CMPQ CX, $128
JB less_than_64
XOR(DI, SI, 64, X8, X9, X10, X11, X12)
SUBQ $128, CX
JMP done
between_0_and_64:
MOVQ $0, R14
MOVO X0, X8
MOVO X1, X9
MOVO X2, X10
MOVO X3, X11
MOVQ DX, R8
chacha_loop_64:
CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X12, X13, X14)
CHACHA_SHUFFLE(X9, X10, X11)
CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X12, X13, X14)
CHACHA_SHUFFLE(X11, X10, X9)
SUBQ $2, R8
JA chacha_loop_64
PADDL X0, X8
PADDL X1, X9
PADDL X2, X10
PADDL X3, X11
PADDQ X15, X3
CMPQ CX, $64
JB less_than_64
XOR(DI, SI, 0, X8, X9, X10, X11, X12)
SUBQ $64, CX
JMP done
less_than_64:
// R14 contains the num of bytes already xor'd
ADDQ R14, SI
ADDQ R14, DI
SUBQ R14, CX
MOVOU X8, 0(BX)
MOVOU X9, 16(BX)
MOVOU X10, 32(BX)
MOVOU X11, 48(BX)
XORQ R11, R11
XORQ R12, R12
MOVQ CX, BP
xor_loop:
MOVB 0(SI), R11
MOVB 0(BX), R12
XORQ R11, R12
MOVB R12, 0(DI)
INCQ SI
INCQ BX
INCQ DI
DECQ BP
JA xor_loop
done:
MOVQ R9, SP
MOVOU X3, 48(AX)
MOVQ CX, ret+72(FP)
RET
// func supportsSSSE3() bool
TEXT ·supportsSSSE3(SB), NOSPLIT, $0-1
XORQ AX, AX
INCQ AX
CPUID
SHRQ $9, CX
ANDQ $1, CX
MOVB CX, ret+0(FP)
RET
// func initialize(state *[64]byte, key []byte, nonce *[16]byte)
TEXT ·initialize(SB), 4, $0-40
MOVQ state+0(FP), DI
MOVQ key+8(FP), AX
MOVQ nonce+32(FP), BX
MOVOU ·sigma<>(SB), X0
MOVOU 0(AX), X1
MOVOU 16(AX), X2
MOVOU 0(BX), X3
MOVOU X0, 0(DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
MOVOU X3, 48(DI)
RET
// func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
TEXT ·hChaCha20SSE2(SB), 4, $0-24
MOVQ out+0(FP), DI
MOVQ nonce+8(FP), AX
MOVQ key+16(FP), BX
MOVOU ·sigma<>(SB), X0
MOVOU 0(BX), X1
MOVOU 16(BX), X2
MOVOU 0(AX), X3
MOVQ $20, CX
chacha_loop:
CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
CHACHA_SHUFFLE(X1, X2, X3)
CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
CHACHA_SHUFFLE(X3, X2, X1)
SUBQ $2, CX
JNZ chacha_loop
MOVOU X0, 0(DI)
MOVOU X3, 16(DI)
RET
// func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
TEXT ·hChaCha20SSSE3(SB), 4, $0-24
MOVQ out+0(FP), DI
MOVQ nonce+8(FP), AX
MOVQ key+16(FP), BX
MOVOU ·sigma<>(SB), X0
MOVOU 0(BX), X1
MOVOU 16(BX), X2
MOVOU 0(AX), X3
MOVOU ·rol16<>(SB), X5
MOVOU ·rol8<>(SB), X6
MOVQ $20, CX
chacha_loop:
CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
CHACHA_SHUFFLE(X1, X2, X3)
CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
CHACHA_SHUFFLE(X3, X2, X1)
SUBQ $2, CX
JNZ chacha_loop
MOVOU X0, 0(DI)
MOVOU X3, 16(DI)
RET

View file

@ -0,0 +1,319 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
package chacha
import "encoding/binary"
var sigma = [4]uint32{0x61707865, 0x3320646e, 0x79622d32, 0x6b206574}
func xorKeyStreamGeneric(dst, src []byte, block, state *[64]byte, rounds int) int {
for len(src) >= 64 {
chachaGeneric(block, state, rounds)
for i, v := range block {
dst[i] = src[i] ^ v
}
src = src[64:]
dst = dst[64:]
}
n := len(src)
if n > 0 {
chachaGeneric(block, state, rounds)
for i, v := range src {
dst[i] = v ^ block[i]
}
}
return n
}
func chachaGeneric(dst *[64]byte, state *[64]byte, rounds int) {
v00 := binary.LittleEndian.Uint32(state[0:])
v01 := binary.LittleEndian.Uint32(state[4:])
v02 := binary.LittleEndian.Uint32(state[8:])
v03 := binary.LittleEndian.Uint32(state[12:])
v04 := binary.LittleEndian.Uint32(state[16:])
v05 := binary.LittleEndian.Uint32(state[20:])
v06 := binary.LittleEndian.Uint32(state[24:])
v07 := binary.LittleEndian.Uint32(state[28:])
v08 := binary.LittleEndian.Uint32(state[32:])
v09 := binary.LittleEndian.Uint32(state[36:])
v10 := binary.LittleEndian.Uint32(state[40:])
v11 := binary.LittleEndian.Uint32(state[44:])
v12 := binary.LittleEndian.Uint32(state[48:])
v13 := binary.LittleEndian.Uint32(state[52:])
v14 := binary.LittleEndian.Uint32(state[56:])
v15 := binary.LittleEndian.Uint32(state[60:])
s00, s01, s02, s03, s04, s05, s06, s07 := v00, v01, v02, v03, v04, v05, v06, v07
s08, s09, s10, s11, s12, s13, s14, s15 := v08, v09, v10, v11, v12, v13, v14, v15
for i := 0; i < rounds; i += 2 {
v00 += v04
v12 ^= v00
v12 = (v12 << 16) | (v12 >> 16)
v08 += v12
v04 ^= v08
v04 = (v04 << 12) | (v04 >> 20)
v00 += v04
v12 ^= v00
v12 = (v12 << 8) | (v12 >> 24)
v08 += v12
v04 ^= v08
v04 = (v04 << 7) | (v04 >> 25)
v01 += v05
v13 ^= v01
v13 = (v13 << 16) | (v13 >> 16)
v09 += v13
v05 ^= v09
v05 = (v05 << 12) | (v05 >> 20)
v01 += v05
v13 ^= v01
v13 = (v13 << 8) | (v13 >> 24)
v09 += v13
v05 ^= v09
v05 = (v05 << 7) | (v05 >> 25)
v02 += v06
v14 ^= v02
v14 = (v14 << 16) | (v14 >> 16)
v10 += v14
v06 ^= v10
v06 = (v06 << 12) | (v06 >> 20)
v02 += v06
v14 ^= v02
v14 = (v14 << 8) | (v14 >> 24)
v10 += v14
v06 ^= v10
v06 = (v06 << 7) | (v06 >> 25)
v03 += v07
v15 ^= v03
v15 = (v15 << 16) | (v15 >> 16)
v11 += v15
v07 ^= v11
v07 = (v07 << 12) | (v07 >> 20)
v03 += v07
v15 ^= v03
v15 = (v15 << 8) | (v15 >> 24)
v11 += v15
v07 ^= v11
v07 = (v07 << 7) | (v07 >> 25)
v00 += v05
v15 ^= v00
v15 = (v15 << 16) | (v15 >> 16)
v10 += v15
v05 ^= v10
v05 = (v05 << 12) | (v05 >> 20)
v00 += v05
v15 ^= v00
v15 = (v15 << 8) | (v15 >> 24)
v10 += v15
v05 ^= v10
v05 = (v05 << 7) | (v05 >> 25)
v01 += v06
v12 ^= v01
v12 = (v12 << 16) | (v12 >> 16)
v11 += v12
v06 ^= v11
v06 = (v06 << 12) | (v06 >> 20)
v01 += v06
v12 ^= v01
v12 = (v12 << 8) | (v12 >> 24)
v11 += v12
v06 ^= v11
v06 = (v06 << 7) | (v06 >> 25)
v02 += v07
v13 ^= v02
v13 = (v13 << 16) | (v13 >> 16)
v08 += v13
v07 ^= v08
v07 = (v07 << 12) | (v07 >> 20)
v02 += v07
v13 ^= v02
v13 = (v13 << 8) | (v13 >> 24)
v08 += v13
v07 ^= v08
v07 = (v07 << 7) | (v07 >> 25)
v03 += v04
v14 ^= v03
v14 = (v14 << 16) | (v14 >> 16)
v09 += v14
v04 ^= v09
v04 = (v04 << 12) | (v04 >> 20)
v03 += v04
v14 ^= v03
v14 = (v14 << 8) | (v14 >> 24)
v09 += v14
v04 ^= v09
v04 = (v04 << 7) | (v04 >> 25)
}
v00 += s00
v01 += s01
v02 += s02
v03 += s03
v04 += s04
v05 += s05
v06 += s06
v07 += s07
v08 += s08
v09 += s09
v10 += s10
v11 += s11
v12 += s12
v13 += s13
v14 += s14
v15 += s15
s12++
binary.LittleEndian.PutUint32(state[48:], s12)
if s12 == 0 { // indicates overflow
s13++
binary.LittleEndian.PutUint32(state[52:], s13)
}
binary.LittleEndian.PutUint32(dst[0:], v00)
binary.LittleEndian.PutUint32(dst[4:], v01)
binary.LittleEndian.PutUint32(dst[8:], v02)
binary.LittleEndian.PutUint32(dst[12:], v03)
binary.LittleEndian.PutUint32(dst[16:], v04)
binary.LittleEndian.PutUint32(dst[20:], v05)
binary.LittleEndian.PutUint32(dst[24:], v06)
binary.LittleEndian.PutUint32(dst[28:], v07)
binary.LittleEndian.PutUint32(dst[32:], v08)
binary.LittleEndian.PutUint32(dst[36:], v09)
binary.LittleEndian.PutUint32(dst[40:], v10)
binary.LittleEndian.PutUint32(dst[44:], v11)
binary.LittleEndian.PutUint32(dst[48:], v12)
binary.LittleEndian.PutUint32(dst[52:], v13)
binary.LittleEndian.PutUint32(dst[56:], v14)
binary.LittleEndian.PutUint32(dst[60:], v15)
}
func hChaCha20Generic(out *[32]byte, nonce *[16]byte, key *[32]byte) {
v00 := sigma[0]
v01 := sigma[1]
v02 := sigma[2]
v03 := sigma[3]
v04 := binary.LittleEndian.Uint32(key[0:])
v05 := binary.LittleEndian.Uint32(key[4:])
v06 := binary.LittleEndian.Uint32(key[8:])
v07 := binary.LittleEndian.Uint32(key[12:])
v08 := binary.LittleEndian.Uint32(key[16:])
v09 := binary.LittleEndian.Uint32(key[20:])
v10 := binary.LittleEndian.Uint32(key[24:])
v11 := binary.LittleEndian.Uint32(key[28:])
v12 := binary.LittleEndian.Uint32(nonce[0:])
v13 := binary.LittleEndian.Uint32(nonce[4:])
v14 := binary.LittleEndian.Uint32(nonce[8:])
v15 := binary.LittleEndian.Uint32(nonce[12:])
for i := 0; i < 20; i += 2 {
v00 += v04
v12 ^= v00
v12 = (v12 << 16) | (v12 >> 16)
v08 += v12
v04 ^= v08
v04 = (v04 << 12) | (v04 >> 20)
v00 += v04
v12 ^= v00
v12 = (v12 << 8) | (v12 >> 24)
v08 += v12
v04 ^= v08
v04 = (v04 << 7) | (v04 >> 25)
v01 += v05
v13 ^= v01
v13 = (v13 << 16) | (v13 >> 16)
v09 += v13
v05 ^= v09
v05 = (v05 << 12) | (v05 >> 20)
v01 += v05
v13 ^= v01
v13 = (v13 << 8) | (v13 >> 24)
v09 += v13
v05 ^= v09
v05 = (v05 << 7) | (v05 >> 25)
v02 += v06
v14 ^= v02
v14 = (v14 << 16) | (v14 >> 16)
v10 += v14
v06 ^= v10
v06 = (v06 << 12) | (v06 >> 20)
v02 += v06
v14 ^= v02
v14 = (v14 << 8) | (v14 >> 24)
v10 += v14
v06 ^= v10
v06 = (v06 << 7) | (v06 >> 25)
v03 += v07
v15 ^= v03
v15 = (v15 << 16) | (v15 >> 16)
v11 += v15
v07 ^= v11
v07 = (v07 << 12) | (v07 >> 20)
v03 += v07
v15 ^= v03
v15 = (v15 << 8) | (v15 >> 24)
v11 += v15
v07 ^= v11
v07 = (v07 << 7) | (v07 >> 25)
v00 += v05
v15 ^= v00
v15 = (v15 << 16) | (v15 >> 16)
v10 += v15
v05 ^= v10
v05 = (v05 << 12) | (v05 >> 20)
v00 += v05
v15 ^= v00
v15 = (v15 << 8) | (v15 >> 24)
v10 += v15
v05 ^= v10
v05 = (v05 << 7) | (v05 >> 25)
v01 += v06
v12 ^= v01
v12 = (v12 << 16) | (v12 >> 16)
v11 += v12
v06 ^= v11
v06 = (v06 << 12) | (v06 >> 20)
v01 += v06
v12 ^= v01
v12 = (v12 << 8) | (v12 >> 24)
v11 += v12
v06 ^= v11
v06 = (v06 << 7) | (v06 >> 25)
v02 += v07
v13 ^= v02
v13 = (v13 << 16) | (v13 >> 16)
v08 += v13
v07 ^= v08
v07 = (v07 << 12) | (v07 >> 20)
v02 += v07
v13 ^= v02
v13 = (v13 << 8) | (v13 >> 24)
v08 += v13
v07 ^= v08
v07 = (v07 << 7) | (v07 >> 25)
v03 += v04
v14 ^= v03
v14 = (v14 << 16) | (v14 >> 16)
v09 += v14
v04 ^= v09
v04 = (v04 << 12) | (v04 >> 20)
v03 += v04
v14 ^= v03
v14 = (v14 << 8) | (v14 >> 24)
v09 += v14
v04 ^= v09
v04 = (v04 << 7) | (v04 >> 25)
}
binary.LittleEndian.PutUint32(out[0:], v00)
binary.LittleEndian.PutUint32(out[4:], v01)
binary.LittleEndian.PutUint32(out[8:], v02)
binary.LittleEndian.PutUint32(out[12:], v03)
binary.LittleEndian.PutUint32(out[16:], v12)
binary.LittleEndian.PutUint32(out[20:], v13)
binary.LittleEndian.PutUint32(out[24:], v14)
binary.LittleEndian.PutUint32(out[28:], v15)
}

View file

@ -0,0 +1,56 @@
// Copyright (c) 2017 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// +build amd64,!gccgo,!appengine,!nacl,!go1.7
package chacha
func init() {
useSSE2 = true
useSSSE3 = supportsSSSE3()
useAVX2 = false
}
// This function is implemented in chacha_amd64.s
//go:noescape
func initialize(state *[64]byte, key []byte, nonce *[16]byte)
// This function is implemented in chacha_amd64.s
//go:noescape
func supportsSSSE3() bool
// This function is implemented in chacha_amd64.s
//go:noescape
func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
// This function is implemented in chacha_amd64.s
//go:noescape
func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
// This function is implemented in chacha_amd64.s
//go:noescape
func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
// This function is implemented in chacha_amd64.s
//go:noescape
func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
if useSSSE3 {
hChaCha20SSSE3(out, nonce, key)
} else if useSSE2 { // on amd64 this is always true - used to test generic on amd64
hChaCha20SSE2(out, nonce, key)
} else {
hChaCha20Generic(out, nonce, key)
}
}
func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
if useSSSE3 {
return xorKeyStreamSSSE3(dst, src, block, state, rounds)
} else if useSSE2 { // on amd64 this is always true - used to test generic on amd64
return xorKeyStreamSSE2(dst, src, block, state, rounds)
}
return xorKeyStreamGeneric(dst, src, block, state, rounds)
}

View file

@ -0,0 +1,72 @@
// Copyright (c) 2017 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// +build go1.7,amd64,!gccgo,!appengine,!nacl
package chacha
func init() {
useSSE2 = true
useSSSE3 = supportsSSSE3()
useAVX2 = supportsAVX2()
}
// This function is implemented in chacha_amd64.s
//go:noescape
func initialize(state *[64]byte, key []byte, nonce *[16]byte)
// This function is implemented in chacha_amd64.s
//go:noescape
func supportsSSSE3() bool
// This function is implemented in chachaAVX2_amd64.s
//go:noescape
func supportsAVX2() bool
// This function is implemented in chacha_amd64.s
//go:noescape
func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
// This function is implemented in chacha_amd64.s
//go:noescape
func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
// This function is implemented in chachaAVX2_amd64.s
//go:noescape
func hChaCha20AVX(out *[32]byte, nonce *[16]byte, key *[32]byte)
// This function is implemented in chacha_amd64.s
//go:noescape
func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
// This function is implemented in chacha_amd64.s
//go:noescape
func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
// This function is implemented in chachaAVX2_amd64.s
//go:noescape
func xorKeyStreamAVX2(dst, src []byte, block, state *[64]byte, rounds int) int
func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
if useAVX2 {
hChaCha20AVX(out, nonce, key)
} else if useSSSE3 {
hChaCha20SSSE3(out, nonce, key)
} else if useSSE2 { // on amd64 this is always true - neccessary for testing generic on amd64
hChaCha20SSE2(out, nonce, key)
} else {
hChaCha20Generic(out, nonce, key)
}
}
func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
if useAVX2 {
return xorKeyStreamAVX2(dst, src, block, state, rounds)
} else if useSSSE3 {
return xorKeyStreamSSSE3(dst, src, block, state, rounds)
} else if useSSE2 { // on amd64 this is always true - neccessary for testing generic on amd64
return xorKeyStreamSSE2(dst, src, block, state, rounds)
}
return xorKeyStreamGeneric(dst, src, block, state, rounds)
}

26
vendor/github.com/aead/chacha20/chacha/chacha_ref.go generated vendored Normal file
View file

@ -0,0 +1,26 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// +build !amd64,!386 gccgo appengine nacl
package chacha
import "encoding/binary"
func initialize(state *[64]byte, key []byte, nonce *[16]byte) {
binary.LittleEndian.PutUint32(state[0:], sigma[0])
binary.LittleEndian.PutUint32(state[4:], sigma[1])
binary.LittleEndian.PutUint32(state[8:], sigma[2])
binary.LittleEndian.PutUint32(state[12:], sigma[3])
copy(state[16:], key[:])
copy(state[48:], nonce[:])
}
func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
return xorKeyStreamGeneric(dst, src, block, state, rounds)
}
func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
hChaCha20Generic(out, nonce, key)
}

382
vendor/github.com/aead/chacha20/chacha/chacha_test.go generated vendored Normal file
View file

@ -0,0 +1,382 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
package chacha
import (
"bytes"
"encoding/hex"
"testing"
)
func toHex(bits []byte) string {
return hex.EncodeToString(bits)
}
func fromHex(bits string) []byte {
b, err := hex.DecodeString(bits)
if err != nil {
panic(err)
}
return b
}
func TestHChaCha20(t *testing.T) {
defer func(sse2, ssse3, avx2 bool) {
useSSE2, useSSSE3, useAVX2 = sse2, ssse3, avx2
}(useSSE2, useSSSE3, useAVX2)
if useAVX2 {
t.Log("AVX2 version")
testHChaCha20(t)
useAVX2 = false
}
if useSSSE3 {
t.Log("SSSE3 version")
testHChaCha20(t)
useSSSE3 = false
}
if useSSE2 {
t.Log("SSE2 version")
testHChaCha20(t)
useSSE2 = false
}
t.Log("generic version")
testHChaCha20(t)
}
func TestVectors(t *testing.T) {
defer func(sse2, ssse3, avx2 bool) {
useSSE2, useSSSE3, useAVX2 = sse2, ssse3, avx2
}(useSSE2, useSSSE3, useAVX2)
if useAVX2 {
t.Log("AVX2 version")
testVectors(t)
useAVX2 = false
}
if useSSSE3 {
t.Log("SSSE3 version")
testVectors(t)
useSSSE3 = false
}
if useSSE2 {
t.Log("SSE2 version")
testVectors(t)
useSSE2 = false
}
t.Log("generic version")
testVectors(t)
}
func TestIncremental(t *testing.T) {
defer func(sse2, ssse3, avx2 bool) {
useSSE2, useSSSE3, useAVX2 = sse2, ssse3, avx2
}(useSSE2, useSSSE3, useAVX2)
if useAVX2 {
t.Log("AVX2 version")
testIncremental(t, 5, 2049)
useAVX2 = false
}
if useSSSE3 {
t.Log("SSSE3 version")
testIncremental(t, 5, 2049)
useSSSE3 = false
}
if useSSE2 {
t.Log("SSE2 version")
testIncremental(t, 5, 2049)
}
}
func testHChaCha20(t *testing.T) {
for i, v := range hChaCha20Vectors {
var key [32]byte
var nonce [16]byte
copy(key[:], v.key)
copy(nonce[:], v.nonce)
hChaCha20(&key, &nonce, &key)
if !bytes.Equal(key[:], v.keystream) {
t.Errorf("Test %d: keystream mismatch:\n \t got: %s\n \t want: %s", i, toHex(key[:]), toHex(v.keystream))
}
}
}
func testVectors(t *testing.T) {
for i, v := range vectors {
if len(v.plaintext) == 0 {
v.plaintext = make([]byte, len(v.ciphertext))
}
dst := make([]byte, len(v.ciphertext))
XORKeyStream(dst, v.plaintext, v.nonce, v.key, v.rounds)
if !bytes.Equal(dst, v.ciphertext) {
t.Errorf("Test %d: ciphertext mismatch:\n \t got: %s\n \t want: %s", i, toHex(dst), toHex(v.ciphertext))
}
c, err := NewCipher(v.nonce, v.key, v.rounds)
if err != nil {
t.Fatal(err)
}
c.XORKeyStream(dst[:1], v.plaintext[:1])
c.XORKeyStream(dst[1:], v.plaintext[1:])
if !bytes.Equal(dst, v.ciphertext) {
t.Errorf("Test %d: ciphertext mismatch:\n \t got: %s\n \t want: %s", i, toHex(dst), toHex(v.ciphertext))
}
}
}
func testIncremental(t *testing.T, iter int, size int) {
sse2, ssse3, avx2 := useSSE2, useSSSE3, useAVX2
msg, ref, stream := make([]byte, size), make([]byte, size), make([]byte, size)
for i := 0; i < iter; i++ {
var key [32]byte
var nonce []byte
switch i % 3 {
case 0:
nonce = make([]byte, 8)
case 1:
nonce = make([]byte, 12)
case 2:
nonce = make([]byte, 24)
}
for j := range key {
key[j] = byte(len(nonce) + i)
}
for j := range nonce {
nonce[j] = byte(i)
}
for j := 0; j <= len(msg); j++ {
useSSE2, useSSSE3, useAVX2 = false, false, false
XORKeyStream(ref[:j], msg[:j], nonce, key[:], 20)
useSSE2, useSSSE3, useAVX2 = sse2, ssse3, avx2
XORKeyStream(stream[:j], msg[:j], nonce, key[:], 20)
if !bytes.Equal(ref[:j], stream[:j]) {
t.Fatalf("Iteration %d failed:\n Message length: %d\n\n got: %s\nwant: %s", i, j, toHex(stream[:j]), toHex(ref[:j]))
}
useSSE2, useSSSE3, useAVX2 = false, false, false
c, _ := NewCipher(nonce, key[:], 20)
c.XORKeyStream(stream[:j], msg[:j])
useSSE2, useSSSE3, useAVX2 = sse2, ssse3, avx2
c, _ = NewCipher(nonce, key[:], 20)
c.XORKeyStream(stream[:j], msg[:j])
if !bytes.Equal(ref[:j], stream[:j]) {
t.Fatalf("Iteration %d failed:\n Message length: %d\n\n got: %s\nwant: %s", i, j, toHex(stream[:j]), toHex(ref[:j]))
}
}
copy(msg, stream)
}
}
var hChaCha20Vectors = []struct {
key, nonce, keystream []byte
}{
{
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
fromHex("000000000000000000000000000000000000000000000000"),
fromHex("1140704c328d1d5d0e30086cdf209dbd6a43b8f41518a11cc387b669b2ee6586"),
},
{
fromHex("8000000000000000000000000000000000000000000000000000000000000000"),
fromHex("000000000000000000000000000000000000000000000000"),
fromHex("7d266a7fd808cae4c02a0a70dcbfbcc250dae65ce3eae7fc210f54cc8f77df86"),
},
{
fromHex("0000000000000000000000000000000000000000000000000000000000000001"),
fromHex("000000000000000000000000000000000000000000000002"),
fromHex("e0c77ff931bb9163a5460c02ac281c2b53d792b1c43fea817e9ad275ae546963"),
},
{
fromHex("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"),
fromHex("000102030405060708090a0b0c0d0e0f1011121314151617"),
fromHex("51e3ff45a895675c4b33b46c64f4a9ace110d34df6a2ceab486372bacbd3eff6"),
},
}
var vectors = []struct {
key, nonce, plaintext, ciphertext []byte
rounds int
}{
{
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
fromHex("000000000000000000000000"),
nil,
fromHex("76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586"),
20,
},
{
fromHex("0000000000000000000000000000000000000000000000000000000000000001"),
fromHex("000000000000000000000002"),
fromHex("00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" +
"416e79207375626d697373696f6e20746f20746865204945544620696e74656e6465642062792074686520436f6e7472696275746f7220666f72207075626c69" +
"636174696f6e20617320616c6c206f722070617274206f6620616e204945544620496e7465726e65742d4472616674206f722052464320616e6420616e792073" +
"746174656d656e74206d6164652077697468696e2074686520636f6e74657874206f6620616e204945544620616374697669747920697320636f6e7369646572" +
"656420616e20224945544620436f6e747269627574696f6e222e20537563682073746174656d656e747320696e636c756465206f72616c2073746174656d656e" +
"747320696e20494554462073657373696f6e732c2061732077656c6c206173207772697474656e20616e6420656c656374726f6e696320636f6d6d756e696361" +
"74696f6e73206d61646520617420616e792074696d65206f7220706c6163652c207768696368206172652061646472657373656420746f"),
fromHex("ecfa254f845f647473d3cb140da9e87606cb33066c447b87bc2666dde3fbb739a371c9ec7abcb4cfa9211f7d90f64c2d07f89e5cf9b93e330a6e4c08af5ba6d5" +
"a3fbf07df3fa2fde4f376ca23e82737041605d9f4f4f57bd8cff2c1d4b7955ec2a97948bd3722915c8f3d337f7d370050e9e96d647b7c39f56e031ca5eb6250d" +
"4042e02785ececfa4b4bb5e8ead0440e20b6e8db09d881a7c6132f420e52795042bdfa7773d8a9051447b3291ce1411c680465552aa6c405b7764d5e87bea85a" +
"d00f8449ed8f72d0d662ab052691ca66424bc86d2df80ea41f43abf937d3259dc4b2d0dfb48a6c9139ddd7f76966e928e635553ba76c5c879d7b35d49eb2e62b" +
"0871cdac638939e25e8a1e0ef9d5280fa8ca328b351c3c765989cbcf3daa8b6ccc3aaf9f3979c92b3720fc88dc95ed84a1be059c6499b9fda236e7e818b04b0b" +
"c39c1e876b193bfe5569753f88128cc08aaa9b63d1a16f80ef2554d7189c411f5869ca52c5b83fa36ff216b9c1d30062bebcfd2dc5bce0911934fda79a86f6e6" +
"98ced759c3ff9b6477338f3da4f9cd8514ea9982ccafb341b2384dd902f3d1ab7ac61dd29c6f21ba5b862f3730e37cfdc4fd806c22f221"),
20,
},
{
fromHex("8000000000000000000000000000000000000000000000000000000000000000"),
fromHex("000000000000000000000000"),
nil,
fromHex("e29edae0466dea17f2576ce95025dd2db2d34fc81b5153f1b70a87f315a35286"),
20,
},
{
fromHex("8000000000000000000000000000000000000000000000000000000000000000"),
fromHex("000000000000000000000000"),
nil,
fromHex("e29edae0466dea17f2576ce95025dd2db2d34fc81b5153f1b70a87f315a35286fb56db91e8dbf0a93faaa25777aad63450dae65ce3eae7fc210f54cc8f77df8662f8" +
"955228b2358d61d8c5ccf63a6c40203be5fb4541c39c52861de70b8a1416ddd3fe9a818bae8f0e8ff2288cede0459fbb00032fd85fef972fcb586c228d"),
20,
},
{
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
fromHex("0000000000000000"),
nil,
fromHex("76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee65869f07" +
"e7be5551387a98ba977c732d080dcb0f29a048e3656912c6533e32ee7aed29b721769ce64e43d57133b074d839d531ed1f28510afb45ace10a1f4b794d6f2d09a0e663266ce1ae7ed1081968a0758e7" +
"18e997bd362c6b0c34634a9a0b35d012737681f7b5d0f281e3afde458bc1e73d2d313c9cf94c05ff3716240a248f21320a058d7b3566bd520daaa3ed2bf0ac5b8b120fb852773c3639734b45c91a42d" +
"d4cb83f8840d2eedb158131062ac3f1f2cf8ff6dcd1856e86a1e6c3167167ee5a688742b47c5adfb59d4df76fd1db1e51ee03b1ca9f82aca173edb8b7293474ebe980f904d10c916442b4783a0e9848" +
"60cb6c957b39c38ed8f51cffaa68a4de01025a39c504546b9dc1406a7eb28151e5150d7b204baa719d4f091021217db5cf1b5c84c4fa71a879610a1a695ac527c5b56774a6b8a21aae88685868e094c" +
"f29ef4090af7a90cc07e8817aa528763797d3c332b67ca4bc110642c2151ec47ee84cb8c42d85f10e2a8cb18c3b7335f26e8c39a12b1bcc1707177b7613873"),
20,
},
{
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
fromHex("0100000000000000"),
nil,
fromHex("ef3fdfd6c61578fbf5cf35bd3dd33b8009631634d21e42ac33960bd138e50d32111e4caf237ee53ca8ad6426194a88545ddc497a0b466e7d6bbdb0041b2f586b5305" +
"e5e44aff19b235936144675efbe4409eb7e8e5f1430f5f5836aeb49bb5328b017c4b9dc11f8a03863fa803dc71d5726b2b6b31aa32708afe5af1d6b690584d58792b271e5fdb92c486051c48b79a4d4" +
"8a109bb2d0477956e74c25e93c3c2db34bf779470464a033b8394517a5cf3576a6618c8551a456628b253ef0117c90cd46d8177a2a06d16e20e05c05f889bf87e95d6ee8a03807d1cd53d586872b125" +
"9d0647da7b7aae80af9b3aad41ad5a8141d2e156c9dd52a3bd2ae165bd7d6a2a4e2cf6938b8b390828ff20dc8fd60e2cd17fe368e35b467a70654ba93cfa62760a9d2f26da7818d4d863808e1add5ff" +
"db76d41efd524ded4246e03caa008950c91dedfc9a8e68173fe481c4d3d3c215fdf3af22aeab0097b835a84faabbbce094c6181a193ffeda067271ff7c10cce76542241116283842e31e922430211dc" +
"b38e556158fc2daaec367b705b75f782f8bc2c2c5e33a375390c3052f7e3446feb105fb47820f1d2539811c5b49bb76dc15f2d20a7e2c200b573db9f653ed7"),
20,
},
{
fromHex("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"),
fromHex("0001020304050607"),
nil,
fromHex("f798a189f195e66982105ffb640bb7757f579da31602fc93ec01ac56f85ac3c134a4547b733b46413042c9440049176905d3be59ea1c53f15916155c2be8241a3800" +
"8b9a26bc35941e2444177c8ade6689de95264986d95889fb60e84629c9bd9a5acb1cc118be563eb9b3a4a472f82e09a7e778492b562ef7130e88dfe031c79db9d4f7c7a899151b9a475032b63fc3852" +
"45fe054e3dd5a97a5f576fe064025d3ce042c566ab2c507b138db853e3d6959660996546cc9c4a6eafdc777c040d70eaf46f76dad3979e5c5360c3317166a1c894c94a371876a94df7628fe4eaaf2cc" +
"b27d5aaae0ad7ad0f9d4b6ad3b54098746d4524d38407a6deb3ab78fab78c94213668bbbd394c5de93b853178addd6b97f9fa1ec3e56c00c9ddff0a44a204241175a4cab0f961ba53ede9bdf960b94f" +
"9829b1f3414726429b362c5b538e391520f489b7ed8d20ae3fd49e9e259e44397514d618c96c4846be3c680bdc11c71dcbbe29ccf80d62a0938fa549391e6ea57ecbe2606790ec15d2224ae307c1442" +
"26b7c4e8c2f97d2a1d67852d29beba110edd445197012062a393a9c92803ad3b4f31d7bc6033ccf7932cfed3f019044d25905916777286f82f9a4cc1ffe430"),
20,
},
{
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
fromHex("000000000000000000000000"),
nil,
fromHex("9bf49a6a0755f953811fce125f2683d50429c3bb49e074147e0089a52eae155f0564f879d27ae3c02ce82834acfa8c793a629f2ca0de6919610be82f411326be0bd588" +
"41203e74fe86fc71338ce0173dc628ebb719bdcbcc151585214cc089b442258dcda14cf111c602b8971b8cc843e91e46ca905151c02744a6b017e69316b20cd67c4bdecc538e8be990c1b6425d68bfd3a" +
"6fe97693e4846351596cca8abf59fddd0b7f52dcc0c60a448cbf9511610b0a742f1e4d238a7a45cae054ec2"),
12,
},
{
fromHex("8000000000000000000000000000000000000000000000000000000000000000"),
fromHex("000000000000000000000000"),
nil,
fromHex("789cc357f0b6cda5395f08c8538f1226d08eb3e16ebd6b6db6cc9ca77d81d900bb9d21f6ef0b720550d161f1a80fab0468e48c086daad356edce3a3f988d8e"),
12,
},
{
fromHex("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"),
fromHex("0001020304050607"),
nil,
fromHex("6898eb04f3d151985e28e882f35daf28d2a1689f79081ffb08cdc48edbbd3dcd683c764f3dd7302293928ca3d4ef4194e6e22f41a72204a14b89115d06ca29fb0b9f6e" +
"ba3da6793a928afe76cdf62a5d5b0898bb9bb2348612189fdb825e5aa7559c9ec79ff80d05079fad81e9bc2521b2ebcb179cebeade91f20ff3e13192d60de2ee983ec07047e7827594773c28448d89e9b" +
"96bb0f8665b1a56f85abebd584a446e17d5a6fb847a1dbf341ece5124ff5f80d4a57fb7edf65a2907939b2f3c9654ccbfa2e5225edc8d799bf7ce296d6c8f9234cec0bd7b91b3d2ddc27f93ff8591ddb3" +
"62b54fab111a7da9d5b4187661ed0e691f7aa5959fb83112427a95bbeb"),
12,
},
{
fromHex("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"),
fromHex("0001020304050607"),
nil,
fromHex("40e1aaea1c843baa28b18eb728fec05dce47b0e824bf9a5d3f1bb1aad13b37fbbf0b0e146732c16380efeab70a1b6edff9acedc876b70d98b61f192290537973"),
8,
},
{
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
fromHex("000000000000000000000000000000000000000000000000"),
nil,
fromHex("bcd02a18bf3f01d19292de30a7a8fdaca4b65e50a6002cc72cd6d2f7c91ac3d5728f83e0aad2bfcf9abd2d2db58faedd65015dd83fc09b131e271043019e8e0f789e96" +
"89e5208d7fd9e1f3c5b5341f48ef18a13e418998addadd97a3693a987f8e82ecd5c1433bfed1af49750c0f1ff29c4174a05b119aa3a9e8333812e0c0fea49e1ee0134a70a9d49c24e0cbd8fc3ba27e97c" +
"3322ad487f778f8dc6a122fa59cbe33e7"),
20,
},
{
fromHex("8000000000000000000000000000000000000000000000000000000000000000"),
fromHex("000000000000000000000000000000000000000000000000"),
nil,
fromHex("ccfe8a9e93431bd582f07b3eb0f4a7afc22ef39337ddd84f0d3545b318a315a32b3abb96de0fc6acde48b248fe8a80e6fa72bfcdf9d8d2656b991676476f052d937308" +
"0e30d8c0e217126a3c64402e1d9404ba9d6b8ce4ad5ac9693f3660638c26ea2cd1b4a8d3348c1e179ead353ee72fee558e9994c51a27195e287d00ec2f8cfef8866d1f98714f40cbe4e18cebabf3cd1fd" +
"3bb65506e5dce1ad09f438bffe2c96d7f2f0827c8c3f2ca59dbaa393785c6b8da7c69c8a4a63ffd113dcc93de8f52dbcfaed5e4cbcc1dc310b1352868fab7b14d930a9f7a7d47bed0eaf5b151f6dac8bd" +
"45510698bdc205d70b944ea5450888dd3ec753da9708bf06c0714822dda74f285c361abd0cd1071324c253dc421905edca36e8808bffef091e7dbdecebdad98cf70b7cede72e9c3c4108e5b32ffae0f42" +
"151a8196939d8e3b8384be1"),
20,
},
{
fromHex("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"),
fromHex("000102030405060708090a0b0c0d0e0f1011121314151617"),
nil,
fromHex("e53a61cef151e81401067de33adfc02e90ab205361b49b539fda7f0e63b1bc7d68fbee56c9c20c39960e595f3ea76c979804d08cfa728e66cb5f766b840ec61f9ec20f" +
"7f90d28dae334426cecb52a8e84b4728a5fdd61deb7f1a3fb63dadf5595e06b6e441670964d595ae59cf21536271bae2594774fb19079b933d8fe744f4"),
20,
},
{
fromHex("FF00000000000000000000000000000000000000000000000000000000000000"),
fromHex("000000000000000000000000"),
nil,
fromHex("4fe0956ef81829ff96ef093f03c15dc0eaf4e6905eff9777a5db78348915689ed64204e8fce664cb71ea4016185d15e05be4329e02fcd472707508ef62fd89565ffa632effdb" +
"bf08394aa437d8ff093e6cea49b61672cf294474927a8150e06cec9fdec0f5cf26f257fe335a8d7dd6d208e6df6f0a83bb1b0b5c574edc2c9a604e4310acb970815a9819c91a5137794d1ee71ede3e5d59f27e76" +
"84d287d704fe3945de0a9b66be3d86e66980263602aeb600efaef243b1adf4c701dbf8f57427dee71dacd703d25317ffc7a67e7881ad13f0bf096d3b0486eec71fef5e0efb5964d14eb2cea0336e34ed4444cc2b" +
"bdbd8ef5ba89a0a5e9e35a2e23b38d3f9136f42aefb25c2e7eae0b42c1d1ada5618c5299aedd469ce4f9353ccbae3f89110922b669b8d1b62e72aaf893b83ca264707efbefdcf22ef2333b01f18a849653b52925" +
"63c37314bf34289b0636a2f8c24bc97fec554a9c31ec2cb4e30ba70fa965a17561e56739be138d86a4777f866ca24ba24f70913230e1b3ea34a9a90eea1b6a3a81b93286bb582a53e78557845a654775a18efb77" +
"eee098d2680bc4ceb866874f31c7fadd70262cca6039833522de03cb2527dc5cfc7072db48b6011b852d705c7b24ffedf52facf352ab2512c625811db7965edc87d08f7f27e02665c9a6a42968e4c58cd86aa847" +
"69658153b62f208b2dcfbcb364d63e6671cf60698640"),
20,
},
{
fromHex("0120000000000000000000000000007000000000000000000000000000000DEF"),
fromHex("000000000000000000000000"),
nil,
fromHex("ba6bce79c4f79c815b7fec53840ff0549ff5496378aa1f6ba481a48a5b9b8dbea8b820eccbc4eca37e1050fc53510a746037d2707f81e9683ec3f495b02ad0f848d7f9bf67bc" +
"6299be525d1bf3bfd9953caa12cc4e1d5a6969e6fcd5d3c3e3d9f2e735cd7808755ddda7b22a3ae6040e7f8d05d62661a97d84dad694c69637aea3ae0af9f73303ffce3ae6161281d7a3c7e50a5706d766b34ddd" +
"eab6974fdab10b3f48fb31f26df72e54c616edf1afc019f240c059a7c003677008227f49b021bc23c9c51d6f85ad136a4aa4950d9692f7094d344d88c05868691eb620d39bd8154986c971a8c9552ff0015fd78a" +
"6bdd33df94b0056786a1e0ceb9cc9a38a31fbba224c1fb82bf6af376f67e94337a730301a6365d49b0dd56328e0269cbdfb5bcbccf1c7c3f4922ec1310aa2ef8136be788a55190453d3d3153b1b960a16f79365a" +
"0bc7d6d2d5cda9f0993dbb815ee72f83b9d2ed296598fb21d91c29d1acf4ff0a549784a1d6a4f0935ee18efbf41fdc98d81c449544e9701d92648c06e5f416833b90d15fd4c04fc720a5ec6c6fc8b3d85a66826a" +
"5e6817e21c4c4c0d7151b128236c41397ad4c6549e827c42269659973c153db70ffc33951b19ff21428091cea3836f72f88082508bae1839b59fa9c2556bdf373419d3cf29a8fad4d1787d829ad884f9927228fc" +
"0b8bb7f1a067e7bdbf06c3885154f76f5be0cde8c7c59442b72b0e3f0341afe644e7eb4c29a467288aebc893e17b446c63da7551b8b59ebdd0cbcd65bc79a969bd3397f83d149840de731df4c09a833d5bd9feda" +
"e1cd78a09b233b020de86ab71b9fd425adf84e502cef7c62015eade66ca91b0a90306894b53c7c5147e524d7b919ccdd0731e4eef8fe476b6eed38c91b611cd1777b9acf6eee0a11eaff16ae872db92a5d133fe7" +
"bed999882da283893dd1e96f530be3cd36bf38c16deed2cd77651b6e0d3628de3cb86a78f1d07f6fc79434da5f73888be617b84595acef154f66b95ade1a3e120421a9dac6eec1e5b60139da3d604a03d4a9b7a3" +
"0810a9c7d551aa8df08e11544486ad33000bfe410e8e6f35cb9d22806a5fcacefc6a1257d373d426243576fad9b20ad5ba84befc1a47c79d7bd2923b5776d3df86c8ed98b700d317502849ec8c02ecb8513a7a32" +
"e2db15e75a814f12cfc20429ae06cae2021406b4f174ce56dca65f7994a3b2722e764520a52f87d0a887fc771dbfbf381b4f750dc074fedec1a43a4df37a5a2c148f89d9630ebbd1be1858bed10207cdacae9a0a" +
"b92df58de53de4718f929a83474fbcf9969f1d28a5b257cacd56f0ff0bc425c93d8c91ac833c2cfefb97d82fe6236f3ec3c29e0112a6cac5abfec733db41265f8ff486e7d7fa0b3d9766357377f089056c9408d8" +
"2f09f18700236cc1058ea1c273e287d07d521fdbb5e28d41cc1d95999eccee"),
20,
},
}

41
vendor/github.com/aead/chacha20/chacha20.go generated vendored Normal file
View file

@ -0,0 +1,41 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// Package chacha20 implements the ChaCha20 / XChaCha20 stream chipher.
// Notice that one specific key-nonce combination must be unique for all time.
//
// There are three versions of ChaCha20:
// - ChaCha20 with a 64 bit nonce (en/decrypt up to 2^64 * 64 bytes for one key-nonce combination)
// - ChaCha20 with a 96 bit nonce (en/decrypt up to 2^32 * 64 bytes (~256 GB) for one key-nonce combination)
// - XChaCha20 with a 192 bit nonce (en/decrypt up to 2^64 * 64 bytes for one key-nonce combination)
package chacha20 // import "github.com/aead/chacha20"
import (
"crypto/cipher"
"github.com/aead/chacha20/chacha"
)
// XORKeyStream crypts bytes from src to dst using the given nonce and key.
// The length of the nonce determinds the version of ChaCha20:
// - 8 bytes: ChaCha20 with a 64 bit nonce and a 2^64 * 64 byte period.
// - 12 bytes: ChaCha20 as defined in RFC 7539 and a 2^32 * 64 byte period.
// - 24 bytes: XChaCha20 with a 192 bit nonce and a 2^64 * 64 byte period.
// Src and dst may be the same slice but otherwise should not overlap.
// If len(dst) < len(src) this function panics.
// If the nonce is neither 64, 96 nor 192 bits long, this function panics.
func XORKeyStream(dst, src, nonce, key []byte) {
chacha.XORKeyStream(dst, src, nonce, key, 20)
}
// NewCipher returns a new cipher.Stream implementing a ChaCha20 version.
// The nonce must be unique for one key for all time.
// The length of the nonce determinds the version of ChaCha20:
// - 8 bytes: ChaCha20 with a 64 bit nonce and a 2^64 * 64 byte period.
// - 12 bytes: ChaCha20 as defined in RFC 7539 and a 2^32 * 64 byte period.
// - 24 bytes: XChaCha20 with a 192 bit nonce and a 2^64 * 64 byte period.
// If the nonce is neither 64, 96 nor 192 bits long, a non-nil error is returned.
func NewCipher(nonce, key []byte) (cipher.Stream, error) {
return chacha.NewCipher(nonce, key, 20)
}

108
vendor/github.com/aead/chacha20/chacha20_test.go generated vendored Normal file
View file

@ -0,0 +1,108 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
package chacha20
import (
"bytes"
"encoding/hex"
"testing"
"github.com/aead/chacha20/chacha"
)
func toHex(bits []byte) string {
return hex.EncodeToString(bits)
}
func fromHex(bits string) []byte {
b, err := hex.DecodeString(bits)
if err != nil {
panic(err)
}
return b
}
func TestVectors(t *testing.T) {
for i, v := range vectors {
if len(v.plaintext) == 0 {
v.plaintext = make([]byte, len(v.ciphertext))
}
dst := make([]byte, len(v.ciphertext))
XORKeyStream(dst, v.plaintext, v.nonce, v.key)
if !bytes.Equal(dst, v.ciphertext) {
t.Errorf("Test %d: ciphertext mismatch:\n \t got: %s\n \t want: %s", i, toHex(dst), toHex(v.ciphertext))
}
c, err := NewCipher(v.nonce, v.key)
if err != nil {
t.Fatal(err)
}
c.XORKeyStream(dst[:1], v.plaintext[:1])
c.XORKeyStream(dst[1:], v.plaintext[1:])
if !bytes.Equal(dst, v.ciphertext) {
t.Errorf("Test %d: ciphertext mismatch:\n \t got: %s\n \t want: %s", i, toHex(dst), toHex(v.ciphertext))
}
}
}
func benchmarkCipher(b *testing.B, size int, nonceSize int) {
var key [32]byte
nonce := make([]byte, nonceSize)
c, _ := NewCipher(nonce, key[:])
buf := make([]byte, size)
b.SetBytes(int64(len(buf)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
c.XORKeyStream(buf, buf)
}
}
func benchmarkXORKeyStream(b *testing.B, size int, nonceSize int) {
var key [32]byte
nonce := make([]byte, nonceSize)
buf := make([]byte, size)
b.SetBytes(int64(len(buf)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
XORKeyStream(buf, buf, nonce[:], key[:])
}
}
func BenchmarkChaCha20_64(b *testing.B) { benchmarkCipher(b, 64, chacha.NonceSize) }
func BenchmarkChaCha20_1K(b *testing.B) { benchmarkCipher(b, 1024, chacha.NonceSize) }
func BenchmarkXChaCha20_64(b *testing.B) { benchmarkXORKeyStream(b, 64, chacha.XNonceSize) }
func BenchmarkXChaCha20_1K(b *testing.B) { benchmarkXORKeyStream(b, 1024, chacha.XNonceSize) }
func BenchmarkXORKeyStream64(b *testing.B) { benchmarkXORKeyStream(b, 64, chacha.NonceSize) }
func BenchmarkXORKeyStream1K(b *testing.B) { benchmarkXORKeyStream(b, 1024, chacha.NonceSize) }
func BenchmarkXChaCha20_XORKeyStream64(b *testing.B) { benchmarkXORKeyStream(b, 64, chacha.XNonceSize) }
func BenchmarkXChaCha20_XORKeyStream1K(b *testing.B) {
benchmarkXORKeyStream(b, 1024, chacha.XNonceSize)
}
var vectors = []struct {
key, nonce, plaintext, ciphertext []byte
}{
{
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
fromHex("0000000000000000"),
nil,
fromHex("76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586"),
},
{
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
fromHex("000000000000000000000000"),
nil,
fromHex("76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586"),
},
{
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
fromHex("000000000000000000000000000000000000000000000000"),
nil,
fromHex("bcd02a18bf3f01d19292de30a7a8fdaca4b65e50a6002cc72cd6d2f7c91ac3d5728f83e0aad2bfcf9abd2d2db58faedd65015dd83fc09b131e271043019e8e0f"),
},
}

25
vendor/github.com/aead/poly1305/.gitignore generated vendored Normal file
View file

@ -0,0 +1,25 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
.vscode
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof

16
vendor/github.com/aead/poly1305/.travis.yml generated vendored Normal file
View file

@ -0,0 +1,16 @@
language: go
go:
- 1.7
- 1.8
env:
- TRAVIS_GOARCH=amd64
- TRAVIS_GOARCH=386
before_install:
- export GOARCH=$TRAVIS_GOARCH
branches:
only:
- master

21
vendor/github.com/aead/poly1305/LICENSE generated vendored Normal file
View file

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2016 Andreas Auernhammer
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

48
vendor/github.com/aead/poly1305/README.md generated vendored Normal file
View file

@ -0,0 +1,48 @@
[![Godoc Reference](https://godoc.org/github.com/aead/poly1305?status.svg)](https://godoc.org/github.com/aead/poly1305)
## The poly1305 message authentication code
Poly1305 is a fast, one-time authentication function created by Daniel J. Bernstein.
It is infeasible for an attacker to generate an authenticator for a message without the key.
However, a key must only be used for a single message. Authenticating two different messages
with the same key allows an attacker to forge authenticators for other messages with the same key.
### Installation
Install in your GOPATH: `go get -u github.com/aead/poly1305`
### Requirements
All Go versions >= 1.7 are supported.
### Performance
#### AMD64
Hardware: Intel i7-6500U 2.50GHz x 2
System: Linux Ubuntu 16.04 - kernel: 4.4.0-62-generic
Go version: 1.8.0
**AVX2**
```
name speed cpb
Sum_64-4 1.60GB/s ± 0% 1.39
Sum_256-4 2.32GB/s ± 1% 1.00
Sum_1K-4 3.61GB/s ± 1% 0.65
Sum_8K-4 4.20GB/s ± 1% 0.55
Write_64-4 2.04GB/s ± 0% 1.14
Write_256-4 3.50GB/s ± 2% 0.67
Write_1K-4 4.08GB/s ± 2% 0.57
Write_8K-4 4.25GB/s ± 2% 0.55
```
**x64**
```
name speed cpb
Sum_64-4 1.60GB/s ± 1% 1.46
Sum_256-4 2.11GB/s ± 3% 1.10
Sum_1K-4 2.35GB/s ±13% 0.99
Sum_8K-4 2.47GB/s ±13% 0.94
Write_64-4 1.81GB/s ± 5% 1.29
Write_256-4 2.24GB/s ± 4% 1.04
Write_1K-4 2.55GB/s ± 0% 0.91
Write_8K-4 2.63GB/s ± 0% 0.88
```

30
vendor/github.com/aead/poly1305/poly1305.go generated vendored Normal file
View file

@ -0,0 +1,30 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// Package poly1305 implements Poly1305 one-time message authentication code
// defined in RFC 7539..
//
// Poly1305 is a fast, one-time authentication function. It is infeasible for an
// attacker to generate an authenticator for a message without the key.
// However, a key must only be used for a single message. Authenticating two
// different messages with the same key allows an attacker to forge
// authenticators for other messages with the same key.
package poly1305 // import "github.com/aead/poly1305"
import (
"crypto/subtle"
"errors"
)
// TagSize is the size of the poly1305 authentication tag in bytes.
const TagSize = 16
var errWriteAfterSum = errors.New("checksum already computed - adding more data is not allowed")
// Verify returns true if and only if the mac is a valid authenticator
// for msg with the given key.
func Verify(mac *[TagSize]byte, msg []byte, key [32]byte) bool {
sum := Sum(msg, key)
return subtle.ConstantTimeCompare(sum[:], mac[:]) == 1
}

871
vendor/github.com/aead/poly1305/poly1305_AVX2_amd64.s generated vendored Normal file
View file

@ -0,0 +1,871 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// This code is inspired by the poly1305 AVX2 implementation by Shay Gueron, and Vlad Krasnov.
// +build amd64, !gccgo, !appengine
#include "textflag.h"
DATA addMaskAVX2<>+0x00(SB)/8, $0x3FFFFFF
DATA addMaskAVX2<>+0x08(SB)/8, $0x3FFFFFF
DATA addMaskAVX2<>+0x10(SB)/8, $0x3FFFFFF
DATA addMaskAVX2<>+0x18(SB)/8, $0x3FFFFFF
GLOBL addMaskAVX2<>(SB), RODATA, $32
DATA poly1305MaskAVX2<>+0x00(SB)/8, $0xFFFFFFC0FFFFFFF
DATA poly1305MaskAVX2<>+0x08(SB)/8, $0xFFFFFFC0FFFFFFF
DATA poly1305MaskAVX2<>+0x10(SB)/8, $0xFFFFFFC0FFFFFFF
DATA poly1305MaskAVX2<>+0x18(SB)/8, $0xFFFFFFC0FFFFFFF
DATA poly1305MaskAVX2<>+0x20(SB)/8, $0xFFFFFFC0FFFFFFC
DATA poly1305MaskAVX2<>+0x28(SB)/8, $0xFFFFFFC0FFFFFFC
DATA poly1305MaskAVX2<>+0x30(SB)/8, $0xFFFFFFC0FFFFFFC
DATA poly1305MaskAVX2<>+0x38(SB)/8, $0xFFFFFFC0FFFFFFC
GLOBL poly1305MaskAVX2<>(SB), RODATA, $64
DATA oneBit<>+0x00(SB)/8, $0x1000000
DATA oneBit<>+0x08(SB)/8, $0x1000000
DATA oneBit<>+0x10(SB)/8, $0x1000000
DATA oneBit<>+0x18(SB)/8, $0x1000000
GLOBL oneBit<>(SB), RODATA, $32
DATA fixPermutation<>+0x00(SB)/4, $6
DATA fixPermutation<>+0x04(SB)/4, $7
DATA fixPermutation<>+0x08(SB)/4, $6
DATA fixPermutation<>+0x0c(SB)/4, $7
DATA fixPermutation<>+0x10(SB)/4, $6
DATA fixPermutation<>+0x14(SB)/4, $7
DATA fixPermutation<>+0x18(SB)/4, $6
DATA fixPermutation<>+0x1c(SB)/4, $7
DATA fixPermutation<>+0x20(SB)/4, $4
DATA fixPermutation<>+0x24(SB)/4, $5
DATA fixPermutation<>+0x28(SB)/4, $6
DATA fixPermutation<>+0x2c(SB)/4, $7
DATA fixPermutation<>+0x30(SB)/4, $6
DATA fixPermutation<>+0x34(SB)/4, $7
DATA fixPermutation<>+0x38(SB)/4, $6
DATA fixPermutation<>+0x3c(SB)/4, $7
DATA fixPermutation<>+0x40(SB)/4, $2
DATA fixPermutation<>+0x44(SB)/4, $3
DATA fixPermutation<>+0x48(SB)/4, $6
DATA fixPermutation<>+0x4c(SB)/4, $7
DATA fixPermutation<>+0x50(SB)/4, $4
DATA fixPermutation<>+0x54(SB)/4, $5
DATA fixPermutation<>+0x58(SB)/4, $6
DATA fixPermutation<>+0x5c(SB)/4, $7
DATA fixPermutation<>+0x60(SB)/4, $0
DATA fixPermutation<>+0x64(SB)/4, $1
DATA fixPermutation<>+0x68(SB)/4, $4
DATA fixPermutation<>+0x6c(SB)/4, $5
DATA fixPermutation<>+0x70(SB)/4, $2
DATA fixPermutation<>+0x74(SB)/4, $3
DATA fixPermutation<>+0x78(SB)/4, $6
DATA fixPermutation<>+0x7c(SB)/4, $7
GLOBL fixPermutation<>(SB), RODATA, $128
TEXT ·initializeAVX2(SB), $0-16
MOVQ state+0(FP), DI
MOVQ key+8(FP), SI
MOVQ $addMaskAVX2<>(SB), R8
MOVOU 16*1(SI), X10
MOVOU X10, 288(DI)
PXOR X10, X10
MOVOU X10, 304(DI)
MOVD X10, 320(DI)
MOVQ 8*0(SI), X5
MOVQ 8*1(SI), X10
VZEROUPPER
MOVQ $poly1305MaskAVX2<>(SB), R9
VPAND (R9), X5, X5
VPAND 32(R9), X10, X10
VMOVDQU 0(R8), X0
VPSRLQ $26, X5, X6
VPAND X0, X5, X5
VPSRLQ $26, X6, X7
VPAND X0, X6, X6
VPSLLQ $12, X10, X11
VPXOR X11, X7, X7
VPSRLQ $26, X7, X8
VPSRLQ $40, X10, X9
VPAND X0, X7, X7
VPAND X0, X8, X8
BYTE $0xc5; BYTE $0xd1; BYTE $0xf4; BYTE $0xc5 // VPMULUDQ X5, X5, X0
BYTE $0xc5; BYTE $0xd1; BYTE $0xf4; BYTE $0xce // VPMULUDQ X6, X5, X1
BYTE $0xc5; BYTE $0xd1; BYTE $0xf4; BYTE $0xd7 // VPMULUDQ X7, X5, X2
BYTE $0xc4; BYTE $0xc1; BYTE $0x51; BYTE $0xf4; BYTE $0xd8 // VPMULUDQ X8, X5, X3
BYTE $0xc4; BYTE $0xc1; BYTE $0x51; BYTE $0xf4; BYTE $0xe1 // VPMULUDQ X9, X5, X4
VPSLLQ $1, X1, X1
VPSLLQ $1, X2, X2
BYTE $0xc5; BYTE $0x49; BYTE $0xf4; BYTE $0xd6 // VPMULUDQ X6, X6, X10
VPADDQ X10, X2, X2
BYTE $0xc5; BYTE $0x49; BYTE $0xf4; BYTE $0xd7 // VPMULUDQ X7, X6, X10
VPADDQ X10, X3, X3
BYTE $0xc4; BYTE $0x41; BYTE $0x49; BYTE $0xf4; BYTE $0xd0 // VPMULUDQ X8, X6, X10
VPADDQ X10, X4, X4
BYTE $0xc4; BYTE $0x41; BYTE $0x49; BYTE $0xf4; BYTE $0xe1 // VPMULUDQ X9, X6, X12
VPSLLQ $1, X3, X3
VPSLLQ $1, X4, X4
BYTE $0xc5; BYTE $0x41; BYTE $0xf4; BYTE $0xd7 // VPMULUDQ X7, X7, X10
VPADDQ X10, X4, X4
BYTE $0xc4; BYTE $0x41; BYTE $0x41; BYTE $0xf4; BYTE $0xd0 // VPMULUDQ X8, X7, X10
VPADDQ X10, X12, X12
BYTE $0xc4; BYTE $0x41; BYTE $0x41; BYTE $0xf4; BYTE $0xe9 // VPMULUDQ X9, X7, X13
VPSLLQ $1, X12, X12
VPSLLQ $1, X13, X13
BYTE $0xc4; BYTE $0x41; BYTE $0x39; BYTE $0xf4; BYTE $0xd0 // VPMULUDQ X8, X8, X10
VPADDQ X10, X13, X13
BYTE $0xc4; BYTE $0x41; BYTE $0x39; BYTE $0xf4; BYTE $0xf1 // VPMULUDQ X9, X8, X14
VPSLLQ $1, X14, X14
BYTE $0xc4; BYTE $0x41; BYTE $0x31; BYTE $0xf4; BYTE $0xf9 // VPMULUDQ X9, X9, X15
VPSRLQ $26, X4, X10
VPAND 0(R8), X4, X4
VPADDQ X10, X12, X12
VPSLLQ $2, X12, X10
VPADDQ X10, X12, X12
VPSLLQ $2, X13, X10
VPADDQ X10, X13, X13
VPSLLQ $2, X14, X10
VPADDQ X10, X14, X14
VPSLLQ $2, X15, X10
VPADDQ X10, X15, X15
VPADDQ X12, X0, X0
VPADDQ X13, X1, X1
VPADDQ X14, X2, X2
VPADDQ X15, X3, X3
VPSRLQ $26, X0, X10
VPAND 0(R8), X0, X0
VPADDQ X10, X1, X1
VPSRLQ $26, X1, X10
VPAND 0(R8), X1, X1
VPADDQ X10, X2, X2
VPSRLQ $26, X2, X10
VPAND 0(R8), X2, X2
VPADDQ X10, X3, X3
VPSRLQ $26, X3, X10
VPAND 0(R8), X3, X3
VPADDQ X10, X4, X4
BYTE $0xc5; BYTE $0xf9; BYTE $0x6c; BYTE $0xed // VPUNPCKLQDQ X5, X0, X5
BYTE $0xc5; BYTE $0xf1; BYTE $0x6c; BYTE $0xf6 // VPUNPCKLQDQ X6, X1, X6
BYTE $0xc5; BYTE $0xe9; BYTE $0x6c; BYTE $0xff // VPUNPCKLQDQ X7, X2, X7
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ X8, X3, X8
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ X9, X4, X9
VMOVDQU X5, 0+16(DI)
VMOVDQU X6, 32+16(DI)
VMOVDQU X7, 64+16(DI)
VMOVDQU X8, 96+16(DI)
VMOVDQU X9, 128+16(DI)
VPSLLQ $2, X6, X1
VPSLLQ $2, X7, X2
VPSLLQ $2, X8, X3
VPSLLQ $2, X9, X4
VPADDQ X1, X6, X1
VPADDQ X2, X7, X2
VPADDQ X3, X8, X3
VPADDQ X4, X9, X4
VMOVDQU X1, 160+16(DI)
VMOVDQU X2, 192+16(DI)
VMOVDQU X3, 224+16(DI)
VMOVDQU X4, 256+16(DI)
VPSHUFD $68, X5, X0
VPSHUFD $68, X6, X1
VPSHUFD $68, X7, X2
VPSHUFD $68, X8, X3
VPSHUFD $68, X9, X4
VMOVDQU 0+16(DI), X10
BYTE $0xc4; BYTE $0xc1; BYTE $0x79; BYTE $0xf4; BYTE $0xea // VPMULUDQ X10, X0, X5
BYTE $0xc4; BYTE $0xc1; BYTE $0x71; BYTE $0xf4; BYTE $0xf2 // VPMULUDQ X10, X1, X6
BYTE $0xc4; BYTE $0xc1; BYTE $0x69; BYTE $0xf4; BYTE $0xfa // VPMULUDQ X10, X2, X7
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0xf4; BYTE $0xc2 // VPMULUDQ X10, X3, X8
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0xf4; BYTE $0xca // VPMULUDQ X10, X4, X9
VMOVDQU 160+16(DI), X10
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X4, X11
VPADDQ X11, X5, X5
VMOVDQU 32+16(DI), X10
BYTE $0xc4; BYTE $0x41; BYTE $0x79; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X0, X11
VPADDQ X11, X6, X6
BYTE $0xc4; BYTE $0x41; BYTE $0x71; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X1, X11
VPADDQ X11, X7, X7
BYTE $0xc4; BYTE $0x41; BYTE $0x69; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X2, X11
VPADDQ X11, X8, X8
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X3, X11
VPADDQ X11, X9, X9
VMOVDQU 192+16(DI), X10
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X3, X11
VPADDQ X11, X5, X5
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X4, X11
VPADDQ X11, X6, X6
VMOVDQU 64+16(DI), X10
BYTE $0xc4; BYTE $0x41; BYTE $0x79; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X0, X11
VPADDQ X11, X7, X7
BYTE $0xc4; BYTE $0x41; BYTE $0x71; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X1, X11
VPADDQ X11, X8, X8
BYTE $0xc4; BYTE $0x41; BYTE $0x69; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X2, X11
VPADDQ X11, X9, X9
VMOVDQU 224+16(DI), X10
BYTE $0xc4; BYTE $0x41; BYTE $0x69; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X2, X11
VPADDQ X11, X5, X5
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X3, X11
VPADDQ X11, X6, X6
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X4, X11
VPADDQ X11, X7, X7
VMOVDQU 96+16(DI), X10
BYTE $0xc4; BYTE $0x41; BYTE $0x79; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X0, X11
VPADDQ X11, X8, X8
BYTE $0xc4; BYTE $0x41; BYTE $0x71; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X1, X11
VPADDQ X11, X9, X9
VMOVDQU 256+16(DI), X10
BYTE $0xc4; BYTE $0x41; BYTE $0x71; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X1, X11
VPADDQ X11, X5, X5
BYTE $0xc4; BYTE $0x41; BYTE $0x69; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X2, X11
VPADDQ X11, X6, X6
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X3, X11
VPADDQ X11, X7, X7
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X4, X11
VPADDQ X11, X8, X8
VMOVDQU 128+16(DI), X10
BYTE $0xc4; BYTE $0x41; BYTE $0x79; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X0, X11
VPADDQ X11, X9, X9
VMOVDQU 0(R8), X12
VPSRLQ $26, X8, X10
VPADDQ X10, X9, X9
VPAND X12, X8, X8
VPSRLQ $26, X9, X10
VPSLLQ $2, X10, X11
VPADDQ X11, X10, X10
VPADDQ X10, X5, X5
VPAND X12, X9, X9
VPSRLQ $26, X5, X10
VPAND X12, X5, X5
VPADDQ X10, X6, X6
VPSRLQ $26, X6, X10
VPAND X12, X6, X6
VPADDQ X10, X7, X7
VPSRLQ $26, X7, X10
VPAND X12, X7, X7
VPADDQ X10, X8, X8
VPSRLQ $26, X8, X10
VPAND X12, X8, X8
VPADDQ X10, X9, X9
VMOVDQU X5, 0(DI)
VMOVDQU X6, 32(DI)
VMOVDQU X7, 64(DI)
VMOVDQU X8, 96(DI)
VMOVDQU X9, 128(DI)
VPSLLQ $2, X6, X1
VPSLLQ $2, X7, X2
VPSLLQ $2, X8, X3
VPSLLQ $2, X9, X4
VPADDQ X1, X6, X1
VPADDQ X2, X7, X2
VPADDQ X3, X8, X3
VPADDQ X4, X9, X4
VMOVDQU X1, 160(DI)
VMOVDQU X2, 192(DI)
VMOVDQU X3, 224(DI)
VMOVDQU X4, 256(DI)
RET
TEXT ·updateAVX2(SB), $0-24
MOVQ state+0(FP), DI
MOVQ msg+8(FP), SI
MOVQ msg_len+16(FP), DX
MOVD 304(DI), X0
MOVD 308(DI), X1
MOVD 312(DI), X2
MOVD 316(DI), X3
MOVD 320(DI), X4
MOVQ $addMaskAVX2<>(SB), R12
MOVQ $oneBit<>(SB), R13
MOVQ $fixPermutation<>(SB), R15
VZEROUPPER
VMOVDQA (R12), Y12
CMPQ DX, $128
JB BETWEEN_0_AND_128
AT_LEAST_128:
VMOVDQU 32*0(SI), Y9
VMOVDQU 32*1(SI), Y10
ADDQ $64, SI
BYTE $0xc4; BYTE $0xc1; BYTE $0x35; BYTE $0x6c; BYTE $0xfa // VPUNPCKLQDQ Y10,Y9,Y7
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ Y10,Y9,Y8
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xff; BYTE $0xd8 // VPERMQ $216,Y7,Y7
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0; BYTE $0xd8 // VPERMQ $216,Y8,Y8
VPSRLQ $26, Y7, Y9
VPAND Y12, Y7, Y7
VPADDQ Y7, Y0, Y0
VPSRLQ $26, Y9, Y7
VPAND Y12, Y9, Y9
VPADDQ Y9, Y1, Y1
VPSLLQ $12, Y8, Y9
VPXOR Y9, Y7, Y7
VPAND Y12, Y7, Y7
VPADDQ Y7, Y2, Y2
VPSRLQ $26, Y9, Y7
VPSRLQ $40, Y8, Y9
VPAND Y12, Y7, Y7
VPXOR (R13), Y9, Y9
VPADDQ Y7, Y3, Y3
VPADDQ Y9, Y4, Y4
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0x2f // VPBROADCASTQ 0(DI), Y5
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xfd // VPMULUDQ Y5, Y0, Y7
BYTE $0xc5; BYTE $0x75; BYTE $0xf4; BYTE $0xc5 // VPMULUDQ Y5, Y1, Y8
BYTE $0xc5; BYTE $0x6d; BYTE $0xf4; BYTE $0xcd // VPMULUDQ Y5, Y2, Y9
BYTE $0xc5; BYTE $0x65; BYTE $0xf4; BYTE $0xd5 // VPMULUDQ Y5, Y3, Y10
BYTE $0xc5; BYTE $0x5d; BYTE $0xf4; BYTE $0xdd // VPMULUDQ Y5, Y4, Y11
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0xaf; BYTE $0xa0; BYTE $0x00; BYTE $0x00; BYTE $0x00 // VPBROADCASTQ 160(DI), Y5
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
VPADDQ Y6, Y7, Y7
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0x6f; BYTE $0x20 // VPBROADCASTQ 32(DI), Y5
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
VPADDQ Y6, Y8, Y8
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
VPADDQ Y6, Y9, Y9
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
VPADDQ Y6, Y10, Y10
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
VPADDQ Y6, Y11, Y11
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0xaf; BYTE $0xc0; BYTE $0x00; BYTE $0x00; BYTE $0x00 // VPBROADCASTQ 192(DI), Y5
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
VPADDQ Y6, Y7, Y7
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
VPADDQ Y6, Y8, Y8
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0x6f; BYTE $0x40 // VPBROADCASTQ 64(DI), Y5
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
VPADDQ Y6, Y9, Y9
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
VPADDQ Y6, Y10, Y10
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
VPADDQ Y6, Y11, Y11
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0xaf; BYTE $0xe0; BYTE $0x00; BYTE $0x00; BYTE $0x00 // VPBROADCASTQ 224(DI), Y5
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
VPADDQ Y6, Y7, Y7
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
VPADDQ Y6, Y8, Y8
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
VPADDQ Y6, Y9, Y9
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0x6f; BYTE $0x60 // VPBROADCASTQ 96(DI), Y5
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
VPADDQ Y6, Y10, Y10
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
VPADDQ Y6, Y11, Y11
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0xaf; BYTE $0x00; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VPBROADCASTQ 256(DI), Y5
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
VPADDQ Y6, Y7, Y7
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2,Y6
VPADDQ Y6, Y8, Y8
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
VPADDQ Y6, Y9, Y9
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
VPADDQ Y6, Y10, Y10
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0xaf; BYTE $0x80; BYTE $0x00; BYTE $0x00; BYTE $0x00 // VPBROADCASTQ 128(DI),Y5
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
VPADDQ Y6, Y11, Y11
VPSRLQ $26, Y10, Y5
VPADDQ Y5, Y11, Y11
VPAND Y12, Y10, Y10
VPSRLQ $26, Y11, Y5
VPSLLQ $2, Y5, Y6
VPADDQ Y6, Y5, Y5
VPADDQ Y5, Y7, Y7
VPAND Y12, Y11, Y11
VPSRLQ $26, Y7, Y5
VPAND Y12, Y7, Y0
VPADDQ Y5, Y8, Y8
VPSRLQ $26, Y8, Y5
VPAND Y12, Y8, Y1
VPADDQ Y5, Y9, Y9
VPSRLQ $26, Y9, Y5
VPAND Y12, Y9, Y2
VPADDQ Y5, Y10, Y10
VPSRLQ $26, Y10, Y5
VPAND Y12, Y10, Y3
VPADDQ Y5, Y11, Y4
SUBQ $64, DX
CMPQ DX, $128
JAE AT_LEAST_128
BETWEEN_0_AND_128:
CMPQ DX, $64
JB BETWEEN_0_AND_64
VMOVDQU 32*0(SI), Y9
VMOVDQU 32*1(SI), Y10
ADDQ $64, SI
BYTE $0xc4; BYTE $0xc1; BYTE $0x35; BYTE $0x6c; BYTE $0xfa // VPUNPCKLQDQ Y10, Y9, Y7
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ Y10, Y9, Y8
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xff; BYTE $0xd8 // VPERMQ $216, Y7, Y7
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0; BYTE $0xd8 // VPERMQ $216, Y8, Y8
VPSRLQ $26, Y7, Y9
VPAND Y12, Y7, Y7
VPADDQ Y7, Y0, Y0
VPSRLQ $26, Y9, Y7
VPAND Y12, Y9, Y9
VPADDQ Y9, Y1, Y1
VPSLLQ $12, Y8, Y9
VPXOR Y9, Y7, Y7
VPAND Y12, Y7, Y7
VPADDQ Y7, Y2, Y2
VPSRLQ $26, Y9, Y7
VPSRLQ $40, Y8, Y9
VPAND Y12, Y7, Y7
VPXOR (R13), Y9, Y9
VPADDQ Y7, Y3, Y3
VPADDQ Y9, Y4, Y4
VMOVDQU 0(DI), Y5
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xfd // VPMULUDQ Y5, Y0, Y7
BYTE $0xc5; BYTE $0x75; BYTE $0xf4; BYTE $0xc5 // VPMULUDQ Y5, Y1, Y8
BYTE $0xc5; BYTE $0x6d; BYTE $0xf4; BYTE $0xcd // VPMULUDQ Y5, Y2, Y9
BYTE $0xc5; BYTE $0x65; BYTE $0xf4; BYTE $0xd5 // VPMULUDQ Y5, Y3, Y10
BYTE $0xc5; BYTE $0x5d; BYTE $0xf4; BYTE $0xdd // VPMULUDQ Y5, Y4, Y11
VMOVDQU 160(DI), Y5
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
VPADDQ Y6, Y7, Y7
VMOVDQU 32(DI), Y5
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
VPADDQ Y6, Y8, Y8
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
VPADDQ Y6, Y9, Y9
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
VPADDQ Y6, Y10, Y10
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
VPADDQ Y6, Y11, Y11
VMOVDQU 192(DI), Y5
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
VPADDQ Y6, Y7, Y7
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
VPADDQ Y6, Y8, Y8
VMOVDQU 64(DI), Y5
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
VPADDQ Y6, Y9, Y9
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
VPADDQ Y6, Y10, Y10
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
VPADDQ Y6, Y11, Y11
VMOVDQU 224(DI), Y5
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
VPADDQ Y6, Y7, Y7
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
VPADDQ Y6, Y8, Y8
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
VPADDQ Y6, Y9, Y9
VMOVDQU 96(DI), Y5
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
VPADDQ Y6, Y10, Y10
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
VPADDQ Y6, Y11, Y11
VMOVDQU 256(DI), Y5
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
VPADDQ Y6, Y7, Y7
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
VPADDQ Y6, Y8, Y8
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
VPADDQ Y6, Y9, Y9
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
VPADDQ Y6, Y10, Y10
VMOVDQU 128(DI), Y5
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
VPADDQ Y6, Y11, Y11
VPSRLQ $26, Y10, Y5
VPADDQ Y5, Y11, Y11
VPAND Y12, Y10, Y10
VPSRLQ $26, Y11, Y5
VPSLLQ $2, Y5, Y6
VPADDQ Y6, Y5, Y5
VPADDQ Y5, Y7, Y7
VPAND Y12, Y11, Y11
VPSRLQ $26, Y7, Y5
VPAND Y12, Y7, Y0
VPADDQ Y5, Y8, Y8
VPSRLQ $26, Y8, Y5
VPAND Y12, Y8, Y1
VPADDQ Y5, Y9, Y9
VPSRLQ $26, Y9, Y5
VPAND Y12, Y9, Y2
VPADDQ Y5, Y10, Y10
VPSRLQ $26, Y10, Y5
VPAND Y12, Y10, Y3
VPADDQ Y5, Y11, Y4
VPSRLDQ $8, Y0, Y7
VPSRLDQ $8, Y1, Y8
VPSRLDQ $8, Y2, Y9
VPSRLDQ $8, Y3, Y10
VPSRLDQ $8, Y4, Y11
VPADDQ Y7, Y0, Y0
VPADDQ Y8, Y1, Y1
VPADDQ Y9, Y2, Y2
VPADDQ Y10, Y3, Y3
VPADDQ Y11, Y4, Y4
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xf8; BYTE $0xaa // VPERMQ $170, Y0, Y7
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xc1; BYTE $0xaa // VPERMQ $170, Y1, Y8
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xca; BYTE $0xaa // VPERMQ $170, Y2, Y9
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xd3; BYTE $0xaa // VPERMQ $170, Y3, Y10
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xdc; BYTE $0xaa // VPERMQ $170, Y4, Y11
VPADDQ Y7, Y0, Y0
VPADDQ Y8, Y1, Y1
VPADDQ Y9, Y2, Y2
VPADDQ Y10, Y3, Y3
VPADDQ Y11, Y4, Y4
SUBQ $64, DX
BETWEEN_0_AND_64:
TESTQ DX, DX
JZ DONE
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0xc0 // VMOVQ X0, X0
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0xc9 // VMOVQ X1, X1
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0xd2 // VMOVQ X2, X2
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0xdb // VMOVQ X3, X3
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0xe4 // VMOVQ X4, X4
MOVQ (R13), BX
MOVQ SP, AX
TESTQ $15, DX
JZ FULL_BLOCKS
SUBQ $64, SP
VPXOR Y7, Y7, Y7
VMOVDQU Y7, (SP)
VMOVDQU Y7, 32(SP)
XORQ BX, BX
FLUSH_BUFFER:
MOVB (SI)(BX*1), CX
MOVB CX, (SP)(BX*1)
INCQ BX
CMPQ DX, BX
JNE FLUSH_BUFFER
MOVB $1, (SP)(BX*1)
XORQ BX, BX
MOVQ SP, SI
FULL_BLOCKS:
CMPQ DX, $16
JA AT_LEAST_16
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0x3e // VMOVQ 8*0(SI), X7
BYTE $0xc5; BYTE $0x7a; BYTE $0x7e; BYTE $0x46; BYTE $0x08 // VMOVQ 8*1(SI), X8
BYTE $0xc4; BYTE $0x61; BYTE $0xf9; BYTE $0x6e; BYTE $0xf3 // VMOVQ BX ,X14
VMOVDQA (R15), Y13
JMP MULTIPLY
AT_LEAST_16:
CMPQ DX, $32
JA AT_LEAST_32
VMOVDQU 16*0(SI), X9
VMOVDQU 16*1(SI), X10
BYTE $0xc4; BYTE $0x41; BYTE $0x7a; BYTE $0x7e; BYTE $0x75; BYTE $0x00 // VMOVQ (R13), X14
BYTE $0xc4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0xf3; BYTE $0x01 // VPINSRQ $1,BX, X14, X14
VMOVDQA 32(R15), Y13
BYTE $0xc4; BYTE $0xc1; BYTE $0x35; BYTE $0x6c; BYTE $0xfa // VPUNPCKLQDQ Y10, Y9, Y7
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ Y10, Y9, Y8
JMP MULTIPLY
AT_LEAST_32:
CMPQ DX, $48
JA AT_LEAST_48
VMOVDQU 32*0(SI), Y9
VMOVDQU 32*1(SI), X10
BYTE $0xc4; BYTE $0x41; BYTE $0x7a; BYTE $0x7e; BYTE $0x75; BYTE $0x00 // VMOVQ 0(R13), X14
BYTE $0xc4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0xf3; BYTE $0x01 // VPINSRQ $1, BX, X14, X14
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xf6; BYTE $0xc4 // VPERMQ $196, Y14, Y14
VMOVDQA 64(R15), Y13
BYTE $0xc4; BYTE $0xc1; BYTE $0x35; BYTE $0x6c; BYTE $0xfa // VPUNPCKLQDQ Y10, Y9, Y7
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ Y10, Y9, Y8
JMP MULTIPLY
AT_LEAST_48:
VMOVDQU 32*0(SI), Y9
VMOVDQU 32*1(SI), Y10
BYTE $0xc4; BYTE $0x41; BYTE $0x7a; BYTE $0x7e; BYTE $0x75; BYTE $0x00 // VMOVQ (R13),X14
BYTE $0xc4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0xf3; BYTE $0x01 // VPINSRQ $1,BX,X14,X14
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xf6; BYTE $0x40 // VPERMQ $64,Y14,Y14
VMOVDQA 96(R15), Y13
BYTE $0xc4; BYTE $0xc1; BYTE $0x35; BYTE $0x6c; BYTE $0xfa // VPUNPCKLQDQ Y10, Y9, Y7
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ Y10, Y9, Y8
MULTIPLY:
MOVQ AX, SP
VPSRLQ $26, Y7, Y9
VPAND Y12, Y7, Y7
VPADDQ Y7, Y0, Y0
VPSRLQ $26, Y9, Y7
VPAND Y12, Y9, Y9
VPADDQ Y9, Y1, Y1
VPSLLQ $12, Y8, Y9
VPXOR Y9, Y7, Y7
VPAND Y12, Y7, Y7
VPADDQ Y7, Y2, Y2
VPSRLQ $26, Y9, Y7
VPSRLQ $40, Y8, Y9
VPAND Y12, Y7, Y7
VPXOR Y14, Y9, Y9
VPADDQ Y7, Y3, Y3
VPADDQ Y9, Y4, Y4
VMOVDQU 0(DI), Y5
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xfd // VPMULUDQ Y5, Y0, Y7
BYTE $0xc5; BYTE $0x75; BYTE $0xf4; BYTE $0xc5 // VPMULUDQ Y5, Y1, Y8
BYTE $0xc5; BYTE $0x6d; BYTE $0xf4; BYTE $0xcd // VPMULUDQ Y5, Y2, Y9
BYTE $0xc5; BYTE $0x65; BYTE $0xf4; BYTE $0xd5 // VPMULUDQ Y5, Y3, Y10
BYTE $0xc5; BYTE $0x5d; BYTE $0xf4; BYTE $0xdd // VPMULUDQ Y5, Y4, Y11
VMOVDQU 160(DI), Y5
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
VPADDQ Y6, Y7, Y7
VMOVDQU 32(DI), Y5
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
VPADDQ Y6, Y8, Y8
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
VPADDQ Y6, Y9, Y9
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
VPADDQ Y6, Y10, Y10
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
VPADDQ Y6, Y11, Y11
VMOVDQU 192(DI), Y5
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
VPADDQ Y6, Y7, Y7
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
VPADDQ Y6, Y8, Y8
VMOVDQU 64(DI), Y5
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
VPADDQ Y6, Y9, Y9
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
VPADDQ Y6, Y10, Y10
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
VPADDQ Y6, Y11, Y11
VMOVDQU 224(DI), Y5
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
VPADDQ Y6, Y7, Y7
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
VPADDQ Y6, Y8, Y8
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
VPADDQ Y6, Y9, Y9
VMOVDQU 96(DI), Y5
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
VPADDQ Y6, Y10, Y10
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
VPADDQ Y6, Y11, Y11
VMOVDQU 256(DI), Y5
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1,Y6
VPADDQ Y6, Y7, Y7
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
VPADDQ Y6, Y8, Y8
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
VPADDQ Y6, Y9, Y9
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
VPADDQ Y6, Y10, Y10
VMOVDQU 128(DI), Y5
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
VPADDQ Y6, Y11, Y11
VPSRLQ $26, Y10, Y5
VPADDQ Y5, Y11, Y11
VPAND Y12, Y10, Y10
VPSRLQ $26, Y11, Y5
VPSLLQ $2, Y5, Y6
VPADDQ Y6, Y5, Y5
VPADDQ Y5, Y7, Y7
VPAND Y12, Y11, Y11
VPSRLQ $26, Y7, Y5
VPAND Y12, Y7, Y0
VPADDQ Y5, Y8, Y8
VPSRLQ $26, Y8, Y5
VPAND Y12, Y8, Y1
VPADDQ Y5, Y9, Y9
VPSRLQ $26, Y9, Y5
VPAND Y12, Y9, Y2
VPADDQ Y5, Y10, Y10
VPSRLQ $26, Y10, Y5
VPAND Y12, Y10, Y3
VPADDQ Y5, Y11, Y4
VPSRLDQ $8, Y0, Y7
VPSRLDQ $8, Y1, Y8
VPSRLDQ $8, Y2, Y9
VPSRLDQ $8, Y3, Y10
VPSRLDQ $8, Y4, Y11
VPADDQ Y7, Y0, Y0
VPADDQ Y8, Y1, Y1
VPADDQ Y9, Y2, Y2
VPADDQ Y10, Y3, Y3
VPADDQ Y11, Y4, Y4
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xf8; BYTE $0xaa // VPERMQ $170, Y0, Y7
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xc1; BYTE $0xaa // VPERMQ $170, Y1, Y8
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xca; BYTE $0xaa // VPERMQ $170, Y2, Y9
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xd3; BYTE $0xaa // VPERMQ $170, Y3, Y10
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xdc; BYTE $0xaa // VPERMQ $170, Y4, Y11
VPADDQ Y7, Y0, Y0
VPADDQ Y8, Y1, Y1
VPADDQ Y9, Y2, Y2
VPADDQ Y10, Y3, Y3
VPADDQ Y11, Y4, Y4
DONE:
VZEROUPPER
MOVD X0, 304(DI)
MOVD X1, 308(DI)
MOVD X2, 312(DI)
MOVD X3, 316(DI)
MOVD X4, 320(DI)
RET
TEXT ·finalizeAVX2(SB), $0-16
MOVQ out+0(FP), SI
MOVQ state+8(FP), DI
VZEROUPPER
BYTE $0xc5; BYTE $0xf9; BYTE $0x6e; BYTE $0x87; BYTE $0x30; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VMOVD 304(DI), X0
BYTE $0xc5; BYTE $0xf9; BYTE $0x6e; BYTE $0x8f; BYTE $0x34; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VMOVD 308(DI), X1
BYTE $0xc5; BYTE $0xf9; BYTE $0x6e; BYTE $0x97; BYTE $0x38; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VMOVD 312(DI), X2
BYTE $0xc5; BYTE $0xf9; BYTE $0x6e; BYTE $0x9f; BYTE $0x3c; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VMOVD 316(DI), X3
BYTE $0xc5; BYTE $0xf9; BYTE $0x6e; BYTE $0xa7; BYTE $0x40; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VMOVD 320(DI), X4
VMOVDQU addMaskAVX2<>(SB), X7
VPSRLQ $26, X4, X5
VPSLLQ $2, X5, X6
VPADDQ X6, X5, X5
VPADDQ X5, X0, X0
VPAND X7, X4, X4
VPSRLQ $26, X0, X5
VPAND X7, X0, X0
VPADDQ X5, X1, X1
VPSRLQ $26, X1, X5
VPAND X7, X1, X1
VPADDQ X5, X2, X2
VPSRLQ $26, X2, X5
VPAND X7, X2, X2
VPADDQ X5, X3, X3
VPSRLQ $26, X3, X5
VPAND X7, X3, X3
VPADDQ X5, X4, X4
VPSLLQ $26, X1, X5
VPXOR X5, X0, X0
VPSLLQ $52, X2, X5
VPXOR X5, X0, X0
VPSRLQ $12, X2, X1
VPSLLQ $14, X3, X5
VPXOR X5, X1, X1
VPSLLQ $40, X4, X5
VPXOR X5, X1, X1
VZEROUPPER
MOVQ X0, AX
MOVQ X1, BX
ADDQ 288(DI), AX
ADCQ 288+8(DI), BX
MOVQ AX, (SI)
MOVQ BX, 8(SI)
RET

197
vendor/github.com/aead/poly1305/poly1305_amd64.go generated vendored Normal file
View file

@ -0,0 +1,197 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// +build amd64, !gccgo, !appengine
package poly1305
import (
"io"
)
var useAVX2 = supportsAVX2()
//go:noescape
func supportsAVX2() bool
//go:noescape
func initialize(state *[7]uint64, key *[32]byte)
//go:noescape
func initializeAVX2(state *[512]byte, key *[32]byte)
//go:noescape
func update(state *[7]uint64, msg []byte)
//go:noescape
func updateAVX2(state *[512]byte, msg []byte)
//go:noescape
func finalize(tag *[TagSize]byte, state *[7]uint64)
//go:noescape
func finalizeAVX2(tag *[TagSize]byte, state *[512]byte)
// compiler asserts - check that poly1305Hash and poly1305HashAVX2 implements the hash interface
var (
_ (hash) = &poly1305Hash{}
_ (hash) = &poly1305HashAVX2{}
)
type hash interface {
io.Writer
Sum(b []byte) []byte
}
// Sum generates an authenticator for msg using a one-time key and returns the
// 16-byte result. Authenticating two different messages with the same key allows
// an attacker to forge messages at will.
func Sum(msg []byte, key [32]byte) [TagSize]byte {
if len(msg) == 0 {
msg = []byte{}
}
var out [TagSize]byte
if useAVX2 && len(msg) > 8*TagSize {
var state [512]byte
initializeAVX2(&state, &key)
updateAVX2(&state, msg)
finalizeAVX2(&out, &state)
} else {
var state [7]uint64 // := uint64{ h0, h1, h2, r0, r1, pad0, pad1 }
initialize(&state, &key)
update(&state, msg)
finalize(&out, &state)
}
return out
}
// New returns a Hash computing the poly1305 sum.
// Notice that Poly1305 is insecure if one key is used twice.
func New(key [32]byte) *Hash {
if useAVX2 {
h := new(poly1305HashAVX2)
initializeAVX2(&(h.state), &key)
return &Hash{h, false}
}
h := new(poly1305Hash)
initialize(&(h.state), &key)
return &Hash{h, false}
}
// Hash implements the poly1305 authenticator.
// Poly1305 cannot be used like common hash.Hash implementations,
// because using a poly1305 key twice breaks its security.
type Hash struct {
hash
done bool
}
// Size returns the number of bytes Sum will append.
func (h *Hash) Size() int { return TagSize }
// Write adds more data to the running Poly1305 hash.
// This function should return a non-nil error if a call
// to Write happens after a call to Sum. So it is not possible
// to compute the checksum and than add more data.
func (h *Hash) Write(msg []byte) (int, error) {
if h.done {
return 0, errWriteAfterSum
}
return h.hash.Write(msg)
}
// Sum appends the Poly1305 hash of the previously
// processed data to b and returns the resulting slice.
// It is safe to call this function multiple times.
func (h *Hash) Sum(b []byte) []byte {
b = h.hash.Sum(b)
h.done = true
return b
}
type poly1305Hash struct {
state [7]uint64 // := uint64{ h0, h1, h2, r0, r1, pad0, pad1 }
buf [TagSize]byte
off int
}
func (h *poly1305Hash) Write(p []byte) (n int, err error) {
n = len(p)
if h.off > 0 {
dif := TagSize - h.off
if n <= dif {
h.off += copy(h.buf[h.off:], p)
return n, nil
}
copy(h.buf[h.off:], p[:dif])
update(&(h.state), h.buf[:])
p = p[dif:]
h.off = 0
}
// process full 16-byte blocks
if nn := len(p) & (^(TagSize - 1)); nn > 0 {
update(&(h.state), p[:nn])
p = p[nn:]
}
if len(p) > 0 {
h.off += copy(h.buf[h.off:], p)
}
return
}
func (h *poly1305Hash) Sum(b []byte) []byte {
var out [TagSize]byte
state := h.state
if h.off > 0 {
update(&state, h.buf[:h.off])
}
finalize(&out, &state)
return append(b, out[:]...)
}
type poly1305HashAVX2 struct {
// r[0] | r^2[0] | r[1] | r^2[1] | r[2] | r^2[2] | r[3] | r^2[3] | r[4] | r^2[4] | r[1]*5 | r^2[1]*5 | r[2]*5 | r^2[2]*5 r[3]*5 | r^2[3]*5 r[4]*5 | r^2[4]*5
state [512]byte
buffer [8 * TagSize]byte
offset int
}
func (h *poly1305HashAVX2) Write(p []byte) (n int, err error) {
n = len(p)
if h.offset > 0 {
remaining := 8*TagSize - h.offset
if n <= remaining {
h.offset += copy(h.buffer[h.offset:], p)
return n, nil
}
copy(h.buffer[h.offset:], p[:remaining])
updateAVX2(&h.state, h.buffer[:])
p = p[remaining:]
h.offset = 0
}
// process full 8*16-byte blocks
if nn := len(p) & (^(8*TagSize - 1)); nn > 0 {
updateAVX2(&h.state, p[:nn])
p = p[nn:]
}
if len(p) > 0 {
h.offset += copy(h.buffer[:], p)
}
return
}
func (h *poly1305HashAVX2) Sum(b []byte) []byte {
var out [TagSize]byte
state := h.state
if h.offset > 0 {
updateAVX2(&state, h.buffer[:h.offset])
}
finalizeAVX2(&out, &state)
return append(b, out[:]...)
}

155
vendor/github.com/aead/poly1305/poly1305_amd64.s generated vendored Normal file
View file

@ -0,0 +1,155 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// +build amd64, !gccgo, !appengine
#include "textflag.h"
DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
GLOBL ·poly1305Mask<>(SB), RODATA, $16
#define POLY1305_ADD(msg, h0, h1, h2) \
ADDQ 0(msg), h0; \
ADCQ 8(msg), h1; \
ADCQ $1, h2; \
LEAQ 16(msg), msg
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
MOVQ r0, AX; \
MULQ h0; \
MOVQ AX, t0; \
MOVQ DX, t1; \
MOVQ r0, AX; \
MULQ h1; \
ADDQ AX, t1; \
ADCQ $0, DX; \
MOVQ r0, t2; \
IMULQ h2, t2; \
ADDQ DX, t2; \
\
MOVQ r1, AX; \
MULQ h0; \
ADDQ AX, t1; \
ADCQ $0, DX; \
MOVQ DX, h0; \
MOVQ r1, t3; \
IMULQ h2, t3; \
MOVQ r1, AX; \
MULQ h1; \
ADDQ AX, t2; \
ADCQ DX, t3; \
ADDQ h0, t2; \
ADCQ $0, t3; \
\
MOVQ t0, h0; \
MOVQ t1, h1; \
MOVQ t2, h2; \
ANDQ $3, h2; \
MOVQ t2, t0; \
ANDQ $0XFFFFFFFFFFFFFFFC, t0; \
ADDQ t0, h0; \
ADCQ t3, h1; \
ADCQ $0, h2; \
SHRQ $2, t3, t2; \
SHRQ $2, t3; \
ADDQ t2, h0; \
ADCQ t3, h1; \
ADCQ $0, h2
// func update(state *[7]uint64, msg []byte)
TEXT ·update(SB), $0-32
MOVQ state+0(FP), DI
MOVQ msg_base+8(FP), SI
MOVQ msg_len+16(FP), R15
MOVQ 0(DI), R8 // h0
MOVQ 8(DI), R9 // h1
MOVQ 16(DI), R10 // h2
MOVQ 24(DI), R11 // r0
MOVQ 32(DI), R12 // h1
CMPQ R15, $16
JB BYTES_BETWEEN_0_AND_15
LOOP:
POLY1305_ADD(SI, R8, R9, R10)
MULTIPLY:
POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
SUBQ $16, R15
CMPQ R15, $16
JAE LOOP
BYTES_BETWEEN_0_AND_15:
TESTQ R15, R15
JZ DONE
MOVQ $1, BX
XORQ CX, CX
XORQ R13, R13
ADDQ R15, SI
FLUSH_BUFFER:
SHLQ $8, BX, CX
SHLQ $8, BX
MOVB -1(SI), R13
XORQ R13, BX
DECQ SI
DECQ R15
JNZ FLUSH_BUFFER
ADDQ BX, R8
ADCQ CX, R9
ADCQ $0, R10
MOVQ $16, R15
JMP MULTIPLY
DONE:
MOVQ R8, 0(DI)
MOVQ R9, 8(DI)
MOVQ R10, 16(DI)
RET
// func initialize(state *[7]uint64, key *[32]byte)
TEXT ·initialize(SB), $0-16
MOVQ state+0(FP), DI
MOVQ key+8(FP), SI
// state[0...7] is initialized with zero
MOVOU 0(SI), X0
MOVOU 16(SI), X1
MOVOU ·poly1305Mask<>(SB), X2
PAND X2, X0
MOVOU X0, 24(DI)
MOVOU X1, 40(DI)
RET
// func finalize(tag *[TagSize]byte, state *[7]uint64)
TEXT ·finalize(SB), $0-16
MOVQ tag+0(FP), DI
MOVQ state+8(FP), SI
MOVQ 0(SI), AX
MOVQ 8(SI), BX
MOVQ 16(SI), CX
MOVQ AX, R8
MOVQ BX, R9
SUBQ $0XFFFFFFFFFFFFFFFB, AX
SBBQ $0XFFFFFFFFFFFFFFFF, BX
SBBQ $3, CX
CMOVQCS R8, AX
CMOVQCS R9, BX
ADDQ 40(SI), AX
ADCQ 48(SI), BX
MOVQ AX, 0(DI)
MOVQ BX, 8(DI)
RET
// func supportsAVX2() bool
TEXT ·supportsAVX2(SB), 4, $0-1
MOVQ runtime·support_avx2(SB), AX
MOVB AX, ret+0(FP)
RET

229
vendor/github.com/aead/poly1305/poly1305_ref.go generated vendored Normal file
View file

@ -0,0 +1,229 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// +build !amd64 gccgo appengine nacl
package poly1305
import "encoding/binary"
const (
msgBlock = uint32(1 << 24)
finalBlock = uint32(0)
)
// Sum generates an authenticator for msg using a one-time key and returns the
// 16-byte result. Authenticating two different messages with the same key allows
// an attacker to forge messages at will.
func Sum(msg []byte, key [32]byte) [TagSize]byte {
var (
h, r [5]uint32
s [4]uint32
)
var out [TagSize]byte
initialize(&r, &s, &key)
// process full 16-byte blocks
n := len(msg) & (^(TagSize - 1))
if n > 0 {
update(msg[:n], msgBlock, &h, &r)
msg = msg[n:]
}
if len(msg) > 0 {
var block [TagSize]byte
off := copy(block[:], msg)
block[off] = 1
update(block[:], finalBlock, &h, &r)
}
finalize(&out, &h, &s)
return out
}
// New returns a hash.Hash computing the poly1305 sum.
// Notice that Poly1305 is insecure if one key is used twice.
func New(key [32]byte) *Hash {
p := new(Hash)
initialize(&(p.r), &(p.s), &key)
return p
}
// Hash implements a Poly1305 writer interface.
// Poly1305 cannot be used like common hash.Hash implementations,
// because using a poly1305 key twice breaks its security.
// So poly1305.Hash does not support some kind of reset.
type Hash struct {
h, r [5]uint32
s [4]uint32
buf [TagSize]byte
off int
done bool
}
// Size returns the number of bytes Sum will append.
func (p *Hash) Size() int { return TagSize }
// Write adds more data to the running Poly1305 hash.
// This function should return a non-nil error if a call
// to Write happens after a call to Sum. So it is not possible
// to compute the checksum and than add more data.
func (p *Hash) Write(msg []byte) (int, error) {
if p.done {
return 0, errWriteAfterSum
}
n := len(msg)
if p.off > 0 {
dif := TagSize - p.off
if n <= dif {
p.off += copy(p.buf[p.off:], msg)
return n, nil
}
copy(p.buf[p.off:], msg[:dif])
msg = msg[dif:]
update(p.buf[:], msgBlock, &(p.h), &(p.r))
p.off = 0
}
// process full 16-byte blocks
if nn := len(msg) & (^(TagSize - 1)); nn > 0 {
update(msg[:nn], msgBlock, &(p.h), &(p.r))
msg = msg[nn:]
}
if len(msg) > 0 {
p.off += copy(p.buf[p.off:], msg)
}
return n, nil
}
// Sum appends the Pol1305 hash of the previously
// processed data to b and returns the resulting slice.
// It is safe to call this function multiple times.
func (p *Hash) Sum(b []byte) []byte {
var out [TagSize]byte
h := p.h
if p.off > 0 {
var buf [TagSize]byte
copy(buf[:], p.buf[:p.off])
buf[p.off] = 1 // invariant: p.off < TagSize
update(buf[:], finalBlock, &h, &(p.r))
}
finalize(&out, &h, &(p.s))
p.done = true
return append(b, out[:]...)
}
func initialize(r *[5]uint32, s *[4]uint32, key *[32]byte) {
r[0] = binary.LittleEndian.Uint32(key[0:]) & 0x3ffffff
r[1] = (binary.LittleEndian.Uint32(key[3:]) >> 2) & 0x3ffff03
r[2] = (binary.LittleEndian.Uint32(key[6:]) >> 4) & 0x3ffc0ff
r[3] = (binary.LittleEndian.Uint32(key[9:]) >> 6) & 0x3f03fff
r[4] = (binary.LittleEndian.Uint32(key[12:]) >> 8) & 0x00fffff
s[0] = binary.LittleEndian.Uint32(key[16:])
s[1] = binary.LittleEndian.Uint32(key[20:])
s[2] = binary.LittleEndian.Uint32(key[24:])
s[3] = binary.LittleEndian.Uint32(key[28:])
}
func update(msg []byte, flag uint32, h, r *[5]uint32) {
h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
r0, r1, r2, r3, r4 := uint64(r[0]), uint64(r[1]), uint64(r[2]), uint64(r[3]), uint64(r[4])
R1, R2, R3, R4 := r1*5, r2*5, r3*5, r4*5
for len(msg) > 0 {
// h += msg
h0 += binary.LittleEndian.Uint32(msg[0:]) & 0x3ffffff
h1 += (binary.LittleEndian.Uint32(msg[3:]) >> 2) & 0x3ffffff
h2 += (binary.LittleEndian.Uint32(msg[6:]) >> 4) & 0x3ffffff
h3 += (binary.LittleEndian.Uint32(msg[9:]) >> 6) & 0x3ffffff
h4 += (binary.LittleEndian.Uint32(msg[12:]) >> 8) | flag
// h *= r
d0 := (uint64(h0) * r0) + (uint64(h1) * R4) + (uint64(h2) * R3) + (uint64(h3) * R2) + (uint64(h4) * R1)
d1 := (d0 >> 26) + (uint64(h0) * r1) + (uint64(h1) * r0) + (uint64(h2) * R4) + (uint64(h3) * R3) + (uint64(h4) * R2)
d2 := (d1 >> 26) + (uint64(h0) * r2) + (uint64(h1) * r1) + (uint64(h2) * r0) + (uint64(h3) * R4) + (uint64(h4) * R3)
d3 := (d2 >> 26) + (uint64(h0) * r3) + (uint64(h1) * r2) + (uint64(h2) * r1) + (uint64(h3) * r0) + (uint64(h4) * R4)
d4 := (d3 >> 26) + (uint64(h0) * r4) + (uint64(h1) * r3) + (uint64(h2) * r2) + (uint64(h3) * r1) + (uint64(h4) * r0)
// h %= p
h0 = uint32(d0) & 0x3ffffff
h1 = uint32(d1) & 0x3ffffff
h2 = uint32(d2) & 0x3ffffff
h3 = uint32(d3) & 0x3ffffff
h4 = uint32(d4) & 0x3ffffff
h0 += uint32(d4>>26) * 5
h1 += h0 >> 26
h0 = h0 & 0x3ffffff
msg = msg[TagSize:]
}
h[0], h[1], h[2], h[3], h[4] = h0, h1, h2, h3, h4
}
func finalize(out *[TagSize]byte, h *[5]uint32, s *[4]uint32) {
h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
// h %= p reduction
h2 += h1 >> 26
h1 &= 0x3ffffff
h3 += h2 >> 26
h2 &= 0x3ffffff
h4 += h3 >> 26
h3 &= 0x3ffffff
h0 += 5 * (h4 >> 26)
h4 &= 0x3ffffff
h1 += h0 >> 26
h0 &= 0x3ffffff
// h - p
t0 := h0 + 5
t1 := h1 + (t0 >> 26)
t2 := h2 + (t1 >> 26)
t3 := h3 + (t2 >> 26)
t4 := h4 + (t3 >> 26) - (1 << 26)
t0 &= 0x3ffffff
t1 &= 0x3ffffff
t2 &= 0x3ffffff
t3 &= 0x3ffffff
// select h if h < p else h - p
t_mask := (t4 >> 31) - 1
h_mask := ^t_mask
h0 = (h0 & h_mask) | (t0 & t_mask)
h1 = (h1 & h_mask) | (t1 & t_mask)
h2 = (h2 & h_mask) | (t2 & t_mask)
h3 = (h3 & h_mask) | (t3 & t_mask)
h4 = (h4 & h_mask) | (t4 & t_mask)
// h %= 2^128
h0 |= h1 << 26
h1 = ((h1 >> 6) | (h2 << 20))
h2 = ((h2 >> 12) | (h3 << 14))
h3 = ((h3 >> 18) | (h4 << 8))
// s: the s part of the key
// tag = (h + s) % (2^128)
t := uint64(h0) + uint64(s[0])
h0 = uint32(t)
t = uint64(h1) + uint64(s[1]) + (t >> 32)
h1 = uint32(t)
t = uint64(h2) + uint64(s[2]) + (t >> 32)
h2 = uint32(t)
t = uint64(h3) + uint64(s[3]) + (t >> 32)
h3 = uint32(t)
binary.LittleEndian.PutUint32(out[0:], h0)
binary.LittleEndian.PutUint32(out[4:], h1)
binary.LittleEndian.PutUint32(out[8:], h2)
binary.LittleEndian.PutUint32(out[12:], h3)
}

157
vendor/github.com/aead/poly1305/poly1305_test.go generated vendored Normal file
View file

@ -0,0 +1,157 @@
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
package poly1305
import (
"bytes"
"encoding/hex"
"testing"
)
func fromHex(s string) []byte {
b, err := hex.DecodeString(s)
if err != nil {
panic(err)
}
return b
}
var vectors = []struct {
msg, key, tag []byte
}{
{
[]byte("Hello world!"),
[]byte("this is 32-byte key for Poly1305"),
[]byte{0xa6, 0xf7, 0x45, 0x00, 0x8f, 0x81, 0xc9, 0x16, 0xa2, 0x0d, 0xcc, 0x74, 0xee, 0xf2, 0xb2, 0xf0},
},
{
make([]byte, 32),
[]byte("this is 32-byte key for Poly1305"),
[]byte{0x49, 0xec, 0x78, 0x09, 0x0e, 0x48, 0x1e, 0xc6, 0xc2, 0x6b, 0x33, 0xb9, 0x1c, 0xcc, 0x03, 0x07},
},
{
make([]byte, 2007),
[]byte("this is 32-byte key for Poly1305"),
[]byte{0xda, 0x84, 0xbc, 0xab, 0x02, 0x67, 0x6c, 0x38, 0xcd, 0xb0, 0x15, 0x60, 0x42, 0x74, 0xc2, 0xaa},
},
{
make([]byte, 2007),
make([]byte, 32),
make([]byte, 16),
},
{
// This test triggers an edge-case. See https://go-review.googlesource.com/#/c/30101/.
[]byte{0x81, 0xd8, 0xb2, 0xe4, 0x6a, 0x25, 0x21, 0x3b, 0x58, 0xfe, 0xe4, 0x21, 0x3a, 0x2a, 0x28, 0xe9, 0x21, 0xc1, 0x2a, 0x96, 0x32, 0x51, 0x6d, 0x3b, 0x73, 0x27, 0x27, 0x27, 0xbe, 0xcf, 0x21, 0x29},
[]byte{0x3b, 0x3a, 0x29, 0xe9, 0x3b, 0x21, 0x3a, 0x5c, 0x5c, 0x3b, 0x3b, 0x05, 0x3a, 0x3a, 0x8c, 0x0d},
[]byte{0x6d, 0xc1, 0x8b, 0x8c, 0x34, 0x4c, 0xd7, 0x99, 0x27, 0x11, 0x8b, 0xbe, 0x84, 0xb7, 0xf3, 0x14},
},
// From: https://tools.ietf.org/html/rfc7539#section-2.5.2
{
fromHex("43727970746f6772617068696320466f72756d2052657365617263682047726f7570"),
fromHex("85d6be7857556d337f4452fe42d506a80103808afb0db2fd4abff6af4149f51b"),
fromHex("a8061dc1305136c6c22b8baf0c0127a9"),
},
}
func TestVectors(t *testing.T) {
var key [32]byte
for i, v := range vectors {
msg := v.msg
copy(key[:], v.key)
out := Sum(msg, key)
if !bytes.Equal(out[:], v.tag) {
t.Errorf("Test vector %d : got: %x expected: %x", i, out[:], v.tag)
}
h := New(key)
h.Write(msg)
tag := h.Sum(nil)
if !bytes.Equal(tag[:], v.tag) {
t.Errorf("Test vector %d : got: %x expected: %x", i, tag[:], v.tag)
}
var mac [16]byte
copy(mac[:], v.tag)
if !Verify(&mac, msg, key) {
t.Errorf("Test vector %d : Verify failed", i)
}
}
}
func TestWriteAfterSum(t *testing.T) {
msg := make([]byte, 64)
for i := range msg {
h := New([32]byte{})
if _, err := h.Write(msg[:i]); err != nil {
t.Fatalf("Iteration %d: poly1305.Hash returned unexpected error: %s", i, err)
}
h.Sum(nil)
if _, err := h.Write(nil); err == nil {
t.Fatalf("Iteration %d: poly1305.Hash returned no error for write after sum", i)
}
}
}
func TestWrite(t *testing.T) {
var key [32]byte
for i := range key {
key[i] = byte(i)
}
h := New(key)
var msg1 []byte
msg0 := make([]byte, 64)
for i := range msg0 {
h.Write(msg0[:i])
msg1 = append(msg1, msg0[:i]...)
}
tag0 := h.Sum(nil)
tag1 := Sum(msg1, key)
if !bytes.Equal(tag0[:], tag1[:]) {
t.Fatalf("Sum differ from poly1305.Sum\n Sum: %s \n poly1305.Sum: %s", hex.EncodeToString(tag0[:]), hex.EncodeToString(tag1[:]))
}
}
// Benchmarks
func BenchmarkSum_64(b *testing.B) { benchmarkSum(b, 64) }
func BenchmarkSum_256(b *testing.B) { benchmarkSum(b, 256) }
func BenchmarkSum_1K(b *testing.B) { benchmarkSum(b, 1024) }
func BenchmarkSum_8K(b *testing.B) { benchmarkSum(b, 8*1024) }
func BenchmarkWrite_64(b *testing.B) { benchmarkWrite(b, 64) }
func BenchmarkWrite_256(b *testing.B) { benchmarkWrite(b, 256) }
func BenchmarkWrite_1K(b *testing.B) { benchmarkWrite(b, 1024) }
func BenchmarkWrite_8K(b *testing.B) { benchmarkWrite(b, 8*1024) }
func benchmarkSum(b *testing.B, size int) {
var key [32]byte
msg := make([]byte, size)
b.SetBytes(int64(size))
b.ResetTimer()
for i := 0; i < b.N; i++ {
Sum(msg, key)
}
}
func benchmarkWrite(b *testing.B, size int) {
var key [32]byte
h := New(key)
msg := make([]byte, size)
b.SetBytes(int64(size))
b.ResetTimer()
for i := 0; i < b.N; i++ {
h.Write(msg)
}
}