mirror of
https://github.com/DNSCrypt/dnscrypt-proxy.git
synced 2025-04-06 14:47:35 +03:00
Switch from glide to dep. Check in vendor/
This commit is contained in:
parent
9a3cd91cd7
commit
f44e11fa65
498 changed files with 74787 additions and 32 deletions
25
vendor/github.com/aead/chacha20/.gitignore
generated
vendored
Normal file
25
vendor/github.com/aead/chacha20/.gitignore
generated
vendored
Normal file
|
@ -0,0 +1,25 @@
|
|||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
.vscode
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
19
vendor/github.com/aead/chacha20/.travis.yml
generated
vendored
Normal file
19
vendor/github.com/aead/chacha20/.travis.yml
generated
vendored
Normal file
|
@ -0,0 +1,19 @@
|
|||
language: go
|
||||
|
||||
go:
|
||||
- 1.5.3
|
||||
- 1.6
|
||||
- 1.7
|
||||
- 1.8
|
||||
- master
|
||||
|
||||
env:
|
||||
- TRAVIS_GOARCH=amd64
|
||||
- TRAVIS_GOARCH=386
|
||||
|
||||
before_install:
|
||||
- export GOARCH=$TRAVIS_GOARCH
|
||||
|
||||
branches:
|
||||
only:
|
||||
- master
|
21
vendor/github.com/aead/chacha20/LICENSE
generated
vendored
Normal file
21
vendor/github.com/aead/chacha20/LICENSE
generated
vendored
Normal file
|
@ -0,0 +1,21 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2016 Andreas Auernhammer
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
79
vendor/github.com/aead/chacha20/README.md
generated
vendored
Normal file
79
vendor/github.com/aead/chacha20/README.md
generated
vendored
Normal file
|
@ -0,0 +1,79 @@
|
|||
[GoDoc reference](https://godoc.org/github.com/aead/chacha20)
|
||||
|
||||
## The ChaCha20 stream cipher
|
||||
|
||||
ChaCha is a stream cipher family created by Daniel J. Bernstein.
|
||||
The most common ChaCha cipher is ChaCha20 (20 rounds). ChaCha20 is standardized in [RFC 7539](https://tools.ietf.org/html/rfc7539 "RFC 7539").
|
||||
|
||||
This package provides implementations of three ChaCha versions:
|
||||
- ChaCha20 with a 64 bit nonce (can en/decrypt up to 2^64 * 64 bytes for one key-nonce combination)
|
||||
- ChaCha20 with a 96 bit nonce (can en/decrypt up to 2^32 * 64 bytes ~ 256 GB for one key-nonce combination)
|
||||
- XChaCha20 with a 192 bit nonce (can en/decrypt up to 2^64 * 64 bytes for one key-nonce combination)
|
||||
|
||||
Furthermore the chacha subpackage implements ChaCha20/12 and ChaCha20/8.
|
||||
These versions use 12 or 8 rounds instead of 20.
|
||||
But it's recommended to use ChaCha20 (with 20 rounds) - it will be fast enough for almost all purposes.
|
||||
|
||||
### Installation
|
||||
Install in your GOPATH: `go get -u github.com/aead/chacha20`
|
||||
|
||||
### Requirements
|
||||
All go versions >= 1.5.3 are supported.
|
||||
Please note that the amd64 AVX2 asm implementation requires go1.7 or newer.
|
||||
|
||||
### Performance
|
||||
|
||||
#### AMD64
|
||||
Hardware: Intel i7-6500U 2.50GHz x 2
|
||||
System: Linux Ubuntu 16.04 - kernel: 4.4.0-62-generic
|
||||
Go version: 1.8.0
|
||||
```
|
||||
AVX2
|
||||
name speed cpb
|
||||
ChaCha20_64-4 573MB/s ± 0% 4.16
|
||||
ChaCha20_1K-4 2.19GB/s ± 0% 1.06
|
||||
XChaCha20_64-4 261MB/s ± 0% 9.13
|
||||
XChaCha20_1K-4 1.69GB/s ± 4% 1.37
|
||||
XORKeyStream64-4 474MB/s ± 2% 5.02
|
||||
XORKeyStream1K-4 2.09GB/s ± 1% 1.11
|
||||
XChaCha20_XORKeyStream64-4 262MB/s ± 0% 9.09
|
||||
XChaCha20_XORKeyStream1K-4 1.71GB/s ± 1% 1.36
|
||||
|
||||
SSSE3
|
||||
name speed cpb
|
||||
ChaCha20_64-4 583MB/s ± 0% 4.08
|
||||
ChaCha20_1K-4 1.15GB/s ± 1% 2.02
|
||||
XChaCha20_64-4 267MB/s ± 0% 8.92
|
||||
XChaCha20_1K-4 984MB/s ± 5% 2.42
|
||||
XORKeyStream64-4 492MB/s ± 1% 4.84
|
||||
XORKeyStream1K-4 1.10GB/s ± 5% 2.11
|
||||
XChaCha20_XORKeyStream64-4 266MB/s ± 0% 8.96
|
||||
XChaCha20_XORKeyStream1K-4 1.00GB/s ± 2% 2.32
|
||||
```
|
||||
#### 386
|
||||
Hardware: Intel i7-6500U 2.50GHz x 2
|
||||
System: Linux Ubuntu 16.04 - kernel: 4.4.0-62-generic
|
||||
Go version: 1.8.0
|
||||
```
|
||||
SSSE3
|
||||
name speed cpb
|
||||
ChaCha20_64-4 570MB/s ± 0% 4.18
|
||||
ChaCha20_1K-4 650MB/s ± 0% 3.66
|
||||
XChaCha20_64-4 223MB/s ± 0% 10.69
|
||||
XChaCha20_1K-4 584MB/s ± 1% 4.08
|
||||
XORKeyStream64-4 392MB/s ± 1% 6.08
|
||||
XORKeyStream1K-4 629MB/s ± 1% 3.79
|
||||
XChaCha20_XORKeyStream64-4 222MB/s ± 0% 10.73
|
||||
XChaCha20_XORKeyStream1K-4 585MB/s ± 0% 4.07
|
||||
|
||||
SSE2
|
||||
name speed cpb
|
||||
ChaCha20_64-4 509MB/s ± 0% 4.68
|
||||
ChaCha20_1K-4 553MB/s ± 2% 4.31
|
||||
XChaCha20_64-4 201MB/s ± 0% 11.86
|
||||
XChaCha20_1K-4 498MB/s ± 4% 4.78
|
||||
XORKeyStream64-4 359MB/s ± 1% 6.64
|
||||
XORKeyStream1K-4 545MB/s ± 0% 4.37
|
||||
XChaCha20_XORKeyStream64-4 201MB/s ± 1% 11.86
|
||||
XChaCha20_XORKeyStream1K-4 507MB/s ± 0% 4.70
|
||||
```
|
176
vendor/github.com/aead/chacha20/chacha/chacha.go
generated
vendored
Normal file
176
vendor/github.com/aead/chacha20/chacha/chacha.go
generated
vendored
Normal file
|
@ -0,0 +1,176 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Package chacha implements some low-level functions of the
|
||||
// ChaCha cipher family.
|
||||
package chacha // import "github.com/aead/chacha20/chacha"
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
)
|
||||
|
||||
const (
|
||||
// NonceSize is the size of the ChaCha20 nonce in bytes.
|
||||
NonceSize = 8
|
||||
|
||||
// INonceSize is the size of the IETF-ChaCha20 nonce in bytes.
|
||||
INonceSize = 12
|
||||
|
||||
// XNonceSize is the size of the XChaCha20 nonce in bytes.
|
||||
XNonceSize = 24
|
||||
|
||||
// KeySize is the size of the key in bytes.
|
||||
KeySize = 32
|
||||
)
|
||||
|
||||
var (
|
||||
useSSE2 bool
|
||||
useSSSE3 bool
|
||||
useAVX2 bool
|
||||
)
|
||||
|
||||
var (
|
||||
errKeySize = errors.New("chacha20/chacha: bad key length")
|
||||
errInvalidNonce = errors.New("chacha20/chacha: bad nonce length")
|
||||
)
|
||||
|
||||
func setup(state *[64]byte, nonce, key []byte) (err error) {
|
||||
if len(key) != KeySize {
|
||||
err = errKeySize
|
||||
return
|
||||
}
|
||||
var Nonce [16]byte
|
||||
switch len(nonce) {
|
||||
case NonceSize:
|
||||
copy(Nonce[8:], nonce)
|
||||
initialize(state, key, &Nonce)
|
||||
case INonceSize:
|
||||
copy(Nonce[4:], nonce)
|
||||
initialize(state, key, &Nonce)
|
||||
case XNonceSize:
|
||||
var tmpKey [32]byte
|
||||
var hNonce [16]byte
|
||||
|
||||
copy(hNonce[:], nonce[:16])
|
||||
copy(tmpKey[:], key)
|
||||
hChaCha20(&tmpKey, &hNonce, &tmpKey)
|
||||
copy(Nonce[8:], nonce[16:])
|
||||
initialize(state, tmpKey[:], &Nonce)
|
||||
|
||||
// BUG(aead): A "good" compiler will remove this (optimizations)
|
||||
// But using the provided key instead of tmpKey,
|
||||
// will change the key (-> probably confuses users)
|
||||
for i := range tmpKey {
|
||||
tmpKey[i] = 0
|
||||
}
|
||||
default:
|
||||
err = errInvalidNonce
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// XORKeyStream crypts bytes from src to dst using the given nonce and key.
|
||||
// The length of the nonce determinds the version of ChaCha20:
|
||||
// - NonceSize: ChaCha20/r with a 64 bit nonce and a 2^64 * 64 byte period.
|
||||
// - INonceSize: ChaCha20/r as defined in RFC 7539 and a 2^32 * 64 byte period.
|
||||
// - XNonceSize: XChaCha20/r with a 192 bit nonce and a 2^64 * 64 byte period.
|
||||
// The rounds argument specifies the number of rounds performed for keystream
|
||||
// generation - valid values are 8, 12 or 20. The src and dst may be the same slice
|
||||
// but otherwise should not overlap. If len(dst) < len(src) this function panics.
|
||||
// If the nonce is neither 64, 96 nor 192 bits long, this function panics.
|
||||
func XORKeyStream(dst, src, nonce, key []byte, rounds int) {
|
||||
if rounds != 20 && rounds != 12 && rounds != 8 {
|
||||
panic("chacha20/chacha: bad number of rounds")
|
||||
}
|
||||
if len(dst) < len(src) {
|
||||
panic("chacha20/chacha: dst buffer is to small")
|
||||
}
|
||||
if len(nonce) == INonceSize && uint64(len(src)) > (1<<38) {
|
||||
panic("chacha20/chacha: src is too large")
|
||||
}
|
||||
|
||||
var block, state [64]byte
|
||||
if err := setup(&state, nonce, key); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
xorKeyStream(dst, src, &block, &state, rounds)
|
||||
}
|
||||
|
||||
// Cipher is a ChaCha20/r (or XChaCha20/r) stream cipher instance for a fixed
// number of rounds r. Create one with NewCipher.
type Cipher struct {
	// state is the 64-byte cipher state; the block counter starts at
	// byte 48 (see SetCounter).
	state [64]byte
	// block buffers keystream that was generated but not yet consumed.
	block [64]byte
	// off is the index into block of the next unused keystream byte;
	// 0 means nothing is buffered.
	off int
	// rounds is the number of rounds per block, e.g. 20 for ChaCha20.
	rounds int
	// noncesize is INonceSize or NonceSize and selects the counter width
	// written by SetCounter.
	noncesize int
}
|
||||
|
||||
// NewCipher returns a new *chacha.Cipher implementing the ChaCha20/r or XChaCha20/r
|
||||
// (r = 8, 12 or 20) stream cipher. The nonce must be unique for one key for all time.
|
||||
// The length of the nonce determinds the version of ChaCha20:
|
||||
// - NonceSize: ChaCha20/r with a 64 bit nonce and a 2^64 * 64 byte period.
|
||||
// - INonceSize: ChaCha20/r as defined in RFC 7539 and a 2^32 * 64 byte period.
|
||||
// - XNonceSize: XChaCha20/r with a 192 bit nonce and a 2^64 * 64 byte period.
|
||||
// If the nonce is neither 64, 96 nor 192 bits long, a non-nil error is returned.
|
||||
func NewCipher(nonce, key []byte, rounds int) (*Cipher, error) {
|
||||
if rounds != 20 && rounds != 12 && rounds != 8 {
|
||||
panic("chacha20/chacha: bad number of rounds")
|
||||
}
|
||||
|
||||
c := new(Cipher)
|
||||
if err := setup(&(c.state), nonce, key); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c.rounds = rounds
|
||||
|
||||
if len(nonce) == INonceSize {
|
||||
c.noncesize = INonceSize
|
||||
} else {
|
||||
c.noncesize = NonceSize
|
||||
}
|
||||
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// XORKeyStream crypts bytes from src to dst. Src and dst may be the same slice
|
||||
// but otherwise should not overlap. If len(dst) < len(src) the function panics.
|
||||
func (c *Cipher) XORKeyStream(dst, src []byte) {
|
||||
if len(dst) < len(src) {
|
||||
panic("chacha20/chacha: dst buffer is to small")
|
||||
}
|
||||
|
||||
if c.off > 0 {
|
||||
n := len(c.block[c.off:])
|
||||
if len(src) <= n {
|
||||
for i, v := range src {
|
||||
dst[i] = v ^ c.block[c.off]
|
||||
c.off++
|
||||
}
|
||||
if c.off == 64 {
|
||||
c.off = 0
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
for i, v := range c.block[c.off:] {
|
||||
dst[i] = src[i] ^ v
|
||||
}
|
||||
src = src[n:]
|
||||
dst = dst[n:]
|
||||
c.off = 0
|
||||
}
|
||||
|
||||
c.off += xorKeyStream(dst, src, &(c.block), &(c.state), c.rounds)
|
||||
}
|
||||
|
||||
// SetCounter skips ctr * 64 byte blocks. SetCounter(0) resets the cipher.
|
||||
// This function always skips the unused keystream of the current 64 byte block.
|
||||
func (c *Cipher) SetCounter(ctr uint64) {
|
||||
if c.noncesize == INonceSize {
|
||||
binary.LittleEndian.PutUint32(c.state[48:], uint32(ctr))
|
||||
} else {
|
||||
binary.LittleEndian.PutUint64(c.state[48:], ctr)
|
||||
}
|
||||
c.off = 0
|
||||
}
|
542
vendor/github.com/aead/chacha20/chacha/chachaAVX2_amd64.s
generated
vendored
Normal file
542
vendor/github.com/aead/chacha20/chacha/chachaAVX2_amd64.s
generated
vendored
Normal file
|
@ -0,0 +1,542 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build go1.7,amd64,!gccgo,!appengine,!nacl
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
DATA ·sigma_AVX<>+0x00(SB)/4, $0x61707865
|
||||
DATA ·sigma_AVX<>+0x04(SB)/4, $0x3320646e
|
||||
DATA ·sigma_AVX<>+0x08(SB)/4, $0x79622d32
|
||||
DATA ·sigma_AVX<>+0x0C(SB)/4, $0x6b206574
|
||||
GLOBL ·sigma_AVX<>(SB), (NOPTR+RODATA), $16
|
||||
|
||||
DATA ·one_AVX<>+0x00(SB)/8, $1
|
||||
DATA ·one_AVX<>+0x08(SB)/8, $0
|
||||
GLOBL ·one_AVX<>(SB), (NOPTR+RODATA), $16
|
||||
|
||||
DATA ·one_AVX2<>+0x00(SB)/8, $0
|
||||
DATA ·one_AVX2<>+0x08(SB)/8, $0
|
||||
DATA ·one_AVX2<>+0x10(SB)/8, $1
|
||||
DATA ·one_AVX2<>+0x18(SB)/8, $0
|
||||
GLOBL ·one_AVX2<>(SB), (NOPTR+RODATA), $32
|
||||
|
||||
DATA ·two_AVX2<>+0x00(SB)/8, $2
|
||||
DATA ·two_AVX2<>+0x08(SB)/8, $0
|
||||
DATA ·two_AVX2<>+0x10(SB)/8, $2
|
||||
DATA ·two_AVX2<>+0x18(SB)/8, $0
|
||||
GLOBL ·two_AVX2<>(SB), (NOPTR+RODATA), $32
|
||||
|
||||
DATA ·rol16_AVX2<>+0x00(SB)/8, $0x0504070601000302
|
||||
DATA ·rol16_AVX2<>+0x08(SB)/8, $0x0D0C0F0E09080B0A
|
||||
DATA ·rol16_AVX2<>+0x10(SB)/8, $0x0504070601000302
|
||||
DATA ·rol16_AVX2<>+0x18(SB)/8, $0x0D0C0F0E09080B0A
|
||||
GLOBL ·rol16_AVX2<>(SB), (NOPTR+RODATA), $32
|
||||
|
||||
DATA ·rol8_AVX2<>+0x00(SB)/8, $0x0605040702010003
|
||||
DATA ·rol8_AVX2<>+0x08(SB)/8, $0x0E0D0C0F0A09080B
|
||||
DATA ·rol8_AVX2<>+0x10(SB)/8, $0x0605040702010003
|
||||
DATA ·rol8_AVX2<>+0x18(SB)/8, $0x0E0D0C0F0A09080B
|
||||
GLOBL ·rol8_AVX2<>(SB), (NOPTR+RODATA), $32
|
||||
|
||||
#define ROTL(n, t, v) \
|
||||
VPSLLD $n, v, t; \
|
||||
VPSRLD $(32-n), v, v; \
|
||||
VPXOR v, t, v
|
||||
|
||||
#define CHACHA_QROUND(v0, v1, v2, v3, t, c16, c8) \
|
||||
VPADDD v0, v1, v0; \
|
||||
VPXOR v3, v0, v3; \
|
||||
VPSHUFB c16, v3, v3; \
|
||||
VPADDD v2, v3, v2; \
|
||||
VPXOR v1, v2, v1; \
|
||||
ROTL(12, t, v1); \
|
||||
VPADDD v0, v1, v0; \
|
||||
VPXOR v3, v0, v3; \
|
||||
VPSHUFB c8, v3, v3; \
|
||||
VPADDD v2, v3, v2; \
|
||||
VPXOR v1, v2, v1; \
|
||||
ROTL(7, t, v1)
|
||||
|
||||
#define CHACHA_SHUFFLE(v1, v2, v3) \
|
||||
VPSHUFD $0x39, v1, v1; \
|
||||
VPSHUFD $0x4E, v2, v2; \
|
||||
VPSHUFD $-109, v3, v3
|
||||
|
||||
#define XOR_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \
|
||||
VMOVDQU (0+off)(src), t0; \
|
||||
VPERM2I128 $32, v1, v0, t1; \
|
||||
VPXOR t0, t1, t0; \
|
||||
VMOVDQU t0, (0+off)(dst); \
|
||||
VMOVDQU (32+off)(src), t0; \
|
||||
VPERM2I128 $32, v3, v2, t1; \
|
||||
VPXOR t0, t1, t0; \
|
||||
VMOVDQU t0, (32+off)(dst); \
|
||||
VMOVDQU (64+off)(src), t0; \
|
||||
VPERM2I128 $49, v1, v0, t1; \
|
||||
VPXOR t0, t1, t0; \
|
||||
VMOVDQU t0, (64+off)(dst); \
|
||||
VMOVDQU (96+off)(src), t0; \
|
||||
VPERM2I128 $49, v3, v2, t1; \
|
||||
VPXOR t0, t1, t0; \
|
||||
VMOVDQU t0, (96+off)(dst)
|
||||
|
||||
#define XOR_UPPER_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \
|
||||
VMOVDQU (0+off)(src), t0; \
|
||||
VPERM2I128 $32, v1, v0, t1; \
|
||||
VPXOR t0, t1, t0; \
|
||||
VMOVDQU t0, (0+off)(dst); \
|
||||
VMOVDQU (32+off)(src), t0; \
|
||||
VPERM2I128 $32, v3, v2, t1; \
|
||||
VPXOR t0, t1, t0; \
|
||||
VMOVDQU t0, (32+off)(dst); \
|
||||
|
||||
#define EXTRACT_LOWER(dst, v0, v1, v2, v3, t0) \
|
||||
VPERM2I128 $49, v1, v0, t0; \
|
||||
VMOVDQU t0, 0(dst); \
|
||||
VPERM2I128 $49, v3, v2, t0; \
|
||||
VMOVDQU t0, 32(dst)
|
||||
|
||||
#define XOR_AVX(dst, src, off, v0, v1, v2, v3, t0) \
|
||||
VPXOR 0+off(src), v0, t0; \
|
||||
VMOVDQU t0, 0+off(dst); \
|
||||
VPXOR 16+off(src), v1, t0; \
|
||||
VMOVDQU t0, 16+off(dst); \
|
||||
VPXOR 32+off(src), v2, t0; \
|
||||
VMOVDQU t0, 32+off(dst); \
|
||||
VPXOR 48+off(src), v3, t0; \
|
||||
VMOVDQU t0, 48+off(dst)
|
||||
|
||||
#define TWO 0(SP)
|
||||
#define C16 32(SP)
|
||||
#define C8 64(SP)
|
||||
#define STATE_0 96(SP)
|
||||
#define STATE_1 128(SP)
|
||||
#define STATE_2 160(SP)
|
||||
#define STATE_3 192(SP)
|
||||
#define TMP_0 224(SP)
|
||||
#define TMP_1 256(SP)
|
||||
|
||||
// func xorKeyStreamAVX2(dst, src []byte, block, state *[64]byte, rounds int) int
|
||||
TEXT ·xorKeyStreamAVX2(SB), 4, $320-80
|
||||
MOVQ dst_base+0(FP), DI
|
||||
MOVQ src_base+24(FP), SI
|
||||
MOVQ src_len+32(FP), CX
|
||||
MOVQ block+48(FP), BX
|
||||
MOVQ state+56(FP), AX
|
||||
MOVQ rounds+64(FP), DX
|
||||
|
||||
MOVQ SP, R8
|
||||
ADDQ $32, SP
|
||||
ANDQ $-32, SP
|
||||
|
||||
VMOVDQU 0(AX), Y2
|
||||
VMOVDQU 32(AX), Y3
|
||||
VPERM2I128 $0x22, Y2, Y0, Y0
|
||||
VPERM2I128 $0x33, Y2, Y1, Y1
|
||||
VPERM2I128 $0x22, Y3, Y2, Y2
|
||||
VPERM2I128 $0x33, Y3, Y3, Y3
|
||||
|
||||
TESTQ CX, CX
|
||||
JZ done
|
||||
|
||||
VMOVDQU ·one_AVX2<>(SB), Y4
|
||||
VPADDD Y4, Y3, Y3
|
||||
|
||||
VMOVDQA Y0, STATE_0
|
||||
VMOVDQA Y1, STATE_1
|
||||
VMOVDQA Y2, STATE_2
|
||||
VMOVDQA Y3, STATE_3
|
||||
|
||||
VMOVDQU ·rol16_AVX2<>(SB), Y4
|
||||
VMOVDQU ·rol8_AVX2<>(SB), Y5
|
||||
VMOVDQU ·two_AVX2<>(SB), Y6
|
||||
VMOVDQA Y4, Y14
|
||||
VMOVDQA Y5, Y15
|
||||
VMOVDQA Y4, C16
|
||||
VMOVDQA Y5, C8
|
||||
VMOVDQA Y6, TWO
|
||||
|
||||
CMPQ CX, $64
|
||||
JBE between_0_and_64
|
||||
CMPQ CX, $192
|
||||
JBE between_64_and_192
|
||||
CMPQ CX, $320
|
||||
JBE between_192_and_320
|
||||
CMPQ CX, $448
|
||||
JBE between_320_and_448
|
||||
|
||||
at_least_512:
|
||||
VMOVDQA Y0, Y4
|
||||
VMOVDQA Y1, Y5
|
||||
VMOVDQA Y2, Y6
|
||||
VPADDQ TWO, Y3, Y7
|
||||
VMOVDQA Y0, Y8
|
||||
VMOVDQA Y1, Y9
|
||||
VMOVDQA Y2, Y10
|
||||
VPADDQ TWO, Y7, Y11
|
||||
VMOVDQA Y0, Y12
|
||||
VMOVDQA Y1, Y13
|
||||
VMOVDQA Y2, Y14
|
||||
VPADDQ TWO, Y11, Y15
|
||||
|
||||
MOVQ DX, R9
|
||||
|
||||
chacha_loop_512:
|
||||
VMOVDQA Y8, TMP_0
|
||||
CHACHA_QROUND(Y0, Y1, Y2, Y3, Y8, C16, C8)
|
||||
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y8, C16, C8)
|
||||
VMOVDQA TMP_0, Y8
|
||||
VMOVDQA Y0, TMP_0
|
||||
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y0, C16, C8)
|
||||
CHACHA_QROUND(Y12, Y13, Y14, Y15, Y0, C16, C8)
|
||||
CHACHA_SHUFFLE(Y1, Y2, Y3)
|
||||
CHACHA_SHUFFLE(Y5, Y6, Y7)
|
||||
CHACHA_SHUFFLE(Y9, Y10, Y11)
|
||||
CHACHA_SHUFFLE(Y13, Y14, Y15)
|
||||
|
||||
CHACHA_QROUND(Y12, Y13, Y14, Y15, Y0, C16, C8)
|
||||
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y0, C16, C8)
|
||||
VMOVDQA TMP_0, Y0
|
||||
VMOVDQA Y8, TMP_0
|
||||
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y8, C16, C8)
|
||||
CHACHA_QROUND(Y0, Y1, Y2, Y3, Y8, C16, C8)
|
||||
VMOVDQA TMP_0, Y8
|
||||
CHACHA_SHUFFLE(Y3, Y2, Y1)
|
||||
CHACHA_SHUFFLE(Y7, Y6, Y5)
|
||||
CHACHA_SHUFFLE(Y11, Y10, Y9)
|
||||
CHACHA_SHUFFLE(Y15, Y14, Y13)
|
||||
SUBQ $2, R9
|
||||
JA chacha_loop_512
|
||||
|
||||
VMOVDQA Y12, TMP_0
|
||||
VMOVDQA Y13, TMP_1
|
||||
VPADDD STATE_0, Y0, Y0
|
||||
VPADDD STATE_1, Y1, Y1
|
||||
VPADDD STATE_2, Y2, Y2
|
||||
VPADDD STATE_3, Y3, Y3
|
||||
XOR_AVX2(DI, SI, 0, Y0, Y1, Y2, Y3, Y12, Y13)
|
||||
VMOVDQA STATE_0, Y0
|
||||
VMOVDQA STATE_1, Y1
|
||||
VMOVDQA STATE_2, Y2
|
||||
VMOVDQA STATE_3, Y3
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
VPADDD Y0, Y4, Y4
|
||||
VPADDD Y1, Y5, Y5
|
||||
VPADDD Y2, Y6, Y6
|
||||
VPADDD Y3, Y7, Y7
|
||||
XOR_AVX2(DI, SI, 128, Y4, Y5, Y6, Y7, Y12, Y13)
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
VPADDD Y0, Y8, Y8
|
||||
VPADDD Y1, Y9, Y9
|
||||
VPADDD Y2, Y10, Y10
|
||||
VPADDD Y3, Y11, Y11
|
||||
XOR_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13)
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
VPADDD TMP_0, Y0, Y12
|
||||
VPADDD TMP_1, Y1, Y13
|
||||
VPADDD Y2, Y14, Y14
|
||||
VPADDD Y3, Y15, Y15
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
CMPQ CX, $512
|
||||
JB less_than_512
|
||||
|
||||
XOR_AVX2(DI, SI, 384, Y12, Y13, Y14, Y15, Y4, Y5)
|
||||
VMOVDQA Y3, STATE_3
|
||||
ADDQ $512, SI
|
||||
ADDQ $512, DI
|
||||
SUBQ $512, CX
|
||||
CMPQ CX, $448
|
||||
JA at_least_512
|
||||
|
||||
TESTQ CX, CX
|
||||
JZ done
|
||||
|
||||
VMOVDQA C16, Y14
|
||||
VMOVDQA C8, Y15
|
||||
|
||||
CMPQ CX, $64
|
||||
JBE between_0_and_64
|
||||
CMPQ CX, $192
|
||||
JBE between_64_and_192
|
||||
CMPQ CX, $320
|
||||
JBE between_192_and_320
|
||||
JMP between_320_and_448
|
||||
|
||||
less_than_512:
|
||||
XOR_UPPER_AVX2(DI, SI, 384, Y12, Y13, Y14, Y15, Y4, Y5)
|
||||
EXTRACT_LOWER(BX, Y12, Y13, Y14, Y15, Y4)
|
||||
ADDQ $448, SI
|
||||
ADDQ $448, DI
|
||||
SUBQ $448, CX
|
||||
JMP finalize
|
||||
|
||||
between_320_and_448:
|
||||
VMOVDQA Y0, Y4
|
||||
VMOVDQA Y1, Y5
|
||||
VMOVDQA Y2, Y6
|
||||
VPADDQ TWO, Y3, Y7
|
||||
VMOVDQA Y0, Y8
|
||||
VMOVDQA Y1, Y9
|
||||
VMOVDQA Y2, Y10
|
||||
VPADDQ TWO, Y7, Y11
|
||||
|
||||
MOVQ DX, R9
|
||||
|
||||
chacha_loop_384:
|
||||
CHACHA_QROUND(Y0, Y1, Y2, Y3, Y13, Y14, Y15)
|
||||
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
|
||||
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
|
||||
CHACHA_SHUFFLE(Y1, Y2, Y3)
|
||||
CHACHA_SHUFFLE(Y5, Y6, Y7)
|
||||
CHACHA_SHUFFLE(Y9, Y10, Y11)
|
||||
CHACHA_QROUND(Y0, Y1, Y2, Y3, Y13, Y14, Y15)
|
||||
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
|
||||
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
|
||||
CHACHA_SHUFFLE(Y3, Y2, Y1)
|
||||
CHACHA_SHUFFLE(Y7, Y6, Y5)
|
||||
CHACHA_SHUFFLE(Y11, Y10, Y9)
|
||||
SUBQ $2, R9
|
||||
JA chacha_loop_384
|
||||
|
||||
VPADDD STATE_0, Y0, Y0
|
||||
VPADDD STATE_1, Y1, Y1
|
||||
VPADDD STATE_2, Y2, Y2
|
||||
VPADDD STATE_3, Y3, Y3
|
||||
XOR_AVX2(DI, SI, 0, Y0, Y1, Y2, Y3, Y12, Y13)
|
||||
VMOVDQA STATE_0, Y0
|
||||
VMOVDQA STATE_1, Y1
|
||||
VMOVDQA STATE_2, Y2
|
||||
VMOVDQA STATE_3, Y3
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
VPADDD Y0, Y4, Y4
|
||||
VPADDD Y1, Y5, Y5
|
||||
VPADDD Y2, Y6, Y6
|
||||
VPADDD Y3, Y7, Y7
|
||||
XOR_AVX2(DI, SI, 128, Y4, Y5, Y6, Y7, Y12, Y13)
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
VPADDD Y0, Y8, Y8
|
||||
VPADDD Y1, Y9, Y9
|
||||
VPADDD Y2, Y10, Y10
|
||||
VPADDD Y3, Y11, Y11
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
CMPQ CX, $384
|
||||
JB less_than_384
|
||||
|
||||
XOR_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13)
|
||||
SUBQ $384, CX
|
||||
TESTQ CX, CX
|
||||
JE done
|
||||
|
||||
ADDQ $384, SI
|
||||
ADDQ $384, DI
|
||||
JMP between_0_and_64
|
||||
|
||||
less_than_384:
|
||||
XOR_UPPER_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13)
|
||||
EXTRACT_LOWER(BX, Y8, Y9, Y10, Y11, Y12)
|
||||
ADDQ $320, SI
|
||||
ADDQ $320, DI
|
||||
SUBQ $320, CX
|
||||
JMP finalize
|
||||
|
||||
between_192_and_320:
|
||||
VMOVDQA Y0, Y4
|
||||
VMOVDQA Y1, Y5
|
||||
VMOVDQA Y2, Y6
|
||||
VMOVDQA Y3, Y7
|
||||
VMOVDQA Y0, Y8
|
||||
VMOVDQA Y1, Y9
|
||||
VMOVDQA Y2, Y10
|
||||
VPADDQ TWO, Y3, Y11
|
||||
|
||||
MOVQ DX, R9
|
||||
|
||||
chacha_loop_256:
|
||||
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
|
||||
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
|
||||
CHACHA_SHUFFLE(Y5, Y6, Y7)
|
||||
CHACHA_SHUFFLE(Y9, Y10, Y11)
|
||||
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
|
||||
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
|
||||
CHACHA_SHUFFLE(Y7, Y6, Y5)
|
||||
CHACHA_SHUFFLE(Y11, Y10, Y9)
|
||||
SUBQ $2, R9
|
||||
JA chacha_loop_256
|
||||
|
||||
VPADDD Y0, Y4, Y4
|
||||
VPADDD Y1, Y5, Y5
|
||||
VPADDD Y2, Y6, Y6
|
||||
VPADDD Y3, Y7, Y7
|
||||
VPADDQ TWO, Y3, Y3
|
||||
XOR_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13)
|
||||
VPADDD Y0, Y8, Y8
|
||||
VPADDD Y1, Y9, Y9
|
||||
VPADDD Y2, Y10, Y10
|
||||
VPADDD Y3, Y11, Y11
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
CMPQ CX, $256
|
||||
JB less_than_256
|
||||
|
||||
XOR_AVX2(DI, SI, 128, Y8, Y9, Y10, Y11, Y12, Y13)
|
||||
SUBQ $256, CX
|
||||
TESTQ CX, CX
|
||||
JE done
|
||||
|
||||
ADDQ $256, SI
|
||||
ADDQ $256, DI
|
||||
JMP between_0_and_64
|
||||
|
||||
less_than_256:
|
||||
XOR_UPPER_AVX2(DI, SI, 128, Y8, Y9, Y10, Y11, Y12, Y13)
|
||||
EXTRACT_LOWER(BX, Y8, Y9, Y10, Y11, Y12)
|
||||
ADDQ $192, SI
|
||||
ADDQ $192, DI
|
||||
SUBQ $192, CX
|
||||
JMP finalize
|
||||
|
||||
between_64_and_192:
|
||||
VMOVDQA Y0, Y4
|
||||
VMOVDQA Y1, Y5
|
||||
VMOVDQA Y2, Y6
|
||||
VMOVDQA Y3, Y7
|
||||
|
||||
MOVQ DX, R9
|
||||
|
||||
chacha_loop_128:
|
||||
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
|
||||
CHACHA_SHUFFLE(Y5, Y6, Y7)
|
||||
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
|
||||
CHACHA_SHUFFLE(Y7, Y6, Y5)
|
||||
SUBQ $2, R9
|
||||
JA chacha_loop_128
|
||||
|
||||
VPADDD Y0, Y4, Y4
|
||||
VPADDD Y1, Y5, Y5
|
||||
VPADDD Y2, Y6, Y6
|
||||
VPADDD Y3, Y7, Y7
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
CMPQ CX, $128
|
||||
JB less_than_128
|
||||
|
||||
XOR_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13)
|
||||
SUBQ $128, CX
|
||||
TESTQ CX, CX
|
||||
JE done
|
||||
|
||||
ADDQ $128, SI
|
||||
ADDQ $128, DI
|
||||
JMP between_0_and_64
|
||||
|
||||
less_than_128:
|
||||
XOR_UPPER_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13)
|
||||
EXTRACT_LOWER(BX, Y4, Y5, Y6, Y7, Y13)
|
||||
ADDQ $64, SI
|
||||
ADDQ $64, DI
|
||||
SUBQ $64, CX
|
||||
JMP finalize
|
||||
|
||||
between_0_and_64:
|
||||
VMOVDQA X0, X4
|
||||
VMOVDQA X1, X5
|
||||
VMOVDQA X2, X6
|
||||
VMOVDQA X3, X7
|
||||
|
||||
MOVQ DX, R9
|
||||
|
||||
chacha_loop_64:
|
||||
CHACHA_QROUND(X4, X5, X6, X7, X13, X14, X15)
|
||||
CHACHA_SHUFFLE(X5, X6, X7)
|
||||
CHACHA_QROUND(X4, X5, X6, X7, X13, X14, X15)
|
||||
CHACHA_SHUFFLE(X7, X6, X5)
|
||||
SUBQ $2, R9
|
||||
JA chacha_loop_64
|
||||
|
||||
VPADDD X0, X4, X4
|
||||
VPADDD X1, X5, X5
|
||||
VPADDD X2, X6, X6
|
||||
VPADDD X3, X7, X7
|
||||
VMOVDQU ·one_AVX<>(SB), X0
|
||||
VPADDQ X0, X3, X3
|
||||
|
||||
CMPQ CX, $64
|
||||
JB less_than_64
|
||||
|
||||
XOR_AVX(DI, SI, 0, X4, X5, X6, X7, X13)
|
||||
SUBQ $64, CX
|
||||
JMP done
|
||||
|
||||
less_than_64:
|
||||
VMOVDQU X4, 0(BX)
|
||||
VMOVDQU X5, 16(BX)
|
||||
VMOVDQU X6, 32(BX)
|
||||
VMOVDQU X7, 48(BX)
|
||||
|
||||
finalize:
|
||||
XORQ R11, R11
|
||||
XORQ R12, R12
|
||||
MOVQ CX, BP
|
||||
|
||||
xor_loop:
|
||||
MOVB 0(SI), R11
|
||||
MOVB 0(BX), R12
|
||||
XORQ R11, R12
|
||||
MOVB R12, 0(DI)
|
||||
INCQ SI
|
||||
INCQ BX
|
||||
INCQ DI
|
||||
DECQ BP
|
||||
JA xor_loop
|
||||
|
||||
done:
|
||||
VMOVDQU X3, 48(AX)
|
||||
VZEROUPPER
|
||||
MOVQ R8, SP
|
||||
MOVQ CX, ret+72(FP)
|
||||
RET
|
||||
|
||||
// func hChaCha20AVX(out *[32]byte, nonce *[16]byte, key *[32]byte)
//
// Builds a ChaCha state from the sigma constants (X0), the 32-byte key
// (X1, X2) and the 16-byte nonce (X3), runs 20 rounds over it and stores
// rows X0 and X3 as the 32-byte output. The initial state is not added
// back to the result. Flag 4 is NOSPLIT in textflag.h; the frame is 0 bytes
// with 24 bytes of arguments.
|
||||
TEXT ·hChaCha20AVX(SB), 4, $0-24
|
||||
MOVQ out+0(FP), DI
|
||||
MOVQ nonce+8(FP), AX
|
||||
MOVQ key+16(FP), BX
|
||||
|
||||
// X0 = constants, X1 = key[0:16], X2 = key[16:32], X3 = nonce.
VMOVDQU ·sigma_AVX<>(SB), X0
|
||||
VMOVDQU 0(BX), X1
|
||||
|
||||
VMOVDQU 16(BX), X2
|
||||
VMOVDQU 0(AX), X3
|
||||
// X5/X6 take the low 16 bytes of the 32-byte rol16/rol8 tables; they are
// the c16/c8 VPSHUFB masks of CHACHA_QROUND.
VMOVDQU ·rol16_AVX2<>(SB), X5
|
||||
VMOVDQU ·rol8_AVX2<>(SB), X6
|
||||
|
||||
// CX counts the remaining rounds; each loop iteration performs two.
MOVQ $20, CX
|
||||
|
||||
chacha_loop:
|
||||
CHACHA_QROUND(X0, X1, X2, X3, X4, X5, X6)
|
||||
// Rotate rows X1..X3 before the second quarter-round pass.
CHACHA_SHUFFLE(X1, X2, X3)
|
||||
CHACHA_QROUND(X0, X1, X2, X3, X4, X5, X6)
|
||||
// Reversed argument order undoes the row rotation above.
CHACHA_SHUFFLE(X3, X2, X1)
|
||||
SUBQ $2, CX
|
||||
JNZ chacha_loop
|
||||
|
||||
// out[0:16] = X0, out[16:32] = X3.
VMOVDQU X0, 0(DI)
|
||||
VMOVDQU X3, 16(DI)
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
// func supportsAVX2() bool
//
// Returns the low byte of (runtime·support_avx AND runtime·support_avx2).
// NOTE(review): each MOVQ loads 8 bytes from a runtime flag while only the
// low byte of the result is stored - confirm the width of these runtime
// variables for the targeted Go versions.
|
||||
TEXT ·supportsAVX2(SB), 4, $0-1
|
||||
MOVQ runtime·support_avx(SB), AX
|
||||
MOVQ runtime·support_avx2(SB), BX
|
||||
ANDQ AX, BX
|
||||
MOVB BX, ret+0(FP)
|
||||
RET
|
67
vendor/github.com/aead/chacha20/chacha/chacha_386.go
generated
vendored
Normal file
67
vendor/github.com/aead/chacha20/chacha/chacha_386.go
generated
vendored
Normal file
|
@ -0,0 +1,67 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build 386,!gccgo,!appengine,!nacl
|
||||
|
||||
package chacha
|
||||
|
||||
import "encoding/binary"
|
||||
|
||||
func init() {
|
||||
useSSE2 = supportsSSE2()
|
||||
useSSSE3 = supportsSSSE3()
|
||||
useAVX2 = false
|
||||
}
|
||||
|
||||
func initialize(state *[64]byte, key []byte, nonce *[16]byte) {
|
||||
binary.LittleEndian.PutUint32(state[0:], sigma[0])
|
||||
binary.LittleEndian.PutUint32(state[4:], sigma[1])
|
||||
binary.LittleEndian.PutUint32(state[8:], sigma[2])
|
||||
binary.LittleEndian.PutUint32(state[12:], sigma[3])
|
||||
copy(state[16:], key[:])
|
||||
copy(state[48:], nonce[:])
|
||||
}
|
||||
|
||||
// This function is implemented in chacha_386.s
|
||||
//go:noescape
|
||||
func supportsSSE2() bool
|
||||
|
||||
// This function is implemented in chacha_386.s
|
||||
//go:noescape
|
||||
func supportsSSSE3() bool
|
||||
|
||||
// This function is implemented in chacha_386.s
|
||||
//go:noescape
|
||||
func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
|
||||
|
||||
// This function is implemented in chacha_386.s
|
||||
//go:noescape
|
||||
func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
|
||||
|
||||
// This function is implemented in chacha_386.s
|
||||
//go:noescape
|
||||
func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
|
||||
|
||||
// This function is implemented in chacha_386.s
|
||||
//go:noescape
|
||||
func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
|
||||
|
||||
func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
|
||||
if useSSSE3 {
|
||||
hChaCha20SSSE3(out, nonce, key)
|
||||
} else if useSSE2 {
|
||||
hChaCha20SSE2(out, nonce, key)
|
||||
} else {
|
||||
hChaCha20Generic(out, nonce, key)
|
||||
}
|
||||
}
|
||||
|
||||
func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
|
||||
if useSSSE3 {
|
||||
return xorKeyStreamSSSE3(dst, src, block, state, rounds)
|
||||
} else if useSSE2 {
|
||||
return xorKeyStreamSSE2(dst, src, block, state, rounds)
|
||||
}
|
||||
return xorKeyStreamGeneric(dst, src, block, state, rounds)
|
||||
}
|
311
vendor/github.com/aead/chacha20/chacha/chacha_386.s
generated
vendored
Normal file
311
vendor/github.com/aead/chacha20/chacha/chacha_386.s
generated
vendored
Normal file
|
@ -0,0 +1,311 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build 386,!gccgo,!appengine,!nacl
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
DATA ·sigma<>+0x00(SB)/4, $0x61707865
|
||||
DATA ·sigma<>+0x04(SB)/4, $0x3320646e
|
||||
DATA ·sigma<>+0x08(SB)/4, $0x79622d32
|
||||
DATA ·sigma<>+0x0C(SB)/4, $0x6b206574
|
||||
GLOBL ·sigma<>(SB), (NOPTR+RODATA), $16
|
||||
|
||||
DATA ·one<>+0x00(SB)/8, $1
|
||||
DATA ·one<>+0x08(SB)/8, $0
|
||||
GLOBL ·one<>(SB), (NOPTR+RODATA), $16
|
||||
|
||||
DATA ·rol16<>+0x00(SB)/8, $0x0504070601000302
|
||||
DATA ·rol16<>+0x08(SB)/8, $0x0D0C0F0E09080B0A
|
||||
GLOBL ·rol16<>(SB), (NOPTR+RODATA), $16
|
||||
|
||||
DATA ·rol8<>+0x00(SB)/8, $0x0605040702010003
|
||||
DATA ·rol8<>+0x08(SB)/8, $0x0E0D0C0F0A09080B
|
||||
GLOBL ·rol8<>(SB), (NOPTR+RODATA), $16
|
||||
|
||||
// ROTL_SSE2 rotates every 32-bit lane of v left by n bits.
// t is overwritten (scratch register).
#define ROTL_SSE2(n, t, v) \
	MOVO v, t; \
	PSLLL $n, t; \
	PSRLL $(32-n), v; \
	PXOR t, v

// CHACHA_QROUND_SSE2 applies the ChaCha quarter round (add, xor, rotate by
// 16/12/8/7) lane-wise to the four rows v0..v3, i.e. to four quarter rounds
// at once. t0 is scratch.
#define CHACHA_QROUND_SSE2(v0, v1, v2, v3, t0) \
	PADDL v1, v0; \
	PXOR v0, v3; \
	ROTL_SSE2(16, t0, v3); \
	PADDL v3, v2; \
	PXOR v2, v1; \
	ROTL_SSE2(12, t0, v1); \
	PADDL v1, v0; \
	PXOR v0, v3; \
	ROTL_SSE2(8, t0, v3); \
	PADDL v3, v2; \
	PXOR v2, v1; \
	ROTL_SSE2(7, t0, v1)

// CHACHA_QROUND_SSSE3 is CHACHA_QROUND_SSE2 with the 16- and 8-bit rotations
// done by a single PSHUFB each. r16 and r8 must hold (or address) the rol16
// and rol8 shuffle masks; t0 is scratch for the 12- and 7-bit rotations.
#define CHACHA_QROUND_SSSE3(v0, v1, v2, v3, t0, r16, r8) \
	PADDL v1, v0; \
	PXOR v0, v3; \
	PSHUFB r16, v3; \
	PADDL v3, v2; \
	PXOR v2, v1; \
	ROTL_SSE2(12, t0, v1); \
	PADDL v1, v0; \
	PXOR v0, v3; \
	PSHUFB r8, v3; \
	PADDL v3, v2; \
	PXOR v2, v1; \
	ROTL_SSE2(7, t0, v1)

// CHACHA_SHUFFLE rotates the 32-bit lanes of v1, v2 and v3 by one, two and
// three positions respectively. Called as (row1, row2, row3) it lines the
// diagonals up as columns; called with the arguments reversed
// (row3, row2, row1) it undoes that rotation.
#define CHACHA_SHUFFLE(v1, v2, v3) \
	PSHUFL $0x39, v1, v1; \
	PSHUFL $0x4E, v2, v2; \
	PSHUFL $0x93, v3, v3

// XOR reads 64 bytes at off(src), XORs them with the key stream block held in
// v0..v3 and writes the result to off(dst). t0 is scratch; v0..v3 are
// preserved. Unaligned loads/stores are used, so src and dst need no alignment.
#define XOR(dst, src, off, v0, v1, v2, v3, t0) \
	MOVOU 0+off(src), t0; \
	PXOR v0, t0; \
	MOVOU t0, 0+off(dst); \
	MOVOU 16+off(src), t0; \
	PXOR v1, t0; \
	MOVOU t0, 16+off(dst); \
	MOVOU 32+off(src), t0; \
	PXOR v2, t0; \
	MOVOU t0, 32+off(dst); \
	MOVOU 48+off(src), t0; \
	PXOR v3, t0; \
	MOVOU t0, 48+off(dst)
|
||||
|
||||
// FINALIZE XORs len bytes of src with the bytes at block, one byte at a time,
// and stores the result to dst. len must be non-zero on entry. src, block,
// dst and len are advanced/consumed; t0 and t1 are scratch.
//
// The loop is closed with JNZ rather than JA: DECL updates ZF but never CF,
// so an "above" test would depend on whatever CF the preceding XORL left.
// The macro defines the label "finalize", so it can be expanded at most once
// per TEXT block.
#define FINALIZE(dst, src, block, len, t0, t1) \
	XORL t0, t0; \
	XORL t1, t1; \
	finalize: \
	MOVB 0(src), t0; \
	MOVB 0(block), t1; \
	XORL t0, t1; \
	MOVB t1, 0(dst); \
	INCL src; \
	INCL block; \
	INCL dst; \
	DECL len; \
	JNZ finalize
|
||||
|
||||
// func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
//
// XORs src with the ChaCha key stream and writes the result to dst, one
// 64-byte block per iteration. The low 64 bits of state row 3 (the counter)
// are incremented once per generated block and written back to state.
// If the final chunk is shorter than 64 bytes, the last key stream block is
// stored to *block and only the remaining bytes are XORed.
// Returns the number of bytes consumed from that partial block (0 if
// len(src) is a multiple of 64).
//
// Register use: DI=dst, SI=src, CX=remaining length, AX=state, DX=rounds,
// X1..X3 = state rows 1..3, X0 = row 0 / scratch, X4..X7 = working block.
TEXT ·xorKeyStreamSSE2(SB), NOSPLIT, $0-40
	MOVL dst_base+0(FP), DI
	MOVL src_base+12(FP), SI
	MOVL src_len+16(FP), CX
	MOVL state+28(FP), AX
	MOVL rounds+32(FP), DX

	MOVOU 0(AX), X0
	MOVOU 16(AX), X1
	MOVOU 32(AX), X2
	MOVOU 48(AX), X3

	TESTL CX, CX
	JZ done

at_least_64:
	// Start a new block from the current state.
	MOVO X0, X4
	MOVO X1, X5
	MOVO X2, X6
	MOVO X3, X7

	MOVL DX, BX

chacha_loop:
	// One double round (column round + diagonal round); X0 is scratch here.
	CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
	CHACHA_SHUFFLE(X5, X6, X7)
	CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
	CHACHA_SHUFFLE(X7, X6, X5)
	SUBL $2, BX
	JA chacha_loop

	// Feed-forward: add the input state, then bump the counter.
	MOVOU 0(AX), X0
	PADDL X0, X4
	PADDL X1, X5
	PADDL X2, X6
	PADDL X3, X7
	MOVOU ·one<>(SB), X0
	PADDQ X0, X3

	CMPL CX, $64
	JB less_than_64

	XOR(DI, SI, 0, X4, X5, X6, X7, X0)
	MOVOU 0(AX), X0 // XOR clobbered X0; reload state row 0
	ADDL $64, SI
	ADDL $64, DI
	SUBL $64, CX
	JNZ at_least_64

less_than_64:
	MOVL CX, BP
	TESTL BP, BP
	JZ done

	// Buffer the key stream block, then XOR the tail byte by byte.
	MOVL block+24(FP), BX
	MOVOU X4, 0(BX)
	MOVOU X5, 16(BX)
	MOVOU X6, 32(BX)
	MOVOU X7, 48(BX)
	FINALIZE(DI, SI, BX, BP, AX, DX)

done:
	// AX may have been used as scratch by FINALIZE; reload it.
	MOVL state+28(FP), AX
	MOVOU X3, 48(AX)
	MOVL CX, ret+36(FP)
	RET
|
||||
|
||||
// func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
//
// Same contract as xorKeyStreamSSE2, but the 16- and 8-bit rotations use
// PSHUFB. Because X1/X2 hold the shuffle masks, state rows 1 and 2 and the
// counter increment are kept in a 16-byte-aligned scratch area on the stack:
//   0(SP)  = one (counter increment)
//   16(SP) = state row 1
//   32(SP) = state row 2
// SP is realigned by hand (original value saved in BP) and restored at
// less_than_64, before any FP-relative argument is accessed again.
TEXT ·xorKeyStreamSSSE3(SB), NOSPLIT, $64-40
	MOVL dst_base+0(FP), DI
	MOVL src_base+12(FP), SI
	MOVL src_len+16(FP), CX
	MOVL state+28(FP), AX
	MOVL rounds+32(FP), DX

	MOVOU 48(AX), X3
	TESTL CX, CX
	JZ done

	// Align SP to 16 bytes so MOVO/PADDL can take stack operands.
	MOVL SP, BP
	ADDL $16, SP
	ANDL $-16, SP

	MOVOU ·one<>(SB), X0
	MOVOU 16(AX), X1
	MOVOU 32(AX), X2
	MOVO X0, 0(SP)
	MOVO X1, 16(SP)
	MOVO X2, 32(SP)

	MOVOU 0(AX), X0
	MOVOU ·rol16<>(SB), X1
	MOVOU ·rol8<>(SB), X2

at_least_64:
	MOVO X0, X4
	MOVO 16(SP), X5
	MOVO 32(SP), X6
	MOVO X3, X7

	MOVL DX, BX

chacha_loop:
	// One double round; X0 is scratch, X1/X2 are the rol16/rol8 masks.
	CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X0, X1, X2)
	CHACHA_SHUFFLE(X5, X6, X7)
	CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X0, X1, X2)
	CHACHA_SHUFFLE(X7, X6, X5)
	SUBL $2, BX
	JA chacha_loop

	// Feed-forward: add the input state, then bump the counter.
	MOVOU 0(AX), X0
	PADDL X0, X4
	PADDL 16(SP), X5
	PADDL 32(SP), X6
	PADDL X3, X7
	PADDQ 0(SP), X3

	CMPL CX, $64
	JB less_than_64

	XOR(DI, SI, 0, X4, X5, X6, X7, X0)
	MOVOU 0(AX), X0 // XOR clobbered X0; reload state row 0
	ADDL $64, SI
	ADDL $64, DI
	SUBL $64, CX
	JNZ at_least_64

less_than_64:
	MOVL BP, SP // restore the original stack pointer
	MOVL CX, BP
	TESTL BP, BP
	JE done

	// Buffer the key stream block, then XOR the tail byte by byte.
	MOVL block+24(FP), BX
	MOVOU X4, 0(BX)
	MOVOU X5, 16(BX)
	MOVOU X6, 32(BX)
	MOVOU X7, 48(BX)
	FINALIZE(DI, SI, BX, BP, AX, DX)

done:
	// AX may have been used as scratch by FINALIZE; reload it.
	MOVL state+28(FP), AX
	MOVOU X3, 48(AX)
	MOVL CX, ret+36(FP)
	RET
|
||||
|
||||
// func supportsSSE2() bool
//
// Executes CPUID with AX=1 and returns bit 26 of DX (the SSE2 feature flag).
TEXT ·supportsSSE2(SB), NOSPLIT, $0-1
	XORL AX, AX
	INCL AX // AX = 1: CPUID feature-information leaf
	CPUID
	SHRL $26, DX
	ANDL $1, DX
	MOVB DX, ret+0(FP)
	RET
|
||||
|
||||
// func supportsSSSE3() bool
//
// Executes CPUID with AX=1 and returns bit 9 of CX (the SSSE3 feature flag).
TEXT ·supportsSSSE3(SB), NOSPLIT, $0-1
	XORL AX, AX
	INCL AX // AX = 1: CPUID feature-information leaf
	CPUID
	SHRL $9, CX
	ANDL $1, CX
	MOVB CX, ret+0(FP)
	RET
|
||||
|
||||
// func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
//
// Builds the state sigma | key[0:16] | key[16:32] | nonce, runs 20 ChaCha
// rounds over it and writes rows 0 and 3 of the result to out. Unlike the
// key stream functions, the input state is not added back afterwards.
TEXT ·hChaCha20SSE2(SB), NOSPLIT, $0-12
	MOVL out+0(FP), DI
	MOVL nonce+4(FP), AX
	MOVL key+8(FP), BX

	MOVOU ·sigma<>(SB), X0
	MOVOU 0(BX), X1
	MOVOU 16(BX), X2
	MOVOU 0(AX), X3

	MOVL $20, CX

chacha_loop:
	// One double round per iteration; X4 is scratch.
	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
	CHACHA_SHUFFLE(X1, X2, X3)
	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
	CHACHA_SHUFFLE(X3, X2, X1)
	SUBL $2, CX
	JNZ chacha_loop

	MOVOU X0, 0(DI)
	MOVOU X3, 16(DI)
	RET
|
||||
|
||||
// func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
//
// Same computation as hChaCha20SSE2, with the 16- and 8-bit rotations done
// via PSHUFB (masks in X5 and X6). Rows 0 and 3 of the state after 20 rounds
// are written to out; the input state is not added back.
TEXT ·hChaCha20SSSE3(SB), NOSPLIT, $0-12
	MOVL out+0(FP), DI
	MOVL nonce+4(FP), AX
	MOVL key+8(FP), BX

	MOVOU ·sigma<>(SB), X0
	MOVOU 0(BX), X1
	MOVOU 16(BX), X2
	MOVOU 0(AX), X3
	MOVOU ·rol16<>(SB), X5
	MOVOU ·rol8<>(SB), X6

	MOVL $20, CX

chacha_loop:
	// One double round per iteration; X4 is scratch.
	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
	CHACHA_SHUFFLE(X1, X2, X3)
	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
	CHACHA_SHUFFLE(X3, X2, X1)
	SUBL $2, CX
	JNZ chacha_loop

	MOVOU X0, 0(DI)
	MOVOU X3, 16(DI)
	RET
|
788
vendor/github.com/aead/chacha20/chacha/chacha_amd64.s
generated
vendored
Normal file
788
vendor/github.com/aead/chacha20/chacha/chacha_amd64.s
generated
vendored
Normal file
|
@ -0,0 +1,788 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build amd64,!gccgo,!appengine,!nacl
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// sigma is the ASCII string "expand 32-byte k" stored as four little-endian
// 32-bit words; initialize and hChaCha20* use it as row 0 of the state.
DATA ·sigma<>+0x00(SB)/4, $0x61707865
DATA ·sigma<>+0x04(SB)/4, $0x3320646e
DATA ·sigma<>+0x08(SB)/4, $0x79622d32
DATA ·sigma<>+0x0C(SB)/4, $0x6b206574
GLOBL ·sigma<>(SB), (NOPTR+RODATA), $16

// one is the 128-bit value 1. It is added with PADDQ to state row 3, which
// increments the low 64 bits of that row (the block counter).
DATA ·one<>+0x00(SB)/8, $1
DATA ·one<>+0x08(SB)/8, $0
GLOBL ·one<>(SB), (NOPTR+RODATA), $16

// rol16 is a PSHUFB byte-shuffle mask that rotates every 32-bit lane left by
// 16 bits.
DATA ·rol16<>+0x00(SB)/8, $0x0504070601000302
DATA ·rol16<>+0x08(SB)/8, $0x0D0C0F0E09080B0A
GLOBL ·rol16<>(SB), (NOPTR+RODATA), $16

// rol8 is a PSHUFB byte-shuffle mask that rotates every 32-bit lane left by
// 8 bits.
DATA ·rol8<>+0x00(SB)/8, $0x0605040702010003
DATA ·rol8<>+0x08(SB)/8, $0x0E0D0C0F0A09080B
GLOBL ·rol8<>(SB), (NOPTR+RODATA), $16
|
||||
|
||||
// ROTL_SSE2 rotates every 32-bit lane of v left by n bits.
// t is overwritten (scratch register).
#define ROTL_SSE2(n, t, v) \
	MOVO v, t; \
	PSLLL $n, t; \
	PSRLL $(32-n), v; \
	PXOR t, v

// CHACHA_QROUND_SSE2 applies the ChaCha quarter round (add, xor, rotate by
// 16/12/8/7) lane-wise to the four rows v0..v3, i.e. to four quarter rounds
// at once. t0 is scratch.
#define CHACHA_QROUND_SSE2(v0, v1, v2, v3, t0) \
	PADDL v1, v0; \
	PXOR v0, v3; \
	ROTL_SSE2(16, t0, v3); \
	PADDL v3, v2; \
	PXOR v2, v1; \
	ROTL_SSE2(12, t0, v1); \
	PADDL v1, v0; \
	PXOR v0, v3; \
	ROTL_SSE2(8, t0, v3); \
	PADDL v3, v2; \
	PXOR v2, v1; \
	ROTL_SSE2(7, t0, v1)

// CHACHA_QROUND_SSSE3 is CHACHA_QROUND_SSE2 with the 16- and 8-bit rotations
// done by a single PSHUFB each. r16 and r8 must hold (or address, 16-byte
// aligned) the rol16 and rol8 shuffle masks; t0 is scratch for the 12- and
// 7-bit rotations.
#define CHACHA_QROUND_SSSE3(v0, v1, v2, v3, t0, r16, r8) \
	PADDL v1, v0; \
	PXOR v0, v3; \
	PSHUFB r16, v3; \
	PADDL v3, v2; \
	PXOR v2, v1; \
	ROTL_SSE2(12, t0, v1); \
	PADDL v1, v0; \
	PXOR v0, v3; \
	PSHUFB r8, v3; \
	PADDL v3, v2; \
	PXOR v2, v1; \
	ROTL_SSE2(7, t0, v1)

// CHACHA_SHUFFLE rotates the 32-bit lanes of v1, v2 and v3 by one, two and
// three positions respectively. Called as (row1, row2, row3) it lines the
// diagonals up as columns; called with the arguments reversed
// (row3, row2, row1) it undoes that rotation.
#define CHACHA_SHUFFLE(v1, v2, v3) \
	PSHUFL $0x39, v1, v1; \
	PSHUFL $0x4E, v2, v2; \
	PSHUFL $0x93, v3, v3

// XOR reads 64 bytes at off(src), XORs them with the key stream block held in
// v0..v3 and writes the result to off(dst). t0 is scratch; v0..v3 are
// preserved. Unaligned loads/stores are used, so src and dst need no alignment.
#define XOR(dst, src, off, v0, v1, v2, v3, t0) \
	MOVOU 0+off(src), t0; \
	PXOR v0, t0; \
	MOVOU t0, 0+off(dst); \
	MOVOU 16+off(src), t0; \
	PXOR v1, t0; \
	MOVOU t0, 16+off(dst); \
	MOVOU 32+off(src), t0; \
	PXOR v2, t0; \
	MOVOU t0, 32+off(dst); \
	MOVOU 48+off(src), t0; \
	PXOR v3, t0; \
	MOVOU t0, 48+off(dst)
|
||||
|
||||
// func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
//
// XORs src with the ChaCha key stream and writes the result to dst. Up to
// four 64-byte blocks are computed in parallel; the code path is selected by
// the remaining length (>192, 129..192, 65..128, 1..64 bytes). The low 64
// bits of state row 3 (the counter) are incremented once per generated block
// and written back to state on return.
// If the input does not end on a 64-byte boundary, the last key stream block
// is stored to *block and the tail is XORed byte by byte.
// Returns the number of bytes consumed from that partial block (0 if
// len(src) is a multiple of 64).
//
// Register use: DI=dst, SI=src, CX=remaining length, BX=block, AX=state,
// DX=rounds, R8=round counter, R9=saved SP, R14=bytes already XORed by the
// current path when it falls into less_than_64.
//
// Scratch area (SP is realigned to 16 bytes by hand and restored at done):
//   0..48(SP) = state rows 0..3   64(SP) = one   80(SP) = register spill slot
TEXT ·xorKeyStreamSSE2(SB), NOSPLIT, $112-80
	MOVQ dst_base+0(FP), DI
	MOVQ src_base+24(FP), SI
	MOVQ src_len+32(FP), CX
	MOVQ block+48(FP), BX
	MOVQ state+56(FP), AX
	MOVQ rounds+64(FP), DX

	// Align SP to 16 bytes so MOVO/PADDL/PADDQ can take stack operands.
	MOVQ SP, R9
	ADDQ $16, SP
	ANDQ $-16, SP

	MOVOU 0(AX), X0
	MOVOU 16(AX), X1
	MOVOU 32(AX), X2
	MOVOU 48(AX), X3
	MOVOU ·one<>(SB), X15

	TESTQ CX, CX
	JZ done

	CMPQ CX, $64
	JBE between_0_and_64

	CMPQ CX, $128
	JBE between_64_and_128

	MOVO X0, 0(SP)
	MOVO X1, 16(SP)
	MOVO X2, 32(SP)
	MOVO X3, 48(SP)
	MOVO X15, 64(SP)

	CMPQ CX, $192
	JBE between_128_and_192

	MOVQ $192, R14

at_least_256:
	// Four blocks with consecutive counters:
	// X0..X3, X4..X7, X12..X15, X8..X11 (in key stream order).
	MOVO X0, X4
	MOVO X1, X5
	MOVO X2, X6
	MOVO X3, X7
	PADDQ 64(SP), X7
	MOVO X0, X12
	MOVO X1, X13
	MOVO X2, X14
	MOVO X7, X15
	PADDQ 64(SP), X15
	MOVO X0, X8
	MOVO X1, X9
	MOVO X2, X10
	MOVO X15, X11
	PADDQ 64(SP), X11

	MOVQ DX, R8

chacha_loop_256:
	// All 16 XMM registers hold state, so one register is spilled to 80(SP)
	// to serve as scratch for each pair of quarter rounds.
	MOVO X8, 80(SP)
	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X8)
	CHACHA_QROUND_SSE2(X4, X5, X6, X7, X8)
	MOVO 80(SP), X8

	MOVO X0, 80(SP)
	CHACHA_QROUND_SSE2(X12, X13, X14, X15, X0)
	CHACHA_QROUND_SSE2(X8, X9, X10, X11, X0)
	MOVO 80(SP), X0

	CHACHA_SHUFFLE(X1, X2, X3)
	CHACHA_SHUFFLE(X5, X6, X7)
	CHACHA_SHUFFLE(X13, X14, X15)
	CHACHA_SHUFFLE(X9, X10, X11)

	MOVO X8, 80(SP)
	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X8)
	CHACHA_QROUND_SSE2(X4, X5, X6, X7, X8)
	MOVO 80(SP), X8

	MOVO X0, 80(SP)
	CHACHA_QROUND_SSE2(X12, X13, X14, X15, X0)
	CHACHA_QROUND_SSE2(X8, X9, X10, X11, X0)
	MOVO 80(SP), X0

	CHACHA_SHUFFLE(X3, X2, X1)
	CHACHA_SHUFFLE(X7, X6, X5)
	CHACHA_SHUFFLE(X15, X14, X13)
	CHACHA_SHUFFLE(X11, X10, X9)
	SUBQ $2, R8
	JA chacha_loop_256

	MOVO X8, 80(SP) // X8 is used as XOR scratch below

	// Block 1: feed-forward and XOR.
	PADDL 0(SP), X0
	PADDL 16(SP), X1
	PADDL 32(SP), X2
	PADDL 48(SP), X3
	XOR(DI, SI, 0, X0, X1, X2, X3, X8)

	// Reload the input state; X3 tracks the counter of the next block.
	MOVO 0(SP), X0
	MOVO 16(SP), X1
	MOVO 32(SP), X2
	MOVO 48(SP), X3
	PADDQ 64(SP), X3

	// Block 2.
	PADDL X0, X4
	PADDL X1, X5
	PADDL X2, X6
	PADDL X3, X7
	PADDQ 64(SP), X3
	XOR(DI, SI, 64, X4, X5, X6, X7, X8)

	MOVO 64(SP), X5
	MOVO 80(SP), X8

	// Block 3.
	PADDL X0, X12
	PADDL X1, X13
	PADDL X2, X14
	PADDL X3, X15
	PADDQ X5, X3
	XOR(DI, SI, 128, X12, X13, X14, X15, X4)

	// Block 4 (left in X8..X11 for less_than_64 if it is only partly used).
	PADDL X0, X8
	PADDL X1, X9
	PADDL X2, X10
	PADDL X3, X11
	PADDQ X5, X3

	CMPQ CX, $256
	JB less_than_64

	XOR(DI, SI, 192, X8, X9, X10, X11, X4)
	MOVO X3, 48(SP)
	ADDQ $256, SI
	ADDQ $256, DI
	SUBQ $256, CX
	CMPQ CX, $192
	JA at_least_256

	TESTQ CX, CX
	JZ done
	MOVO 64(SP), X15 // X15 was used as state; restore the increment
	CMPQ CX, $64
	JBE between_0_and_64
	CMPQ CX, $128
	JBE between_64_and_128

between_128_and_192:
	// Three blocks: X0..X3, X4..X7, X8..X11. X12 is scratch, X15 = one.
	MOVQ $128, R14
	MOVO X0, X4
	MOVO X1, X5
	MOVO X2, X6
	MOVO X3, X7
	PADDQ X15, X7
	MOVO X0, X8
	MOVO X1, X9
	MOVO X2, X10
	MOVO X7, X11
	PADDQ X15, X11

	MOVQ DX, R8

chacha_loop_192:
	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X12)
	CHACHA_QROUND_SSE2(X4, X5, X6, X7, X12)
	CHACHA_QROUND_SSE2(X8, X9, X10, X11, X12)
	CHACHA_SHUFFLE(X1, X2, X3)
	CHACHA_SHUFFLE(X5, X6, X7)
	CHACHA_SHUFFLE(X9, X10, X11)
	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X12)
	CHACHA_QROUND_SSE2(X4, X5, X6, X7, X12)
	CHACHA_QROUND_SSE2(X8, X9, X10, X11, X12)
	CHACHA_SHUFFLE(X3, X2, X1)
	CHACHA_SHUFFLE(X7, X6, X5)
	CHACHA_SHUFFLE(X11, X10, X9)
	SUBQ $2, R8
	JA chacha_loop_192

	PADDL 0(SP), X0
	PADDL 16(SP), X1
	PADDL 32(SP), X2
	PADDL 48(SP), X3
	XOR(DI, SI, 0, X0, X1, X2, X3, X12)

	MOVO 0(SP), X0
	MOVO 16(SP), X1
	MOVO 32(SP), X2
	MOVO 48(SP), X3
	PADDQ X15, X3

	PADDL X0, X4
	PADDL X1, X5
	PADDL X2, X6
	PADDL X3, X7
	PADDQ X15, X3
	XOR(DI, SI, 64, X4, X5, X6, X7, X12)

	PADDL X0, X8
	PADDL X1, X9
	PADDL X2, X10
	PADDL X3, X11
	PADDQ X15, X3

	CMPQ CX, $192
	JB less_than_64

	XOR(DI, SI, 128, X8, X9, X10, X11, X12)
	SUBQ $192, CX
	JMP done

between_64_and_128:
	// Two blocks: X4..X7 and X8..X11; X0..X3 keep the input state.
	MOVQ $64, R14
	MOVO X0, X4
	MOVO X1, X5
	MOVO X2, X6
	MOVO X3, X7
	MOVO X0, X8
	MOVO X1, X9
	MOVO X2, X10
	MOVO X3, X11
	PADDQ X15, X11

	MOVQ DX, R8

chacha_loop_128:
	CHACHA_QROUND_SSE2(X4, X5, X6, X7, X12)
	CHACHA_QROUND_SSE2(X8, X9, X10, X11, X12)
	CHACHA_SHUFFLE(X5, X6, X7)
	CHACHA_SHUFFLE(X9, X10, X11)
	CHACHA_QROUND_SSE2(X4, X5, X6, X7, X12)
	CHACHA_QROUND_SSE2(X8, X9, X10, X11, X12)
	CHACHA_SHUFFLE(X7, X6, X5)
	CHACHA_SHUFFLE(X11, X10, X9)
	SUBQ $2, R8
	JA chacha_loop_128

	PADDL X0, X4
	PADDL X1, X5
	PADDL X2, X6
	PADDL X3, X7
	PADDQ X15, X3
	PADDL X0, X8
	PADDL X1, X9
	PADDL X2, X10
	PADDL X3, X11
	PADDQ X15, X3
	XOR(DI, SI, 0, X4, X5, X6, X7, X12)

	CMPQ CX, $128
	JB less_than_64

	XOR(DI, SI, 64, X8, X9, X10, X11, X12)
	SUBQ $128, CX
	JMP done

between_0_and_64:
	// One block in X8..X11; X0..X3 keep the input state.
	MOVQ $0, R14
	MOVO X0, X8
	MOVO X1, X9
	MOVO X2, X10
	MOVO X3, X11
	MOVQ DX, R8

chacha_loop_64:
	CHACHA_QROUND_SSE2(X8, X9, X10, X11, X12)
	CHACHA_SHUFFLE(X9, X10, X11)
	CHACHA_QROUND_SSE2(X8, X9, X10, X11, X12)
	CHACHA_SHUFFLE(X11, X10, X9)
	SUBQ $2, R8
	JA chacha_loop_64

	PADDL X0, X8
	PADDL X1, X9
	PADDL X2, X10
	PADDL X3, X11
	PADDQ X15, X3
	CMPQ CX, $64
	JB less_than_64

	XOR(DI, SI, 0, X8, X9, X10, X11, X12)
	SUBQ $64, CX
	JMP done

less_than_64:
	// R14 contains the number of bytes already XORed by the path that
	// jumped here; skip them and buffer the last block (X8..X11).
	ADDQ R14, SI
	ADDQ R14, DI
	SUBQ R14, CX
	MOVOU X8, 0(BX)
	MOVOU X9, 16(BX)
	MOVOU X10, 32(BX)
	MOVOU X11, 48(BX)
	XORQ R11, R11
	XORQ R12, R12
	MOVQ CX, BP // CX (1..63 here) is kept intact as the return value

xor_loop:
	MOVB 0(SI), R11
	MOVB 0(BX), R12
	XORQ R11, R12
	MOVB R12, 0(DI)
	INCQ SI
	INCQ BX
	INCQ DI
	DECQ BP
	JNZ xor_loop // DECQ updates ZF but not CF, so test ZF only

done:
	MOVOU X3, 48(AX)
	MOVQ R9, SP // restore SP before touching FP-relative operands
	MOVQ CX, ret+72(FP)
	RET
|
||||
|
||||
// func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
//
// Same contract and structure as xorKeyStreamSSE2, but the 16- and 8-bit
// rotations use PSHUFB. The shuffle masks live in X13/X14 on the 1..3 block
// paths; on the 4-block path every XMM register holds state, so the masks
// are read from the stack instead.
// Returns the number of bytes consumed from the final, partially used key
// stream block (0 if len(src) is a multiple of 64).
//
// Register use: DI=dst, SI=src, CX=remaining length, BX=block, AX=state,
// DX=rounds, R8=round counter, R9=saved SP, R14=bytes already XORed by the
// current path when it falls into less_than_64.
//
// Scratch area (SP is realigned to 16 bytes by hand and restored at done):
//   0..48(SP) = state rows 0..3   64(SP) = one   80(SP) = register spill slot
//   96(SP) = rol16 mask           112(SP) = rol8 mask
TEXT ·xorKeyStreamSSSE3(SB), NOSPLIT, $144-80
	MOVQ dst_base+0(FP), DI
	MOVQ src_base+24(FP), SI
	MOVQ src_len+32(FP), CX
	MOVQ block+48(FP), BX
	MOVQ state+56(FP), AX
	MOVQ rounds+64(FP), DX

	// Align SP to 16 bytes so MOVO/PADDL/PADDQ/PSHUFB can take stack operands.
	MOVQ SP, R9
	ADDQ $16, SP
	ANDQ $-16, SP

	MOVOU 0(AX), X0
	MOVOU 16(AX), X1
	MOVOU 32(AX), X2
	MOVOU 48(AX), X3
	MOVOU ·rol16<>(SB), X13
	MOVOU ·rol8<>(SB), X14
	MOVOU ·one<>(SB), X15

	TESTQ CX, CX
	JZ done

	CMPQ CX, $64
	JBE between_0_and_64

	CMPQ CX, $128
	JBE between_64_and_128

	MOVO X0, 0(SP)
	MOVO X1, 16(SP)
	MOVO X2, 32(SP)
	MOVO X3, 48(SP)
	MOVO X15, 64(SP)

	CMPQ CX, $192
	JBE between_128_and_192

	MOVO X13, 96(SP)
	MOVO X14, 112(SP)
	MOVQ $192, R14

at_least_256:
	// Four blocks with consecutive counters:
	// X0..X3, X4..X7, X12..X15, X8..X11 (in key stream order).
	MOVO X0, X4
	MOVO X1, X5
	MOVO X2, X6
	MOVO X3, X7
	PADDQ 64(SP), X7
	MOVO X0, X12
	MOVO X1, X13
	MOVO X2, X14
	MOVO X7, X15
	PADDQ 64(SP), X15
	MOVO X0, X8
	MOVO X1, X9
	MOVO X2, X10
	MOVO X15, X11
	PADDQ 64(SP), X11

	MOVQ DX, R8

chacha_loop_256:
	// One register is spilled to 80(SP) to serve as scratch for each pair of
	// quarter rounds; the shuffle masks are read from 96(SP)/112(SP).
	MOVO X8, 80(SP)
	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X8, 96(SP), 112(SP))
	CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X8, 96(SP), 112(SP))
	MOVO 80(SP), X8

	MOVO X0, 80(SP)
	CHACHA_QROUND_SSSE3(X12, X13, X14, X15, X0, 96(SP), 112(SP))
	CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X0, 96(SP), 112(SP))
	MOVO 80(SP), X0

	CHACHA_SHUFFLE(X1, X2, X3)
	CHACHA_SHUFFLE(X5, X6, X7)
	CHACHA_SHUFFLE(X13, X14, X15)
	CHACHA_SHUFFLE(X9, X10, X11)

	MOVO X8, 80(SP)
	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X8, 96(SP), 112(SP))
	CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X8, 96(SP), 112(SP))
	MOVO 80(SP), X8

	MOVO X0, 80(SP)
	CHACHA_QROUND_SSSE3(X12, X13, X14, X15, X0, 96(SP), 112(SP))
	CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X0, 96(SP), 112(SP))
	MOVO 80(SP), X0

	CHACHA_SHUFFLE(X3, X2, X1)
	CHACHA_SHUFFLE(X7, X6, X5)
	CHACHA_SHUFFLE(X15, X14, X13)
	CHACHA_SHUFFLE(X11, X10, X9)
	SUBQ $2, R8
	JA chacha_loop_256

	MOVO X8, 80(SP) // X8 is used as XOR scratch below

	// Block 1: feed-forward and XOR.
	PADDL 0(SP), X0
	PADDL 16(SP), X1
	PADDL 32(SP), X2
	PADDL 48(SP), X3
	XOR(DI, SI, 0, X0, X1, X2, X3, X8)

	// Reload the input state; X3 tracks the counter of the next block.
	MOVO 0(SP), X0
	MOVO 16(SP), X1
	MOVO 32(SP), X2
	MOVO 48(SP), X3
	PADDQ 64(SP), X3

	// Block 2.
	PADDL X0, X4
	PADDL X1, X5
	PADDL X2, X6
	PADDL X3, X7
	PADDQ 64(SP), X3
	XOR(DI, SI, 64, X4, X5, X6, X7, X8)

	MOVO 64(SP), X5
	MOVO 80(SP), X8

	// Block 3.
	PADDL X0, X12
	PADDL X1, X13
	PADDL X2, X14
	PADDL X3, X15
	PADDQ X5, X3
	XOR(DI, SI, 128, X12, X13, X14, X15, X4)

	// Block 4 (left in X8..X11 for less_than_64 if it is only partly used).
	PADDL X0, X8
	PADDL X1, X9
	PADDL X2, X10
	PADDL X3, X11
	PADDQ X5, X3

	CMPQ CX, $256
	JB less_than_64

	XOR(DI, SI, 192, X8, X9, X10, X11, X4)
	MOVO X3, 48(SP)
	ADDQ $256, SI
	ADDQ $256, DI
	SUBQ $256, CX
	CMPQ CX, $192
	JA at_least_256

	TESTQ CX, CX
	JZ done
	// X13..X15 were used as state; restore the masks and the increment.
	MOVOU ·rol16<>(SB), X13
	MOVOU ·rol8<>(SB), X14
	MOVO 64(SP), X15
	CMPQ CX, $64
	JBE between_0_and_64
	CMPQ CX, $128
	JBE between_64_and_128

between_128_and_192:
	// Three blocks: X0..X3, X4..X7, X8..X11. X12 is scratch,
	// X13/X14 = rol16/rol8, X15 = one.
	MOVQ $128, R14
	MOVO X0, X4
	MOVO X1, X5
	MOVO X2, X6
	MOVO X3, X7
	PADDQ X15, X7
	MOVO X0, X8
	MOVO X1, X9
	MOVO X2, X10
	MOVO X7, X11
	PADDQ X15, X11

	MOVQ DX, R8

chacha_loop_192:
	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X12, X13, X14)
	CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X12, X13, X14)
	CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X12, X13, X14)
	CHACHA_SHUFFLE(X1, X2, X3)
	CHACHA_SHUFFLE(X5, X6, X7)
	CHACHA_SHUFFLE(X9, X10, X11)
	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X12, X13, X14)
	CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X12, X13, X14)
	CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X12, X13, X14)
	CHACHA_SHUFFLE(X3, X2, X1)
	CHACHA_SHUFFLE(X7, X6, X5)
	CHACHA_SHUFFLE(X11, X10, X9)
	SUBQ $2, R8
	JA chacha_loop_192

	PADDL 0(SP), X0
	PADDL 16(SP), X1
	PADDL 32(SP), X2
	PADDL 48(SP), X3
	XOR(DI, SI, 0, X0, X1, X2, X3, X12)

	MOVO 0(SP), X0
	MOVO 16(SP), X1
	MOVO 32(SP), X2
	MOVO 48(SP), X3
	PADDQ X15, X3

	PADDL X0, X4
	PADDL X1, X5
	PADDL X2, X6
	PADDL X3, X7
	PADDQ X15, X3
	XOR(DI, SI, 64, X4, X5, X6, X7, X12)

	PADDL X0, X8
	PADDL X1, X9
	PADDL X2, X10
	PADDL X3, X11
	PADDQ X15, X3

	CMPQ CX, $192
	JB less_than_64

	XOR(DI, SI, 128, X8, X9, X10, X11, X12)
	SUBQ $192, CX
	JMP done

between_64_and_128:
	// Two blocks: X4..X7 and X8..X11; X0..X3 keep the input state.
	MOVQ $64, R14
	MOVO X0, X4
	MOVO X1, X5
	MOVO X2, X6
	MOVO X3, X7
	MOVO X0, X8
	MOVO X1, X9
	MOVO X2, X10
	MOVO X3, X11
	PADDQ X15, X11

	MOVQ DX, R8

chacha_loop_128:
	CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X12, X13, X14)
	CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X12, X13, X14)
	CHACHA_SHUFFLE(X5, X6, X7)
	CHACHA_SHUFFLE(X9, X10, X11)
	CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X12, X13, X14)
	CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X12, X13, X14)
	CHACHA_SHUFFLE(X7, X6, X5)
	CHACHA_SHUFFLE(X11, X10, X9)
	SUBQ $2, R8
	JA chacha_loop_128

	PADDL X0, X4
	PADDL X1, X5
	PADDL X2, X6
	PADDL X3, X7
	PADDQ X15, X3
	PADDL X0, X8
	PADDL X1, X9
	PADDL X2, X10
	PADDL X3, X11
	PADDQ X15, X3
	XOR(DI, SI, 0, X4, X5, X6, X7, X12)

	CMPQ CX, $128
	JB less_than_64

	XOR(DI, SI, 64, X8, X9, X10, X11, X12)
	SUBQ $128, CX
	JMP done

between_0_and_64:
	// One block in X8..X11; X0..X3 keep the input state.
	MOVQ $0, R14
	MOVO X0, X8
	MOVO X1, X9
	MOVO X2, X10
	MOVO X3, X11
	MOVQ DX, R8

chacha_loop_64:
	CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X12, X13, X14)
	CHACHA_SHUFFLE(X9, X10, X11)
	CHACHA_QROUND_SSSE3(X8, X9, X10, X11, X12, X13, X14)
	CHACHA_SHUFFLE(X11, X10, X9)
	SUBQ $2, R8
	JA chacha_loop_64

	PADDL X0, X8
	PADDL X1, X9
	PADDL X2, X10
	PADDL X3, X11
	PADDQ X15, X3
	CMPQ CX, $64
	JB less_than_64

	XOR(DI, SI, 0, X8, X9, X10, X11, X12)
	SUBQ $64, CX
	JMP done

less_than_64:
	// R14 contains the number of bytes already XORed by the path that
	// jumped here; skip them and buffer the last block (X8..X11).
	ADDQ R14, SI
	ADDQ R14, DI
	SUBQ R14, CX
	MOVOU X8, 0(BX)
	MOVOU X9, 16(BX)
	MOVOU X10, 32(BX)
	MOVOU X11, 48(BX)
	XORQ R11, R11
	XORQ R12, R12
	MOVQ CX, BP // CX (1..63 here) is kept intact as the return value

xor_loop:
	MOVB 0(SI), R11
	MOVB 0(BX), R12
	XORQ R11, R12
	MOVB R12, 0(DI)
	INCQ SI
	INCQ BX
	INCQ DI
	DECQ BP
	JNZ xor_loop // DECQ updates ZF but not CF, so test ZF only

done:
	MOVQ R9, SP // restore SP before touching FP-relative operands
	MOVOU X3, 48(AX)
	MOVQ CX, ret+72(FP)
	RET
|
||||
|
||||
// func supportsSSSE3() bool
//
// Executes CPUID with AX=1 and returns bit 9 of CX (the SSSE3 feature flag).
TEXT ·supportsSSSE3(SB), NOSPLIT, $0-1
	XORQ AX, AX
	INCQ AX // AX = 1: CPUID feature-information leaf
	CPUID
	SHRQ $9, CX
	ANDQ $1, CX
	MOVB CX, ret+0(FP)
	RET
|
||||
|
||||
// func initialize(state *[64]byte, key []byte, nonce *[16]byte)
//
// Fills the 64-byte state with sigma | key[0:16] | key[16:32] | nonce[0:16].
// Exactly 32 bytes are read from key; its length is not checked here.
TEXT ·initialize(SB), NOSPLIT, $0-40
	MOVQ state+0(FP), DI
	MOVQ key+8(FP), AX
	MOVQ nonce+32(FP), BX

	MOVOU ·sigma<>(SB), X0
	MOVOU 0(AX), X1
	MOVOU 16(AX), X2
	MOVOU 0(BX), X3

	MOVOU X0, 0(DI)
	MOVOU X1, 16(DI)
	MOVOU X2, 32(DI)
	MOVOU X3, 48(DI)
	RET
|
||||
|
||||
// func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
//
// Builds the state sigma | key[0:16] | key[16:32] | nonce, runs 20 ChaCha
// rounds over it and writes rows 0 and 3 of the result to out. Unlike the
// key stream functions, the input state is not added back afterwards.
TEXT ·hChaCha20SSE2(SB), NOSPLIT, $0-24
	MOVQ out+0(FP), DI
	MOVQ nonce+8(FP), AX
	MOVQ key+16(FP), BX

	MOVOU ·sigma<>(SB), X0
	MOVOU 0(BX), X1
	MOVOU 16(BX), X2
	MOVOU 0(AX), X3

	MOVQ $20, CX

chacha_loop:
	// One double round per iteration; X4 is scratch.
	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
	CHACHA_SHUFFLE(X1, X2, X3)
	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
	CHACHA_SHUFFLE(X3, X2, X1)
	SUBQ $2, CX
	JNZ chacha_loop

	MOVOU X0, 0(DI)
	MOVOU X3, 16(DI)
	RET
|
||||
|
||||
// func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
//
// Same computation as hChaCha20SSE2, with the 16- and 8-bit rotations done
// via PSHUFB (masks in X5 and X6). Rows 0 and 3 of the state after 20 rounds
// are written to out; the input state is not added back.
TEXT ·hChaCha20SSSE3(SB), NOSPLIT, $0-24
	MOVQ out+0(FP), DI
	MOVQ nonce+8(FP), AX
	MOVQ key+16(FP), BX

	MOVOU ·sigma<>(SB), X0
	MOVOU 0(BX), X1
	MOVOU 16(BX), X2
	MOVOU 0(AX), X3
	MOVOU ·rol16<>(SB), X5
	MOVOU ·rol8<>(SB), X6

	MOVQ $20, CX

chacha_loop:
	// One double round per iteration; X4 is scratch.
	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
	CHACHA_SHUFFLE(X1, X2, X3)
	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
	CHACHA_SHUFFLE(X3, X2, X1)
	SUBQ $2, CX
	JNZ chacha_loop

	MOVOU X0, 0(DI)
	MOVOU X3, 16(DI)
	RET
|
319
vendor/github.com/aead/chacha20/chacha/chacha_generic.go
generated
vendored
Normal file
319
vendor/github.com/aead/chacha20/chacha/chacha_generic.go
generated
vendored
Normal file
|
@ -0,0 +1,319 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
package chacha
|
||||
|
||||
import "encoding/binary"
|
||||
|
||||
// sigma holds the four constant words that hChaCha20Generic loads into
// v00..v03. Read as little-endian bytes they spell "expand 32-byte k".
var sigma = [4]uint32{0x61707865, 0x3320646e, 0x79622d32, 0x6b206574}
|
||||
|
||||
func xorKeyStreamGeneric(dst, src []byte, block, state *[64]byte, rounds int) int {
|
||||
for len(src) >= 64 {
|
||||
chachaGeneric(block, state, rounds)
|
||||
|
||||
for i, v := range block {
|
||||
dst[i] = src[i] ^ v
|
||||
}
|
||||
src = src[64:]
|
||||
dst = dst[64:]
|
||||
}
|
||||
|
||||
n := len(src)
|
||||
if n > 0 {
|
||||
chachaGeneric(block, state, rounds)
|
||||
for i, v := range src {
|
||||
dst[i] = v ^ block[i]
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func chachaGeneric(dst *[64]byte, state *[64]byte, rounds int) {
|
||||
v00 := binary.LittleEndian.Uint32(state[0:])
|
||||
v01 := binary.LittleEndian.Uint32(state[4:])
|
||||
v02 := binary.LittleEndian.Uint32(state[8:])
|
||||
v03 := binary.LittleEndian.Uint32(state[12:])
|
||||
v04 := binary.LittleEndian.Uint32(state[16:])
|
||||
v05 := binary.LittleEndian.Uint32(state[20:])
|
||||
v06 := binary.LittleEndian.Uint32(state[24:])
|
||||
v07 := binary.LittleEndian.Uint32(state[28:])
|
||||
v08 := binary.LittleEndian.Uint32(state[32:])
|
||||
v09 := binary.LittleEndian.Uint32(state[36:])
|
||||
v10 := binary.LittleEndian.Uint32(state[40:])
|
||||
v11 := binary.LittleEndian.Uint32(state[44:])
|
||||
v12 := binary.LittleEndian.Uint32(state[48:])
|
||||
v13 := binary.LittleEndian.Uint32(state[52:])
|
||||
v14 := binary.LittleEndian.Uint32(state[56:])
|
||||
v15 := binary.LittleEndian.Uint32(state[60:])
|
||||
|
||||
s00, s01, s02, s03, s04, s05, s06, s07 := v00, v01, v02, v03, v04, v05, v06, v07
|
||||
s08, s09, s10, s11, s12, s13, s14, s15 := v08, v09, v10, v11, v12, v13, v14, v15
|
||||
|
||||
for i := 0; i < rounds; i += 2 {
|
||||
v00 += v04
|
||||
v12 ^= v00
|
||||
v12 = (v12 << 16) | (v12 >> 16)
|
||||
v08 += v12
|
||||
v04 ^= v08
|
||||
v04 = (v04 << 12) | (v04 >> 20)
|
||||
v00 += v04
|
||||
v12 ^= v00
|
||||
v12 = (v12 << 8) | (v12 >> 24)
|
||||
v08 += v12
|
||||
v04 ^= v08
|
||||
v04 = (v04 << 7) | (v04 >> 25)
|
||||
v01 += v05
|
||||
v13 ^= v01
|
||||
v13 = (v13 << 16) | (v13 >> 16)
|
||||
v09 += v13
|
||||
v05 ^= v09
|
||||
v05 = (v05 << 12) | (v05 >> 20)
|
||||
v01 += v05
|
||||
v13 ^= v01
|
||||
v13 = (v13 << 8) | (v13 >> 24)
|
||||
v09 += v13
|
||||
v05 ^= v09
|
||||
v05 = (v05 << 7) | (v05 >> 25)
|
||||
v02 += v06
|
||||
v14 ^= v02
|
||||
v14 = (v14 << 16) | (v14 >> 16)
|
||||
v10 += v14
|
||||
v06 ^= v10
|
||||
v06 = (v06 << 12) | (v06 >> 20)
|
||||
v02 += v06
|
||||
v14 ^= v02
|
||||
v14 = (v14 << 8) | (v14 >> 24)
|
||||
v10 += v14
|
||||
v06 ^= v10
|
||||
v06 = (v06 << 7) | (v06 >> 25)
|
||||
v03 += v07
|
||||
v15 ^= v03
|
||||
v15 = (v15 << 16) | (v15 >> 16)
|
||||
v11 += v15
|
||||
v07 ^= v11
|
||||
v07 = (v07 << 12) | (v07 >> 20)
|
||||
v03 += v07
|
||||
v15 ^= v03
|
||||
v15 = (v15 << 8) | (v15 >> 24)
|
||||
v11 += v15
|
||||
v07 ^= v11
|
||||
v07 = (v07 << 7) | (v07 >> 25)
|
||||
v00 += v05
|
||||
v15 ^= v00
|
||||
v15 = (v15 << 16) | (v15 >> 16)
|
||||
v10 += v15
|
||||
v05 ^= v10
|
||||
v05 = (v05 << 12) | (v05 >> 20)
|
||||
v00 += v05
|
||||
v15 ^= v00
|
||||
v15 = (v15 << 8) | (v15 >> 24)
|
||||
v10 += v15
|
||||
v05 ^= v10
|
||||
v05 = (v05 << 7) | (v05 >> 25)
|
||||
v01 += v06
|
||||
v12 ^= v01
|
||||
v12 = (v12 << 16) | (v12 >> 16)
|
||||
v11 += v12
|
||||
v06 ^= v11
|
||||
v06 = (v06 << 12) | (v06 >> 20)
|
||||
v01 += v06
|
||||
v12 ^= v01
|
||||
v12 = (v12 << 8) | (v12 >> 24)
|
||||
v11 += v12
|
||||
v06 ^= v11
|
||||
v06 = (v06 << 7) | (v06 >> 25)
|
||||
v02 += v07
|
||||
v13 ^= v02
|
||||
v13 = (v13 << 16) | (v13 >> 16)
|
||||
v08 += v13
|
||||
v07 ^= v08
|
||||
v07 = (v07 << 12) | (v07 >> 20)
|
||||
v02 += v07
|
||||
v13 ^= v02
|
||||
v13 = (v13 << 8) | (v13 >> 24)
|
||||
v08 += v13
|
||||
v07 ^= v08
|
||||
v07 = (v07 << 7) | (v07 >> 25)
|
||||
v03 += v04
|
||||
v14 ^= v03
|
||||
v14 = (v14 << 16) | (v14 >> 16)
|
||||
v09 += v14
|
||||
v04 ^= v09
|
||||
v04 = (v04 << 12) | (v04 >> 20)
|
||||
v03 += v04
|
||||
v14 ^= v03
|
||||
v14 = (v14 << 8) | (v14 >> 24)
|
||||
v09 += v14
|
||||
v04 ^= v09
|
||||
v04 = (v04 << 7) | (v04 >> 25)
|
||||
}
|
||||
|
||||
v00 += s00
|
||||
v01 += s01
|
||||
v02 += s02
|
||||
v03 += s03
|
||||
v04 += s04
|
||||
v05 += s05
|
||||
v06 += s06
|
||||
v07 += s07
|
||||
v08 += s08
|
||||
v09 += s09
|
||||
v10 += s10
|
||||
v11 += s11
|
||||
v12 += s12
|
||||
v13 += s13
|
||||
v14 += s14
|
||||
v15 += s15
|
||||
|
||||
s12++
|
||||
binary.LittleEndian.PutUint32(state[48:], s12)
|
||||
if s12 == 0 { // indicates overflow
|
||||
s13++
|
||||
binary.LittleEndian.PutUint32(state[52:], s13)
|
||||
}
|
||||
|
||||
binary.LittleEndian.PutUint32(dst[0:], v00)
|
||||
binary.LittleEndian.PutUint32(dst[4:], v01)
|
||||
binary.LittleEndian.PutUint32(dst[8:], v02)
|
||||
binary.LittleEndian.PutUint32(dst[12:], v03)
|
||||
binary.LittleEndian.PutUint32(dst[16:], v04)
|
||||
binary.LittleEndian.PutUint32(dst[20:], v05)
|
||||
binary.LittleEndian.PutUint32(dst[24:], v06)
|
||||
binary.LittleEndian.PutUint32(dst[28:], v07)
|
||||
binary.LittleEndian.PutUint32(dst[32:], v08)
|
||||
binary.LittleEndian.PutUint32(dst[36:], v09)
|
||||
binary.LittleEndian.PutUint32(dst[40:], v10)
|
||||
binary.LittleEndian.PutUint32(dst[44:], v11)
|
||||
binary.LittleEndian.PutUint32(dst[48:], v12)
|
||||
binary.LittleEndian.PutUint32(dst[52:], v13)
|
||||
binary.LittleEndian.PutUint32(dst[56:], v14)
|
||||
binary.LittleEndian.PutUint32(dst[60:], v15)
|
||||
}
|
||||
|
||||
// hChaCha20Generic is the portable (pure Go) HChaCha20 implementation.
//
// The 16-word working state is loaded as: v00-v03 from the sigma constants,
// v04-v11 from the 32-byte key and v12-v15 from the 16-byte nonce (all words
// little-endian). The state is then permuted by 10 iterations of a double
// round (20 rounds in total). Unlike the block function above, the initial
// state is NOT added back after the rounds; only words 0-3 and 12-15 are
// written to out, giving 32 bytes of output.
//
// The rounds are fully unrolled; the statement order must not be changed.
func hChaCha20Generic(out *[32]byte, nonce *[16]byte, key *[32]byte) {
|
||||
// Words 0-3: constants.
v00 := sigma[0]
|
||||
v01 := sigma[1]
|
||||
v02 := sigma[2]
|
||||
v03 := sigma[3]
|
||||
// Words 4-11: key.
v04 := binary.LittleEndian.Uint32(key[0:])
|
||||
v05 := binary.LittleEndian.Uint32(key[4:])
|
||||
v06 := binary.LittleEndian.Uint32(key[8:])
|
||||
v07 := binary.LittleEndian.Uint32(key[12:])
|
||||
v08 := binary.LittleEndian.Uint32(key[16:])
|
||||
v09 := binary.LittleEndian.Uint32(key[20:])
|
||||
v10 := binary.LittleEndian.Uint32(key[24:])
|
||||
v11 := binary.LittleEndian.Uint32(key[28:])
|
||||
// Words 12-15: nonce.
v12 := binary.LittleEndian.Uint32(nonce[0:])
|
||||
v13 := binary.LittleEndian.Uint32(nonce[4:])
|
||||
v14 := binary.LittleEndian.Uint32(nonce[8:])
|
||||
v15 := binary.LittleEndian.Uint32(nonce[12:])
|
||||
|
||||
// Each iteration is one double round (i advances by 2): four quarter rounds
// over the columns of the 4x4 state followed by four over its diagonals.
// Every quarter round rotates left by 16, 12, 8 and 7 bits, in that order.
for i := 0; i < 20; i += 2 {
|
||||
// Column quarter round on (v00, v04, v08, v12).
v00 += v04
|
||||
v12 ^= v00
|
||||
v12 = (v12 << 16) | (v12 >> 16)
|
||||
v08 += v12
|
||||
v04 ^= v08
|
||||
v04 = (v04 << 12) | (v04 >> 20)
|
||||
v00 += v04
|
||||
v12 ^= v00
|
||||
v12 = (v12 << 8) | (v12 >> 24)
|
||||
v08 += v12
|
||||
v04 ^= v08
|
||||
v04 = (v04 << 7) | (v04 >> 25)
|
||||
// Column quarter round on (v01, v05, v09, v13).
v01 += v05
|
||||
v13 ^= v01
|
||||
v13 = (v13 << 16) | (v13 >> 16)
|
||||
v09 += v13
|
||||
v05 ^= v09
|
||||
v05 = (v05 << 12) | (v05 >> 20)
|
||||
v01 += v05
|
||||
v13 ^= v01
|
||||
v13 = (v13 << 8) | (v13 >> 24)
|
||||
v09 += v13
|
||||
v05 ^= v09
|
||||
v05 = (v05 << 7) | (v05 >> 25)
|
||||
// Column quarter round on (v02, v06, v10, v14).
v02 += v06
|
||||
v14 ^= v02
|
||||
v14 = (v14 << 16) | (v14 >> 16)
|
||||
v10 += v14
|
||||
v06 ^= v10
|
||||
v06 = (v06 << 12) | (v06 >> 20)
|
||||
v02 += v06
|
||||
v14 ^= v02
|
||||
v14 = (v14 << 8) | (v14 >> 24)
|
||||
v10 += v14
|
||||
v06 ^= v10
|
||||
v06 = (v06 << 7) | (v06 >> 25)
|
||||
// Column quarter round on (v03, v07, v11, v15).
v03 += v07
|
||||
v15 ^= v03
|
||||
v15 = (v15 << 16) | (v15 >> 16)
|
||||
v11 += v15
|
||||
v07 ^= v11
|
||||
v07 = (v07 << 12) | (v07 >> 20)
|
||||
v03 += v07
|
||||
v15 ^= v03
|
||||
v15 = (v15 << 8) | (v15 >> 24)
|
||||
v11 += v15
|
||||
v07 ^= v11
|
||||
v07 = (v07 << 7) | (v07 >> 25)
|
||||
// Diagonal quarter round on (v00, v05, v10, v15).
v00 += v05
|
||||
v15 ^= v00
|
||||
v15 = (v15 << 16) | (v15 >> 16)
|
||||
v10 += v15
|
||||
v05 ^= v10
|
||||
v05 = (v05 << 12) | (v05 >> 20)
|
||||
v00 += v05
|
||||
v15 ^= v00
|
||||
v15 = (v15 << 8) | (v15 >> 24)
|
||||
v10 += v15
|
||||
v05 ^= v10
|
||||
v05 = (v05 << 7) | (v05 >> 25)
|
||||
// Diagonal quarter round on (v01, v06, v11, v12).
v01 += v06
|
||||
v12 ^= v01
|
||||
v12 = (v12 << 16) | (v12 >> 16)
|
||||
v11 += v12
|
||||
v06 ^= v11
|
||||
v06 = (v06 << 12) | (v06 >> 20)
|
||||
v01 += v06
|
||||
v12 ^= v01
|
||||
v12 = (v12 << 8) | (v12 >> 24)
|
||||
v11 += v12
|
||||
v06 ^= v11
|
||||
v06 = (v06 << 7) | (v06 >> 25)
|
||||
// Diagonal quarter round on (v02, v07, v08, v13).
v02 += v07
|
||||
v13 ^= v02
|
||||
v13 = (v13 << 16) | (v13 >> 16)
|
||||
v08 += v13
|
||||
v07 ^= v08
|
||||
v07 = (v07 << 12) | (v07 >> 20)
|
||||
v02 += v07
|
||||
v13 ^= v02
|
||||
v13 = (v13 << 8) | (v13 >> 24)
|
||||
v08 += v13
|
||||
v07 ^= v08
|
||||
v07 = (v07 << 7) | (v07 >> 25)
|
||||
// Diagonal quarter round on (v03, v04, v09, v14).
v03 += v04
|
||||
v14 ^= v03
|
||||
v14 = (v14 << 16) | (v14 >> 16)
|
||||
v09 += v14
|
||||
v04 ^= v09
|
||||
v04 = (v04 << 12) | (v04 >> 20)
|
||||
v03 += v04
|
||||
v14 ^= v03
|
||||
v14 = (v14 << 8) | (v14 >> 24)
|
||||
v09 += v14
|
||||
v04 ^= v09
|
||||
v04 = (v04 << 7) | (v04 >> 25)
|
||||
}
|
||||
|
||||
// Output: words 0-3 followed by words 12-15; words 4-11 are discarded.
binary.LittleEndian.PutUint32(out[0:], v00)
|
||||
binary.LittleEndian.PutUint32(out[4:], v01)
|
||||
binary.LittleEndian.PutUint32(out[8:], v02)
|
||||
binary.LittleEndian.PutUint32(out[12:], v03)
|
||||
binary.LittleEndian.PutUint32(out[16:], v12)
|
||||
binary.LittleEndian.PutUint32(out[20:], v13)
|
||||
binary.LittleEndian.PutUint32(out[24:], v14)
|
||||
binary.LittleEndian.PutUint32(out[28:], v15)
|
||||
}
|
56
vendor/github.com/aead/chacha20/chacha/chacha_go16_amd64.go
generated
vendored
Normal file
56
vendor/github.com/aead/chacha20/chacha/chacha_go16_amd64.go
generated
vendored
Normal file
|
@ -0,0 +1,56 @@
|
|||
// Copyright (c) 2017 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build amd64,!gccgo,!appengine,!nacl,!go1.7
|
||||
|
||||
package chacha
|
||||
|
||||
func init() {
|
||||
useSSE2 = true
|
||||
useSSSE3 = supportsSSSE3()
|
||||
useAVX2 = false
|
||||
}
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func initialize(state *[64]byte, key []byte, nonce *[16]byte)
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func supportsSSSE3() bool
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
|
||||
|
||||
func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
|
||||
if useSSSE3 {
|
||||
hChaCha20SSSE3(out, nonce, key)
|
||||
} else if useSSE2 { // on amd64 this is always true - used to test generic on amd64
|
||||
hChaCha20SSE2(out, nonce, key)
|
||||
} else {
|
||||
hChaCha20Generic(out, nonce, key)
|
||||
}
|
||||
}
|
||||
|
||||
func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
|
||||
if useSSSE3 {
|
||||
return xorKeyStreamSSSE3(dst, src, block, state, rounds)
|
||||
} else if useSSE2 { // on amd64 this is always true - used to test generic on amd64
|
||||
return xorKeyStreamSSE2(dst, src, block, state, rounds)
|
||||
}
|
||||
return xorKeyStreamGeneric(dst, src, block, state, rounds)
|
||||
}
|
72
vendor/github.com/aead/chacha20/chacha/chacha_go17_amd64.go
generated
vendored
Normal file
72
vendor/github.com/aead/chacha20/chacha/chacha_go17_amd64.go
generated
vendored
Normal file
|
@ -0,0 +1,72 @@
|
|||
// Copyright (c) 2017 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build go1.7,amd64,!gccgo,!appengine,!nacl
|
||||
|
||||
package chacha
|
||||
|
||||
func init() {
|
||||
useSSE2 = true
|
||||
useSSSE3 = supportsSSSE3()
|
||||
useAVX2 = supportsAVX2()
|
||||
}
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func initialize(state *[64]byte, key []byte, nonce *[16]byte)
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func supportsSSSE3() bool
|
||||
|
||||
// This function is implemented in chachaAVX2_amd64.s
|
||||
//go:noescape
|
||||
func supportsAVX2() bool
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
|
||||
|
||||
// This function is implemented in chachaAVX2_amd64.s
|
||||
//go:noescape
|
||||
func hChaCha20AVX(out *[32]byte, nonce *[16]byte, key *[32]byte)
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
|
||||
|
||||
// This function is implemented in chachaAVX2_amd64.s
|
||||
//go:noescape
|
||||
func xorKeyStreamAVX2(dst, src []byte, block, state *[64]byte, rounds int) int
|
||||
|
||||
func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
|
||||
if useAVX2 {
|
||||
hChaCha20AVX(out, nonce, key)
|
||||
} else if useSSSE3 {
|
||||
hChaCha20SSSE3(out, nonce, key)
|
||||
} else if useSSE2 { // on amd64 this is always true - neccessary for testing generic on amd64
|
||||
hChaCha20SSE2(out, nonce, key)
|
||||
} else {
|
||||
hChaCha20Generic(out, nonce, key)
|
||||
}
|
||||
}
|
||||
|
||||
func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
|
||||
if useAVX2 {
|
||||
return xorKeyStreamAVX2(dst, src, block, state, rounds)
|
||||
} else if useSSSE3 {
|
||||
return xorKeyStreamSSSE3(dst, src, block, state, rounds)
|
||||
} else if useSSE2 { // on amd64 this is always true - neccessary for testing generic on amd64
|
||||
return xorKeyStreamSSE2(dst, src, block, state, rounds)
|
||||
}
|
||||
return xorKeyStreamGeneric(dst, src, block, state, rounds)
|
||||
}
|
26
vendor/github.com/aead/chacha20/chacha/chacha_ref.go
generated
vendored
Normal file
26
vendor/github.com/aead/chacha20/chacha/chacha_ref.go
generated
vendored
Normal file
|
@ -0,0 +1,26 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build !amd64,!386 gccgo appengine nacl
|
||||
|
||||
package chacha
|
||||
|
||||
import "encoding/binary"
|
||||
|
||||
func initialize(state *[64]byte, key []byte, nonce *[16]byte) {
|
||||
binary.LittleEndian.PutUint32(state[0:], sigma[0])
|
||||
binary.LittleEndian.PutUint32(state[4:], sigma[1])
|
||||
binary.LittleEndian.PutUint32(state[8:], sigma[2])
|
||||
binary.LittleEndian.PutUint32(state[12:], sigma[3])
|
||||
copy(state[16:], key[:])
|
||||
copy(state[48:], nonce[:])
|
||||
}
|
||||
|
||||
// xorKeyStream forwards to xorKeyStreamGeneric and returns its result. This
// file's build constraint selects it for targets other than amd64/386 and for
// gccgo, appengine and nacl builds.
func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
|
||||
return xorKeyStreamGeneric(dst, src, block, state, rounds)
|
||||
}
|
||||
|
||||
// hChaCha20 forwards to hChaCha20Generic. This file's build constraint
// selects it for targets other than amd64/386 and for gccgo, appengine and
// nacl builds.
func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
|
||||
hChaCha20Generic(out, nonce, key)
|
||||
}
|
382
vendor/github.com/aead/chacha20/chacha/chacha_test.go
generated
vendored
Normal file
382
vendor/github.com/aead/chacha20/chacha/chacha_test.go
generated
vendored
Normal file
|
@ -0,0 +1,382 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
package chacha
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/hex"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// toHex returns the lowercase hexadecimal encoding of bits.
func toHex(bits []byte) string {
	encoded := make([]byte, hex.EncodedLen(len(bits)))
	hex.Encode(encoded, bits)
	return string(encoded)
}
|
||||
|
||||
// fromHex decodes the hexadecimal string bits and panics with the decoding
// error if the input is not valid hex.
func fromHex(bits string) []byte {
	decoded, decodeErr := hex.DecodeString(bits)
	if decodeErr == nil {
		return decoded
	}
	panic(decodeErr)
}
|
||||
|
||||
func TestHChaCha20(t *testing.T) {
|
||||
defer func(sse2, ssse3, avx2 bool) {
|
||||
useSSE2, useSSSE3, useAVX2 = sse2, ssse3, avx2
|
||||
}(useSSE2, useSSSE3, useAVX2)
|
||||
|
||||
if useAVX2 {
|
||||
t.Log("AVX2 version")
|
||||
testHChaCha20(t)
|
||||
useAVX2 = false
|
||||
}
|
||||
if useSSSE3 {
|
||||
t.Log("SSSE3 version")
|
||||
testHChaCha20(t)
|
||||
useSSSE3 = false
|
||||
}
|
||||
if useSSE2 {
|
||||
t.Log("SSE2 version")
|
||||
testHChaCha20(t)
|
||||
useSSE2 = false
|
||||
}
|
||||
t.Log("generic version")
|
||||
testHChaCha20(t)
|
||||
}
|
||||
|
||||
func TestVectors(t *testing.T) {
|
||||
defer func(sse2, ssse3, avx2 bool) {
|
||||
useSSE2, useSSSE3, useAVX2 = sse2, ssse3, avx2
|
||||
}(useSSE2, useSSSE3, useAVX2)
|
||||
|
||||
if useAVX2 {
|
||||
t.Log("AVX2 version")
|
||||
testVectors(t)
|
||||
useAVX2 = false
|
||||
}
|
||||
if useSSSE3 {
|
||||
t.Log("SSSE3 version")
|
||||
testVectors(t)
|
||||
useSSSE3 = false
|
||||
}
|
||||
if useSSE2 {
|
||||
t.Log("SSE2 version")
|
||||
testVectors(t)
|
||||
useSSE2 = false
|
||||
}
|
||||
t.Log("generic version")
|
||||
testVectors(t)
|
||||
}
|
||||
|
||||
func TestIncremental(t *testing.T) {
|
||||
defer func(sse2, ssse3, avx2 bool) {
|
||||
useSSE2, useSSSE3, useAVX2 = sse2, ssse3, avx2
|
||||
}(useSSE2, useSSSE3, useAVX2)
|
||||
|
||||
if useAVX2 {
|
||||
t.Log("AVX2 version")
|
||||
testIncremental(t, 5, 2049)
|
||||
useAVX2 = false
|
||||
}
|
||||
if useSSSE3 {
|
||||
t.Log("SSSE3 version")
|
||||
testIncremental(t, 5, 2049)
|
||||
useSSSE3 = false
|
||||
}
|
||||
if useSSE2 {
|
||||
t.Log("SSE2 version")
|
||||
testIncremental(t, 5, 2049)
|
||||
}
|
||||
}
|
||||
|
||||
func testHChaCha20(t *testing.T) {
|
||||
for i, v := range hChaCha20Vectors {
|
||||
var key [32]byte
|
||||
var nonce [16]byte
|
||||
copy(key[:], v.key)
|
||||
copy(nonce[:], v.nonce)
|
||||
|
||||
hChaCha20(&key, &nonce, &key)
|
||||
if !bytes.Equal(key[:], v.keystream) {
|
||||
t.Errorf("Test %d: keystream mismatch:\n \t got: %s\n \t want: %s", i, toHex(key[:]), toHex(v.keystream))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func testVectors(t *testing.T) {
|
||||
for i, v := range vectors {
|
||||
if len(v.plaintext) == 0 {
|
||||
v.plaintext = make([]byte, len(v.ciphertext))
|
||||
}
|
||||
|
||||
dst := make([]byte, len(v.ciphertext))
|
||||
|
||||
XORKeyStream(dst, v.plaintext, v.nonce, v.key, v.rounds)
|
||||
if !bytes.Equal(dst, v.ciphertext) {
|
||||
t.Errorf("Test %d: ciphertext mismatch:\n \t got: %s\n \t want: %s", i, toHex(dst), toHex(v.ciphertext))
|
||||
}
|
||||
|
||||
c, err := NewCipher(v.nonce, v.key, v.rounds)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
c.XORKeyStream(dst[:1], v.plaintext[:1])
|
||||
c.XORKeyStream(dst[1:], v.plaintext[1:])
|
||||
if !bytes.Equal(dst, v.ciphertext) {
|
||||
t.Errorf("Test %d: ciphertext mismatch:\n \t got: %s\n \t want: %s", i, toHex(dst), toHex(v.ciphertext))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func testIncremental(t *testing.T, iter int, size int) {
|
||||
sse2, ssse3, avx2 := useSSE2, useSSSE3, useAVX2
|
||||
msg, ref, stream := make([]byte, size), make([]byte, size), make([]byte, size)
|
||||
|
||||
for i := 0; i < iter; i++ {
|
||||
var key [32]byte
|
||||
var nonce []byte
|
||||
switch i % 3 {
|
||||
case 0:
|
||||
nonce = make([]byte, 8)
|
||||
case 1:
|
||||
nonce = make([]byte, 12)
|
||||
case 2:
|
||||
nonce = make([]byte, 24)
|
||||
}
|
||||
|
||||
for j := range key {
|
||||
key[j] = byte(len(nonce) + i)
|
||||
}
|
||||
for j := range nonce {
|
||||
nonce[j] = byte(i)
|
||||
}
|
||||
|
||||
for j := 0; j <= len(msg); j++ {
|
||||
useSSE2, useSSSE3, useAVX2 = false, false, false
|
||||
XORKeyStream(ref[:j], msg[:j], nonce, key[:], 20)
|
||||
|
||||
useSSE2, useSSSE3, useAVX2 = sse2, ssse3, avx2
|
||||
XORKeyStream(stream[:j], msg[:j], nonce, key[:], 20)
|
||||
|
||||
if !bytes.Equal(ref[:j], stream[:j]) {
|
||||
t.Fatalf("Iteration %d failed:\n Message length: %d\n\n got: %s\nwant: %s", i, j, toHex(stream[:j]), toHex(ref[:j]))
|
||||
}
|
||||
|
||||
useSSE2, useSSSE3, useAVX2 = false, false, false
|
||||
c, _ := NewCipher(nonce, key[:], 20)
|
||||
c.XORKeyStream(stream[:j], msg[:j])
|
||||
|
||||
useSSE2, useSSSE3, useAVX2 = sse2, ssse3, avx2
|
||||
c, _ = NewCipher(nonce, key[:], 20)
|
||||
c.XORKeyStream(stream[:j], msg[:j])
|
||||
|
||||
if !bytes.Equal(ref[:j], stream[:j]) {
|
||||
t.Fatalf("Iteration %d failed:\n Message length: %d\n\n got: %s\nwant: %s", i, j, toHex(stream[:j]), toHex(ref[:j]))
|
||||
}
|
||||
}
|
||||
copy(msg, stream)
|
||||
}
|
||||
}
|
||||
|
||||
var hChaCha20Vectors = []struct {
|
||||
key, nonce, keystream []byte
|
||||
}{
|
||||
{
|
||||
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
|
||||
fromHex("000000000000000000000000000000000000000000000000"),
|
||||
fromHex("1140704c328d1d5d0e30086cdf209dbd6a43b8f41518a11cc387b669b2ee6586"),
|
||||
},
|
||||
{
|
||||
fromHex("8000000000000000000000000000000000000000000000000000000000000000"),
|
||||
fromHex("000000000000000000000000000000000000000000000000"),
|
||||
fromHex("7d266a7fd808cae4c02a0a70dcbfbcc250dae65ce3eae7fc210f54cc8f77df86"),
|
||||
},
|
||||
{
|
||||
fromHex("0000000000000000000000000000000000000000000000000000000000000001"),
|
||||
fromHex("000000000000000000000000000000000000000000000002"),
|
||||
fromHex("e0c77ff931bb9163a5460c02ac281c2b53d792b1c43fea817e9ad275ae546963"),
|
||||
},
|
||||
{
|
||||
fromHex("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"),
|
||||
fromHex("000102030405060708090a0b0c0d0e0f1011121314151617"),
|
||||
fromHex("51e3ff45a895675c4b33b46c64f4a9ace110d34df6a2ceab486372bacbd3eff6"),
|
||||
},
|
||||
}
|
||||
|
||||
var vectors = []struct {
|
||||
key, nonce, plaintext, ciphertext []byte
|
||||
rounds int
|
||||
}{
|
||||
{
|
||||
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
|
||||
fromHex("000000000000000000000000"),
|
||||
nil,
|
||||
fromHex("76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586"),
|
||||
20,
|
||||
},
|
||||
{
|
||||
fromHex("0000000000000000000000000000000000000000000000000000000000000001"),
|
||||
fromHex("000000000000000000000002"),
|
||||
fromHex("00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" +
|
||||
"416e79207375626d697373696f6e20746f20746865204945544620696e74656e6465642062792074686520436f6e7472696275746f7220666f72207075626c69" +
|
||||
"636174696f6e20617320616c6c206f722070617274206f6620616e204945544620496e7465726e65742d4472616674206f722052464320616e6420616e792073" +
|
||||
"746174656d656e74206d6164652077697468696e2074686520636f6e74657874206f6620616e204945544620616374697669747920697320636f6e7369646572" +
|
||||
"656420616e20224945544620436f6e747269627574696f6e222e20537563682073746174656d656e747320696e636c756465206f72616c2073746174656d656e" +
|
||||
"747320696e20494554462073657373696f6e732c2061732077656c6c206173207772697474656e20616e6420656c656374726f6e696320636f6d6d756e696361" +
|
||||
"74696f6e73206d61646520617420616e792074696d65206f7220706c6163652c207768696368206172652061646472657373656420746f"),
|
||||
fromHex("ecfa254f845f647473d3cb140da9e87606cb33066c447b87bc2666dde3fbb739a371c9ec7abcb4cfa9211f7d90f64c2d07f89e5cf9b93e330a6e4c08af5ba6d5" +
|
||||
"a3fbf07df3fa2fde4f376ca23e82737041605d9f4f4f57bd8cff2c1d4b7955ec2a97948bd3722915c8f3d337f7d370050e9e96d647b7c39f56e031ca5eb6250d" +
|
||||
"4042e02785ececfa4b4bb5e8ead0440e20b6e8db09d881a7c6132f420e52795042bdfa7773d8a9051447b3291ce1411c680465552aa6c405b7764d5e87bea85a" +
|
||||
"d00f8449ed8f72d0d662ab052691ca66424bc86d2df80ea41f43abf937d3259dc4b2d0dfb48a6c9139ddd7f76966e928e635553ba76c5c879d7b35d49eb2e62b" +
|
||||
"0871cdac638939e25e8a1e0ef9d5280fa8ca328b351c3c765989cbcf3daa8b6ccc3aaf9f3979c92b3720fc88dc95ed84a1be059c6499b9fda236e7e818b04b0b" +
|
||||
"c39c1e876b193bfe5569753f88128cc08aaa9b63d1a16f80ef2554d7189c411f5869ca52c5b83fa36ff216b9c1d30062bebcfd2dc5bce0911934fda79a86f6e6" +
|
||||
"98ced759c3ff9b6477338f3da4f9cd8514ea9982ccafb341b2384dd902f3d1ab7ac61dd29c6f21ba5b862f3730e37cfdc4fd806c22f221"),
|
||||
20,
|
||||
},
|
||||
{
|
||||
fromHex("8000000000000000000000000000000000000000000000000000000000000000"),
|
||||
fromHex("000000000000000000000000"),
|
||||
nil,
|
||||
fromHex("e29edae0466dea17f2576ce95025dd2db2d34fc81b5153f1b70a87f315a35286"),
|
||||
20,
|
||||
},
|
||||
{
|
||||
fromHex("8000000000000000000000000000000000000000000000000000000000000000"),
|
||||
fromHex("000000000000000000000000"),
|
||||
nil,
|
||||
fromHex("e29edae0466dea17f2576ce95025dd2db2d34fc81b5153f1b70a87f315a35286fb56db91e8dbf0a93faaa25777aad63450dae65ce3eae7fc210f54cc8f77df8662f8" +
|
||||
"955228b2358d61d8c5ccf63a6c40203be5fb4541c39c52861de70b8a1416ddd3fe9a818bae8f0e8ff2288cede0459fbb00032fd85fef972fcb586c228d"),
|
||||
20,
|
||||
},
|
||||
{
|
||||
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
|
||||
fromHex("0000000000000000"),
|
||||
nil,
|
||||
fromHex("76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee65869f07" +
|
||||
"e7be5551387a98ba977c732d080dcb0f29a048e3656912c6533e32ee7aed29b721769ce64e43d57133b074d839d531ed1f28510afb45ace10a1f4b794d6f2d09a0e663266ce1ae7ed1081968a0758e7" +
|
||||
"18e997bd362c6b0c34634a9a0b35d012737681f7b5d0f281e3afde458bc1e73d2d313c9cf94c05ff3716240a248f21320a058d7b3566bd520daaa3ed2bf0ac5b8b120fb852773c3639734b45c91a42d" +
|
||||
"d4cb83f8840d2eedb158131062ac3f1f2cf8ff6dcd1856e86a1e6c3167167ee5a688742b47c5adfb59d4df76fd1db1e51ee03b1ca9f82aca173edb8b7293474ebe980f904d10c916442b4783a0e9848" +
|
||||
"60cb6c957b39c38ed8f51cffaa68a4de01025a39c504546b9dc1406a7eb28151e5150d7b204baa719d4f091021217db5cf1b5c84c4fa71a879610a1a695ac527c5b56774a6b8a21aae88685868e094c" +
|
||||
"f29ef4090af7a90cc07e8817aa528763797d3c332b67ca4bc110642c2151ec47ee84cb8c42d85f10e2a8cb18c3b7335f26e8c39a12b1bcc1707177b7613873"),
|
||||
20,
|
||||
},
|
||||
{
|
||||
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
|
||||
fromHex("0100000000000000"),
|
||||
nil,
|
||||
fromHex("ef3fdfd6c61578fbf5cf35bd3dd33b8009631634d21e42ac33960bd138e50d32111e4caf237ee53ca8ad6426194a88545ddc497a0b466e7d6bbdb0041b2f586b5305" +
|
||||
"e5e44aff19b235936144675efbe4409eb7e8e5f1430f5f5836aeb49bb5328b017c4b9dc11f8a03863fa803dc71d5726b2b6b31aa32708afe5af1d6b690584d58792b271e5fdb92c486051c48b79a4d4" +
|
||||
"8a109bb2d0477956e74c25e93c3c2db34bf779470464a033b8394517a5cf3576a6618c8551a456628b253ef0117c90cd46d8177a2a06d16e20e05c05f889bf87e95d6ee8a03807d1cd53d586872b125" +
|
||||
"9d0647da7b7aae80af9b3aad41ad5a8141d2e156c9dd52a3bd2ae165bd7d6a2a4e2cf6938b8b390828ff20dc8fd60e2cd17fe368e35b467a70654ba93cfa62760a9d2f26da7818d4d863808e1add5ff" +
|
||||
"db76d41efd524ded4246e03caa008950c91dedfc9a8e68173fe481c4d3d3c215fdf3af22aeab0097b835a84faabbbce094c6181a193ffeda067271ff7c10cce76542241116283842e31e922430211dc" +
|
||||
"b38e556158fc2daaec367b705b75f782f8bc2c2c5e33a375390c3052f7e3446feb105fb47820f1d2539811c5b49bb76dc15f2d20a7e2c200b573db9f653ed7"),
|
||||
20,
|
||||
},
|
||||
{
|
||||
fromHex("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"),
|
||||
fromHex("0001020304050607"),
|
||||
nil,
|
||||
fromHex("f798a189f195e66982105ffb640bb7757f579da31602fc93ec01ac56f85ac3c134a4547b733b46413042c9440049176905d3be59ea1c53f15916155c2be8241a3800" +
|
||||
"8b9a26bc35941e2444177c8ade6689de95264986d95889fb60e84629c9bd9a5acb1cc118be563eb9b3a4a472f82e09a7e778492b562ef7130e88dfe031c79db9d4f7c7a899151b9a475032b63fc3852" +
|
||||
"45fe054e3dd5a97a5f576fe064025d3ce042c566ab2c507b138db853e3d6959660996546cc9c4a6eafdc777c040d70eaf46f76dad3979e5c5360c3317166a1c894c94a371876a94df7628fe4eaaf2cc" +
|
||||
"b27d5aaae0ad7ad0f9d4b6ad3b54098746d4524d38407a6deb3ab78fab78c94213668bbbd394c5de93b853178addd6b97f9fa1ec3e56c00c9ddff0a44a204241175a4cab0f961ba53ede9bdf960b94f" +
|
||||
"9829b1f3414726429b362c5b538e391520f489b7ed8d20ae3fd49e9e259e44397514d618c96c4846be3c680bdc11c71dcbbe29ccf80d62a0938fa549391e6ea57ecbe2606790ec15d2224ae307c1442" +
|
||||
"26b7c4e8c2f97d2a1d67852d29beba110edd445197012062a393a9c92803ad3b4f31d7bc6033ccf7932cfed3f019044d25905916777286f82f9a4cc1ffe430"),
|
||||
20,
|
||||
},
|
||||
{
|
||||
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
|
||||
fromHex("000000000000000000000000"),
|
||||
nil,
|
||||
fromHex("9bf49a6a0755f953811fce125f2683d50429c3bb49e074147e0089a52eae155f0564f879d27ae3c02ce82834acfa8c793a629f2ca0de6919610be82f411326be0bd588" +
|
||||
"41203e74fe86fc71338ce0173dc628ebb719bdcbcc151585214cc089b442258dcda14cf111c602b8971b8cc843e91e46ca905151c02744a6b017e69316b20cd67c4bdecc538e8be990c1b6425d68bfd3a" +
|
||||
"6fe97693e4846351596cca8abf59fddd0b7f52dcc0c60a448cbf9511610b0a742f1e4d238a7a45cae054ec2"),
|
||||
12,
|
||||
},
|
||||
{
|
||||
fromHex("8000000000000000000000000000000000000000000000000000000000000000"),
|
||||
fromHex("000000000000000000000000"),
|
||||
nil,
|
||||
fromHex("789cc357f0b6cda5395f08c8538f1226d08eb3e16ebd6b6db6cc9ca77d81d900bb9d21f6ef0b720550d161f1a80fab0468e48c086daad356edce3a3f988d8e"),
|
||||
12,
|
||||
},
|
||||
{
|
||||
fromHex("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"),
|
||||
fromHex("0001020304050607"),
|
||||
nil,
|
||||
fromHex("6898eb04f3d151985e28e882f35daf28d2a1689f79081ffb08cdc48edbbd3dcd683c764f3dd7302293928ca3d4ef4194e6e22f41a72204a14b89115d06ca29fb0b9f6e" +
|
||||
"ba3da6793a928afe76cdf62a5d5b0898bb9bb2348612189fdb825e5aa7559c9ec79ff80d05079fad81e9bc2521b2ebcb179cebeade91f20ff3e13192d60de2ee983ec07047e7827594773c28448d89e9b" +
|
||||
"96bb0f8665b1a56f85abebd584a446e17d5a6fb847a1dbf341ece5124ff5f80d4a57fb7edf65a2907939b2f3c9654ccbfa2e5225edc8d799bf7ce296d6c8f9234cec0bd7b91b3d2ddc27f93ff8591ddb3" +
|
||||
"62b54fab111a7da9d5b4187661ed0e691f7aa5959fb83112427a95bbeb"),
|
||||
12,
|
||||
},
|
||||
{
|
||||
fromHex("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"),
|
||||
fromHex("0001020304050607"),
|
||||
nil,
|
||||
fromHex("40e1aaea1c843baa28b18eb728fec05dce47b0e824bf9a5d3f1bb1aad13b37fbbf0b0e146732c16380efeab70a1b6edff9acedc876b70d98b61f192290537973"),
|
||||
8,
|
||||
},
|
||||
{
|
||||
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
|
||||
fromHex("000000000000000000000000000000000000000000000000"),
|
||||
nil,
|
||||
fromHex("bcd02a18bf3f01d19292de30a7a8fdaca4b65e50a6002cc72cd6d2f7c91ac3d5728f83e0aad2bfcf9abd2d2db58faedd65015dd83fc09b131e271043019e8e0f789e96" +
|
||||
"89e5208d7fd9e1f3c5b5341f48ef18a13e418998addadd97a3693a987f8e82ecd5c1433bfed1af49750c0f1ff29c4174a05b119aa3a9e8333812e0c0fea49e1ee0134a70a9d49c24e0cbd8fc3ba27e97c" +
|
||||
"3322ad487f778f8dc6a122fa59cbe33e7"),
|
||||
20,
|
||||
},
|
||||
{
|
||||
fromHex("8000000000000000000000000000000000000000000000000000000000000000"),
|
||||
fromHex("000000000000000000000000000000000000000000000000"),
|
||||
nil,
|
||||
fromHex("ccfe8a9e93431bd582f07b3eb0f4a7afc22ef39337ddd84f0d3545b318a315a32b3abb96de0fc6acde48b248fe8a80e6fa72bfcdf9d8d2656b991676476f052d937308" +
|
||||
"0e30d8c0e217126a3c64402e1d9404ba9d6b8ce4ad5ac9693f3660638c26ea2cd1b4a8d3348c1e179ead353ee72fee558e9994c51a27195e287d00ec2f8cfef8866d1f98714f40cbe4e18cebabf3cd1fd" +
|
||||
"3bb65506e5dce1ad09f438bffe2c96d7f2f0827c8c3f2ca59dbaa393785c6b8da7c69c8a4a63ffd113dcc93de8f52dbcfaed5e4cbcc1dc310b1352868fab7b14d930a9f7a7d47bed0eaf5b151f6dac8bd" +
|
||||
"45510698bdc205d70b944ea5450888dd3ec753da9708bf06c0714822dda74f285c361abd0cd1071324c253dc421905edca36e8808bffef091e7dbdecebdad98cf70b7cede72e9c3c4108e5b32ffae0f42" +
|
||||
"151a8196939d8e3b8384be1"),
|
||||
20,
|
||||
},
|
||||
{
|
||||
fromHex("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"),
|
||||
fromHex("000102030405060708090a0b0c0d0e0f1011121314151617"),
|
||||
nil,
|
||||
fromHex("e53a61cef151e81401067de33adfc02e90ab205361b49b539fda7f0e63b1bc7d68fbee56c9c20c39960e595f3ea76c979804d08cfa728e66cb5f766b840ec61f9ec20f" +
|
||||
"7f90d28dae334426cecb52a8e84b4728a5fdd61deb7f1a3fb63dadf5595e06b6e441670964d595ae59cf21536271bae2594774fb19079b933d8fe744f4"),
|
||||
20,
|
||||
},
|
||||
{
|
||||
fromHex("FF00000000000000000000000000000000000000000000000000000000000000"),
|
||||
fromHex("000000000000000000000000"),
|
||||
nil,
|
||||
fromHex("4fe0956ef81829ff96ef093f03c15dc0eaf4e6905eff9777a5db78348915689ed64204e8fce664cb71ea4016185d15e05be4329e02fcd472707508ef62fd89565ffa632effdb" +
|
||||
"bf08394aa437d8ff093e6cea49b61672cf294474927a8150e06cec9fdec0f5cf26f257fe335a8d7dd6d208e6df6f0a83bb1b0b5c574edc2c9a604e4310acb970815a9819c91a5137794d1ee71ede3e5d59f27e76" +
|
||||
"84d287d704fe3945de0a9b66be3d86e66980263602aeb600efaef243b1adf4c701dbf8f57427dee71dacd703d25317ffc7a67e7881ad13f0bf096d3b0486eec71fef5e0efb5964d14eb2cea0336e34ed4444cc2b" +
|
||||
"bdbd8ef5ba89a0a5e9e35a2e23b38d3f9136f42aefb25c2e7eae0b42c1d1ada5618c5299aedd469ce4f9353ccbae3f89110922b669b8d1b62e72aaf893b83ca264707efbefdcf22ef2333b01f18a849653b52925" +
|
||||
"63c37314bf34289b0636a2f8c24bc97fec554a9c31ec2cb4e30ba70fa965a17561e56739be138d86a4777f866ca24ba24f70913230e1b3ea34a9a90eea1b6a3a81b93286bb582a53e78557845a654775a18efb77" +
|
||||
"eee098d2680bc4ceb866874f31c7fadd70262cca6039833522de03cb2527dc5cfc7072db48b6011b852d705c7b24ffedf52facf352ab2512c625811db7965edc87d08f7f27e02665c9a6a42968e4c58cd86aa847" +
|
||||
"69658153b62f208b2dcfbcb364d63e6671cf60698640"),
|
||||
20,
|
||||
},
|
||||
{
|
||||
fromHex("0120000000000000000000000000007000000000000000000000000000000DEF"),
|
||||
fromHex("000000000000000000000000"),
|
||||
nil,
|
||||
fromHex("ba6bce79c4f79c815b7fec53840ff0549ff5496378aa1f6ba481a48a5b9b8dbea8b820eccbc4eca37e1050fc53510a746037d2707f81e9683ec3f495b02ad0f848d7f9bf67bc" +
|
||||
"6299be525d1bf3bfd9953caa12cc4e1d5a6969e6fcd5d3c3e3d9f2e735cd7808755ddda7b22a3ae6040e7f8d05d62661a97d84dad694c69637aea3ae0af9f73303ffce3ae6161281d7a3c7e50a5706d766b34ddd" +
|
||||
"eab6974fdab10b3f48fb31f26df72e54c616edf1afc019f240c059a7c003677008227f49b021bc23c9c51d6f85ad136a4aa4950d9692f7094d344d88c05868691eb620d39bd8154986c971a8c9552ff0015fd78a" +
|
||||
"6bdd33df94b0056786a1e0ceb9cc9a38a31fbba224c1fb82bf6af376f67e94337a730301a6365d49b0dd56328e0269cbdfb5bcbccf1c7c3f4922ec1310aa2ef8136be788a55190453d3d3153b1b960a16f79365a" +
|
||||
"0bc7d6d2d5cda9f0993dbb815ee72f83b9d2ed296598fb21d91c29d1acf4ff0a549784a1d6a4f0935ee18efbf41fdc98d81c449544e9701d92648c06e5f416833b90d15fd4c04fc720a5ec6c6fc8b3d85a66826a" +
|
||||
"5e6817e21c4c4c0d7151b128236c41397ad4c6549e827c42269659973c153db70ffc33951b19ff21428091cea3836f72f88082508bae1839b59fa9c2556bdf373419d3cf29a8fad4d1787d829ad884f9927228fc" +
|
||||
"0b8bb7f1a067e7bdbf06c3885154f76f5be0cde8c7c59442b72b0e3f0341afe644e7eb4c29a467288aebc893e17b446c63da7551b8b59ebdd0cbcd65bc79a969bd3397f83d149840de731df4c09a833d5bd9feda" +
|
||||
"e1cd78a09b233b020de86ab71b9fd425adf84e502cef7c62015eade66ca91b0a90306894b53c7c5147e524d7b919ccdd0731e4eef8fe476b6eed38c91b611cd1777b9acf6eee0a11eaff16ae872db92a5d133fe7" +
|
||||
"bed999882da283893dd1e96f530be3cd36bf38c16deed2cd77651b6e0d3628de3cb86a78f1d07f6fc79434da5f73888be617b84595acef154f66b95ade1a3e120421a9dac6eec1e5b60139da3d604a03d4a9b7a3" +
|
||||
"0810a9c7d551aa8df08e11544486ad33000bfe410e8e6f35cb9d22806a5fcacefc6a1257d373d426243576fad9b20ad5ba84befc1a47c79d7bd2923b5776d3df86c8ed98b700d317502849ec8c02ecb8513a7a32" +
|
||||
"e2db15e75a814f12cfc20429ae06cae2021406b4f174ce56dca65f7994a3b2722e764520a52f87d0a887fc771dbfbf381b4f750dc074fedec1a43a4df37a5a2c148f89d9630ebbd1be1858bed10207cdacae9a0a" +
|
||||
"b92df58de53de4718f929a83474fbcf9969f1d28a5b257cacd56f0ff0bc425c93d8c91ac833c2cfefb97d82fe6236f3ec3c29e0112a6cac5abfec733db41265f8ff486e7d7fa0b3d9766357377f089056c9408d8" +
|
||||
"2f09f18700236cc1058ea1c273e287d07d521fdbb5e28d41cc1d95999eccee"),
|
||||
20,
|
||||
},
|
||||
}
|
41
vendor/github.com/aead/chacha20/chacha20.go
generated
vendored
Normal file
41
vendor/github.com/aead/chacha20/chacha20.go
generated
vendored
Normal file
|
@ -0,0 +1,41 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Package chacha20 implements the ChaCha20 / XChaCha20 stream cipher.
|
||||
// Notice that one specific key-nonce combination must be unique for all time.
|
||||
//
|
||||
// There are three versions of ChaCha20:
|
||||
// - ChaCha20 with a 64 bit nonce (en/decrypt up to 2^64 * 64 bytes for one key-nonce combination)
|
||||
// - ChaCha20 with a 96 bit nonce (en/decrypt up to 2^32 * 64 bytes (~256 GB) for one key-nonce combination)
|
||||
// - XChaCha20 with a 192 bit nonce (en/decrypt up to 2^64 * 64 bytes for one key-nonce combination)
|
||||
package chacha20 // import "github.com/aead/chacha20"
|
||||
|
||||
import (
|
||||
"crypto/cipher"
|
||||
|
||||
"github.com/aead/chacha20/chacha"
|
||||
)
|
||||
|
||||
// XORKeyStream encrypts or decrypts bytes from src to dst using the given nonce and key.
|
||||
// The length of the nonce determines the version of ChaCha20:
|
||||
// - 8 bytes: ChaCha20 with a 64 bit nonce and a 2^64 * 64 byte period.
|
||||
// - 12 bytes: ChaCha20 as defined in RFC 7539 and a 2^32 * 64 byte period.
|
||||
// - 24 bytes: XChaCha20 with a 192 bit nonce and a 2^64 * 64 byte period.
|
||||
// Src and dst may be the same slice but otherwise should not overlap.
|
||||
// If len(dst) < len(src) this function panics.
|
||||
// If the nonce is neither 64, 96 nor 192 bits long, this function panics.
|
||||
// The call is forwarded to chacha.XORKeyStream with the round count fixed at 20.
func XORKeyStream(dst, src, nonce, key []byte) {
|
||||
chacha.XORKeyStream(dst, src, nonce, key, 20)
|
||||
}
|
||||
|
||||
// NewCipher returns a new cipher.Stream implementing a ChaCha20 version.
|
||||
// The nonce must be unique for one key for all time.
|
||||
// The length of the nonce determinds the version of ChaCha20:
|
||||
// - 8 bytes: ChaCha20 with a 64 bit nonce and a 2^64 * 64 byte period.
|
||||
// - 12 bytes: ChaCha20 as defined in RFC 7539 and a 2^32 * 64 byte period.
|
||||
// - 24 bytes: XChaCha20 with a 192 bit nonce and a 2^64 * 64 byte period.
|
||||
// If the nonce is neither 64, 96 nor 192 bits long, a non-nil error is returned.
|
||||
func NewCipher(nonce, key []byte) (cipher.Stream, error) {
|
||||
return chacha.NewCipher(nonce, key, 20)
|
||||
}
|
108
vendor/github.com/aead/chacha20/chacha20_test.go
generated
vendored
Normal file
108
vendor/github.com/aead/chacha20/chacha20_test.go
generated
vendored
Normal file
|
@ -0,0 +1,108 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
package chacha20
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/hex"
|
||||
"testing"
|
||||
|
||||
"github.com/aead/chacha20/chacha"
|
||||
)
|
||||
|
||||
// toHex renders bits as a lowercase hexadecimal string.
func toHex(bits []byte) string {
	encoded := make([]byte, hex.EncodedLen(len(bits)))
	hex.Encode(encoded, bits)
	return string(encoded)
}
|
||||
|
||||
// fromHex decodes the hexadecimal string bits into raw bytes.
// It panics if bits is not valid hex, which is acceptable here because it is
// only used to build the static test vectors.
func fromHex(bits string) []byte {
	decoded, decodeErr := hex.DecodeString(bits)
	if decodeErr == nil {
		return decoded
	}
	panic(decodeErr)
}
|
||||
|
||||
func TestVectors(t *testing.T) {
|
||||
for i, v := range vectors {
|
||||
if len(v.plaintext) == 0 {
|
||||
v.plaintext = make([]byte, len(v.ciphertext))
|
||||
}
|
||||
|
||||
dst := make([]byte, len(v.ciphertext))
|
||||
|
||||
XORKeyStream(dst, v.plaintext, v.nonce, v.key)
|
||||
if !bytes.Equal(dst, v.ciphertext) {
|
||||
t.Errorf("Test %d: ciphertext mismatch:\n \t got: %s\n \t want: %s", i, toHex(dst), toHex(v.ciphertext))
|
||||
}
|
||||
|
||||
c, err := NewCipher(v.nonce, v.key)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
c.XORKeyStream(dst[:1], v.plaintext[:1])
|
||||
c.XORKeyStream(dst[1:], v.plaintext[1:])
|
||||
if !bytes.Equal(dst, v.ciphertext) {
|
||||
t.Errorf("Test %d: ciphertext mismatch:\n \t got: %s\n \t want: %s", i, toHex(dst), toHex(v.ciphertext))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkCipher(b *testing.B, size int, nonceSize int) {
|
||||
var key [32]byte
|
||||
nonce := make([]byte, nonceSize)
|
||||
c, _ := NewCipher(nonce, key[:])
|
||||
buf := make([]byte, size)
|
||||
|
||||
b.SetBytes(int64(len(buf)))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
c.XORKeyStream(buf, buf)
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkXORKeyStream(b *testing.B, size int, nonceSize int) {
|
||||
var key [32]byte
|
||||
nonce := make([]byte, nonceSize)
|
||||
buf := make([]byte, size)
|
||||
b.SetBytes(int64(len(buf)))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
XORKeyStream(buf, buf, nonce[:], key[:])
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkChaCha20_64(b *testing.B) { benchmarkCipher(b, 64, chacha.NonceSize) }
|
||||
func BenchmarkChaCha20_1K(b *testing.B) { benchmarkCipher(b, 1024, chacha.NonceSize) }
|
||||
func BenchmarkXChaCha20_64(b *testing.B) { benchmarkXORKeyStream(b, 64, chacha.XNonceSize) }
|
||||
func BenchmarkXChaCha20_1K(b *testing.B) { benchmarkXORKeyStream(b, 1024, chacha.XNonceSize) }
|
||||
func BenchmarkXORKeyStream64(b *testing.B) { benchmarkXORKeyStream(b, 64, chacha.NonceSize) }
|
||||
func BenchmarkXORKeyStream1K(b *testing.B) { benchmarkXORKeyStream(b, 1024, chacha.NonceSize) }
|
||||
func BenchmarkXChaCha20_XORKeyStream64(b *testing.B) { benchmarkXORKeyStream(b, 64, chacha.XNonceSize) }
|
||||
func BenchmarkXChaCha20_XORKeyStream1K(b *testing.B) {
|
||||
benchmarkXORKeyStream(b, 1024, chacha.XNonceSize)
|
||||
}
|
||||
|
||||
var vectors = []struct {
|
||||
key, nonce, plaintext, ciphertext []byte
|
||||
}{
|
||||
{
|
||||
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
|
||||
fromHex("0000000000000000"),
|
||||
nil,
|
||||
fromHex("76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586"),
|
||||
},
|
||||
{
|
||||
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
|
||||
fromHex("000000000000000000000000"),
|
||||
nil,
|
||||
fromHex("76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586"),
|
||||
},
|
||||
{
|
||||
fromHex("0000000000000000000000000000000000000000000000000000000000000000"),
|
||||
fromHex("000000000000000000000000000000000000000000000000"),
|
||||
nil,
|
||||
fromHex("bcd02a18bf3f01d19292de30a7a8fdaca4b65e50a6002cc72cd6d2f7c91ac3d5728f83e0aad2bfcf9abd2d2db58faedd65015dd83fc09b131e271043019e8e0f"),
|
||||
},
|
||||
}
|
25
vendor/github.com/aead/poly1305/.gitignore
generated
vendored
Normal file
25
vendor/github.com/aead/poly1305/.gitignore
generated
vendored
Normal file
|
@ -0,0 +1,25 @@
|
|||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
.vscode
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
16
vendor/github.com/aead/poly1305/.travis.yml
generated
vendored
Normal file
16
vendor/github.com/aead/poly1305/.travis.yml
generated
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
language: go
|
||||
|
||||
go:
|
||||
- 1.7
|
||||
- 1.8
|
||||
|
||||
env:
|
||||
- TRAVIS_GOARCH=amd64
|
||||
- TRAVIS_GOARCH=386
|
||||
|
||||
before_install:
|
||||
- export GOARCH=$TRAVIS_GOARCH
|
||||
|
||||
branches:
|
||||
only:
|
||||
- master
|
21
vendor/github.com/aead/poly1305/LICENSE
generated
vendored
Normal file
21
vendor/github.com/aead/poly1305/LICENSE
generated
vendored
Normal file
|
@ -0,0 +1,21 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2016 Andreas Auernhammer
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
48
vendor/github.com/aead/poly1305/README.md
generated
vendored
Normal file
48
vendor/github.com/aead/poly1305/README.md
generated
vendored
Normal file
|
@ -0,0 +1,48 @@
|
|||
[GoDoc](https://godoc.org/github.com/aead/poly1305)
|
||||
|
||||
## The poly1305 message authentication code
|
||||
|
||||
Poly1305 is a fast, one-time authentication function created by Daniel J. Bernstein.
|
||||
It is infeasible for an attacker to generate an authenticator for a message without the key.
|
||||
However, a key must only be used for a single message. Authenticating two different messages
|
||||
with the same key allows an attacker to forge authenticators for other messages with the same key.
|
||||
|
||||
### Installation
|
||||
Install in your GOPATH: `go get -u github.com/aead/poly1305`
|
||||
|
||||
### Requirements
|
||||
All Go versions >= 1.7 are supported.
|
||||
|
||||
### Performance
|
||||
|
||||
#### AMD64
|
||||
Hardware: Intel i7-6500U 2.50GHz x 2
|
||||
System: Linux Ubuntu 16.04 - kernel: 4.4.0-62-generic
|
||||
Go version: 1.8.0
|
||||
|
||||
**AVX2**
|
||||
```
|
||||
name speed cpb
|
||||
Sum_64-4 1.60GB/s ± 0% 1.39
|
||||
Sum_256-4 2.32GB/s ± 1% 1.00
|
||||
Sum_1K-4 3.61GB/s ± 1% 0.65
|
||||
Sum_8K-4 4.20GB/s ± 1% 0.55
|
||||
Write_64-4 2.04GB/s ± 0% 1.14
|
||||
Write_256-4 3.50GB/s ± 2% 0.67
|
||||
Write_1K-4 4.08GB/s ± 2% 0.57
|
||||
Write_8K-4 4.25GB/s ± 2% 0.55
|
||||
```
|
||||
|
||||
**x64**
|
||||
|
||||
```
|
||||
name speed cpb
|
||||
Sum_64-4 1.60GB/s ± 1% 1.46
|
||||
Sum_256-4 2.11GB/s ± 3% 1.10
|
||||
Sum_1K-4 2.35GB/s ±13% 0.99
|
||||
Sum_8K-4 2.47GB/s ±13% 0.94
|
||||
Write_64-4 1.81GB/s ± 5% 1.29
|
||||
Write_256-4 2.24GB/s ± 4% 1.04
|
||||
Write_1K-4 2.55GB/s ± 0% 0.91
|
||||
Write_8K-4 2.63GB/s ± 0% 0.88
|
||||
```
|
30
vendor/github.com/aead/poly1305/poly1305.go
generated
vendored
Normal file
30
vendor/github.com/aead/poly1305/poly1305.go
generated
vendored
Normal file
|
@ -0,0 +1,30 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Package poly1305 implements the Poly1305 one-time message authentication code
|
||||
// defined in RFC 7539.
|
||||
//
|
||||
// Poly1305 is a fast, one-time authentication function. It is infeasible for an
|
||||
// attacker to generate an authenticator for a message without the key.
|
||||
// However, a key must only be used for a single message. Authenticating two
|
||||
// different messages with the same key allows an attacker to forge
|
||||
// authenticators for other messages with the same key.
|
||||
package poly1305 // import "github.com/aead/poly1305"
|
||||
|
||||
import (
|
||||
"crypto/subtle"
|
||||
"errors"
|
||||
)
|
||||
|
||||
// TagSize is the size of the poly1305 authentication tag in bytes.
|
||||
const TagSize = 16
|
||||
|
||||
var errWriteAfterSum = errors.New("checksum already computed - adding more data is not allowed")
|
||||
|
||||
// Verify returns true if and only if the mac is a valid authenticator
|
||||
// for msg with the given key.
|
||||
func Verify(mac *[TagSize]byte, msg []byte, key [32]byte) bool {
|
||||
sum := Sum(msg, key)
|
||||
return subtle.ConstantTimeCompare(sum[:], mac[:]) == 1
|
||||
}
|
871
vendor/github.com/aead/poly1305/poly1305_AVX2_amd64.s
generated
vendored
Normal file
871
vendor/github.com/aead/poly1305/poly1305_AVX2_amd64.s
generated
vendored
Normal file
|
@ -0,0 +1,871 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// This code is inspired by the poly1305 AVX2 implementation by Shay Gueron and Vlad Krasnov.
|
||||
|
||||
// +build amd64, !gccgo, !appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
DATA addMaskAVX2<>+0x00(SB)/8, $0x3FFFFFF
|
||||
DATA addMaskAVX2<>+0x08(SB)/8, $0x3FFFFFF
|
||||
DATA addMaskAVX2<>+0x10(SB)/8, $0x3FFFFFF
|
||||
DATA addMaskAVX2<>+0x18(SB)/8, $0x3FFFFFF
|
||||
GLOBL addMaskAVX2<>(SB), RODATA, $32
|
||||
|
||||
DATA poly1305MaskAVX2<>+0x00(SB)/8, $0xFFFFFFC0FFFFFFF
|
||||
DATA poly1305MaskAVX2<>+0x08(SB)/8, $0xFFFFFFC0FFFFFFF
|
||||
DATA poly1305MaskAVX2<>+0x10(SB)/8, $0xFFFFFFC0FFFFFFF
|
||||
DATA poly1305MaskAVX2<>+0x18(SB)/8, $0xFFFFFFC0FFFFFFF
|
||||
DATA poly1305MaskAVX2<>+0x20(SB)/8, $0xFFFFFFC0FFFFFFC
|
||||
DATA poly1305MaskAVX2<>+0x28(SB)/8, $0xFFFFFFC0FFFFFFC
|
||||
DATA poly1305MaskAVX2<>+0x30(SB)/8, $0xFFFFFFC0FFFFFFC
|
||||
DATA poly1305MaskAVX2<>+0x38(SB)/8, $0xFFFFFFC0FFFFFFC
|
||||
GLOBL poly1305MaskAVX2<>(SB), RODATA, $64
|
||||
|
||||
DATA oneBit<>+0x00(SB)/8, $0x1000000
|
||||
DATA oneBit<>+0x08(SB)/8, $0x1000000
|
||||
DATA oneBit<>+0x10(SB)/8, $0x1000000
|
||||
DATA oneBit<>+0x18(SB)/8, $0x1000000
|
||||
GLOBL oneBit<>(SB), RODATA, $32
|
||||
|
||||
DATA fixPermutation<>+0x00(SB)/4, $6
|
||||
DATA fixPermutation<>+0x04(SB)/4, $7
|
||||
DATA fixPermutation<>+0x08(SB)/4, $6
|
||||
DATA fixPermutation<>+0x0c(SB)/4, $7
|
||||
DATA fixPermutation<>+0x10(SB)/4, $6
|
||||
DATA fixPermutation<>+0x14(SB)/4, $7
|
||||
DATA fixPermutation<>+0x18(SB)/4, $6
|
||||
DATA fixPermutation<>+0x1c(SB)/4, $7
|
||||
DATA fixPermutation<>+0x20(SB)/4, $4
|
||||
DATA fixPermutation<>+0x24(SB)/4, $5
|
||||
DATA fixPermutation<>+0x28(SB)/4, $6
|
||||
DATA fixPermutation<>+0x2c(SB)/4, $7
|
||||
DATA fixPermutation<>+0x30(SB)/4, $6
|
||||
DATA fixPermutation<>+0x34(SB)/4, $7
|
||||
DATA fixPermutation<>+0x38(SB)/4, $6
|
||||
DATA fixPermutation<>+0x3c(SB)/4, $7
|
||||
DATA fixPermutation<>+0x40(SB)/4, $2
|
||||
DATA fixPermutation<>+0x44(SB)/4, $3
|
||||
DATA fixPermutation<>+0x48(SB)/4, $6
|
||||
DATA fixPermutation<>+0x4c(SB)/4, $7
|
||||
DATA fixPermutation<>+0x50(SB)/4, $4
|
||||
DATA fixPermutation<>+0x54(SB)/4, $5
|
||||
DATA fixPermutation<>+0x58(SB)/4, $6
|
||||
DATA fixPermutation<>+0x5c(SB)/4, $7
|
||||
DATA fixPermutation<>+0x60(SB)/4, $0
|
||||
DATA fixPermutation<>+0x64(SB)/4, $1
|
||||
DATA fixPermutation<>+0x68(SB)/4, $4
|
||||
DATA fixPermutation<>+0x6c(SB)/4, $5
|
||||
DATA fixPermutation<>+0x70(SB)/4, $2
|
||||
DATA fixPermutation<>+0x74(SB)/4, $3
|
||||
DATA fixPermutation<>+0x78(SB)/4, $6
|
||||
DATA fixPermutation<>+0x7c(SB)/4, $7
|
||||
GLOBL fixPermutation<>(SB), RODATA, $128
|
||||
|
||||
// initializeAVX2 fills the state buffer from the key.
// Arguments (16 bytes, no local frame): state pointer at 0(FP), key pointer at 8(FP).
// It reads key bytes 0..31 and writes these byte ranges of state:
//   0..159    five 32-byte rows; only the low 16 bytes of each row are written at
//             the end of this routine, the high 16 bytes (+16) in the first stage
//   160..287  four 32-byte rows holding 5x the rows at 32, 64, 96 and 128
//   288..303  copy of key bytes 16..31
//   304..323  zeroed (updateAVX2 loads five 32-bit words from here)
// NOTE(review): presumably the rows are powers of the clamped Poly1305 "r" in
// radix 2^26 and 288..303 is the "s" half of the key -- confirm against the Go caller.
TEXT ·initializeAVX2(SB), $0-16
|
||||
MOVQ state+0(FP), DI
|
||||
MOVQ key+8(FP), SI
|
||||
|
||||
// R8 -> four 64-bit lanes of 0x3FFFFFF (26-bit limb mask).
MOVQ $addMaskAVX2<>(SB), R8
|
||||
|
||||
// Copy key[16:32] to state+288, then zero state+304..323.
MOVOU 16*1(SI), X10
|
||||
MOVOU X10, 288(DI)
|
||||
PXOR X10, X10
|
||||
MOVOU X10, 304(DI)
|
||||
|
||||
MOVD X10, 320(DI)
|
||||
// X5 = key bytes 0..7, X10 = key bytes 8..15 (64-bit loads).
MOVQ 8*0(SI), X5
|
||||
MOVQ 8*1(SI), X10
|
||||
|
||||
VZEROUPPER
|
||||
|
||||
// Mask both halves with the constants from poly1305MaskAVX2
// (0xFFFFFFC0FFFFFFF for the low half, 0xFFFFFFC0FFFFFFC for the high half).
MOVQ $poly1305MaskAVX2<>(SB), R9
|
||||
VPAND (R9), X5, X5
|
||||
VPAND 32(R9), X10, X10
|
||||
|
||||
// Split the masked 128-bit value into five 26-bit limbs X5..X9
// (X0 holds the 0x3FFFFFF mask).
VMOVDQU 0(R8), X0
|
||||
VPSRLQ $26, X5, X6
|
||||
VPAND X0, X5, X5
|
||||
VPSRLQ $26, X6, X7
|
||||
VPAND X0, X6, X6
|
||||
VPSLLQ $12, X10, X11
|
||||
VPXOR X11, X7, X7
|
||||
VPSRLQ $26, X7, X8
|
||||
VPSRLQ $40, X10, X9
|
||||
VPAND X0, X7, X7
|
||||
VPAND X0, X8, X8
|
||||
|
||||
// Multiply the limb vector by itself. VPMULUDQ is emitted as raw bytes;
// the trailing comment on each BYTE line gives the intended instruction.
BYTE $0xc5; BYTE $0xd1; BYTE $0xf4; BYTE $0xc5 // VPMULUDQ X5, X5, X0
|
||||
BYTE $0xc5; BYTE $0xd1; BYTE $0xf4; BYTE $0xce // VPMULUDQ X6, X5, X1
|
||||
BYTE $0xc5; BYTE $0xd1; BYTE $0xf4; BYTE $0xd7 // VPMULUDQ X7, X5, X2
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x51; BYTE $0xf4; BYTE $0xd8 // VPMULUDQ X8, X5, X3
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x51; BYTE $0xf4; BYTE $0xe1 // VPMULUDQ X9, X5, X4
|
||||
|
||||
// Cross terms are doubled with a left shift by one; X12..X15 collect the
// product columns above the fifth limb.
VPSLLQ $1, X1, X1
|
||||
VPSLLQ $1, X2, X2
|
||||
BYTE $0xc5; BYTE $0x49; BYTE $0xf4; BYTE $0xd6 // VPMULUDQ X6, X6, X10
|
||||
VPADDQ X10, X2, X2
|
||||
BYTE $0xc5; BYTE $0x49; BYTE $0xf4; BYTE $0xd7 // VPMULUDQ X7, X6, X10
|
||||
VPADDQ X10, X3, X3
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x49; BYTE $0xf4; BYTE $0xd0 // VPMULUDQ X8, X6, X10
|
||||
VPADDQ X10, X4, X4
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x49; BYTE $0xf4; BYTE $0xe1 // VPMULUDQ X9, X6, X12
|
||||
VPSLLQ $1, X3, X3
|
||||
VPSLLQ $1, X4, X4
|
||||
BYTE $0xc5; BYTE $0x41; BYTE $0xf4; BYTE $0xd7 // VPMULUDQ X7, X7, X10
|
||||
VPADDQ X10, X4, X4
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x41; BYTE $0xf4; BYTE $0xd0 // VPMULUDQ X8, X7, X10
|
||||
VPADDQ X10, X12, X12
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x41; BYTE $0xf4; BYTE $0xe9 // VPMULUDQ X9, X7, X13
|
||||
VPSLLQ $1, X12, X12
|
||||
VPSLLQ $1, X13, X13
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x39; BYTE $0xf4; BYTE $0xd0 // VPMULUDQ X8, X8, X10
|
||||
VPADDQ X10, X13, X13
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x39; BYTE $0xf4; BYTE $0xf1 // VPMULUDQ X9, X8, X14
|
||||
VPSLLQ $1, X14, X14
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x31; BYTE $0xf4; BYTE $0xf9 // VPMULUDQ X9, X9, X15
|
||||
|
||||
// Carry the part of X4 above 26 bits into X12.
VPSRLQ $26, X4, X10
|
||||
VPAND 0(R8), X4, X4
|
||||
VPADDQ X10, X12, X12
|
||||
|
||||
// Multiply the high columns X12..X15 by 5 (x + (x << 2)) ...
VPSLLQ $2, X12, X10
|
||||
VPADDQ X10, X12, X12
|
||||
VPSLLQ $2, X13, X10
|
||||
VPADDQ X10, X13, X13
|
||||
VPSLLQ $2, X14, X10
|
||||
VPADDQ X10, X14, X14
|
||||
VPSLLQ $2, X15, X10
|
||||
VPADDQ X10, X15, X15
|
||||
|
||||
// ... and fold them into the low columns X0..X3.
VPADDQ X12, X0, X0
|
||||
VPADDQ X13, X1, X1
|
||||
VPADDQ X14, X2, X2
|
||||
VPADDQ X15, X3, X3
|
||||
|
||||
// Carry chain X0 -> X1 -> X2 -> X3 -> X4, masking each limb to 26 bits.
VPSRLQ $26, X0, X10
|
||||
VPAND 0(R8), X0, X0
|
||||
VPADDQ X10, X1, X1
|
||||
VPSRLQ $26, X1, X10
|
||||
VPAND 0(R8), X1, X1
|
||||
VPADDQ X10, X2, X2
|
||||
VPSRLQ $26, X2, X10
|
||||
VPAND 0(R8), X2, X2
|
||||
VPADDQ X10, X3, X3
|
||||
VPSRLQ $26, X3, X10
|
||||
VPAND 0(R8), X3, X3
|
||||
VPADDQ X10, X4, X4
|
||||
|
||||
// Pair each product limb (X0..X4) with the matching input limb (X5..X9)
// in one 128-bit register.
BYTE $0xc5; BYTE $0xf9; BYTE $0x6c; BYTE $0xed // VPUNPCKLQDQ X5, X0, X5
|
||||
BYTE $0xc5; BYTE $0xf1; BYTE $0x6c; BYTE $0xf6 // VPUNPCKLQDQ X6, X1, X6
|
||||
BYTE $0xc5; BYTE $0xe9; BYTE $0x6c; BYTE $0xff // VPUNPCKLQDQ X7, X2, X7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ X8, X3, X8
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ X9, X4, X9
|
||||
|
||||
// Store the pairs into the high 16 bytes of rows 0..4.
VMOVDQU X5, 0+16(DI)
|
||||
VMOVDQU X6, 32+16(DI)
|
||||
VMOVDQU X7, 64+16(DI)
|
||||
VMOVDQU X8, 96+16(DI)
|
||||
VMOVDQU X9, 128+16(DI)
|
||||
|
||||
// 5x multiples of limbs 1..4, stored into the high 16 bytes of rows 5..8.
VPSLLQ $2, X6, X1
|
||||
VPSLLQ $2, X7, X2
|
||||
VPSLLQ $2, X8, X3
|
||||
VPSLLQ $2, X9, X4
|
||||
|
||||
VPADDQ X1, X6, X1
|
||||
VPADDQ X2, X7, X2
|
||||
VPADDQ X3, X8, X3
|
||||
VPADDQ X4, X9, X4
|
||||
|
||||
VMOVDQU X1, 160+16(DI)
|
||||
VMOVDQU X2, 192+16(DI)
|
||||
VMOVDQU X3, 224+16(DI)
|
||||
VMOVDQU X4, 256+16(DI)
|
||||
|
||||
// Second stage: copy the low 64-bit lane of each pair into both lanes
// (shuffle control 68) and multiply by the values just stored.
VPSHUFD $68, X5, X0
|
||||
VPSHUFD $68, X6, X1
|
||||
VPSHUFD $68, X7, X2
|
||||
VPSHUFD $68, X8, X3
|
||||
VPSHUFD $68, X9, X4
|
||||
|
||||
VMOVDQU 0+16(DI), X10
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x79; BYTE $0xf4; BYTE $0xea // VPMULUDQ X10, X0, X5
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x71; BYTE $0xf4; BYTE $0xf2 // VPMULUDQ X10, X1, X6
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x69; BYTE $0xf4; BYTE $0xfa // VPMULUDQ X10, X2, X7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0xf4; BYTE $0xc2 // VPMULUDQ X10, X3, X8
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0xf4; BYTE $0xca // VPMULUDQ X10, X4, X9
|
||||
|
||||
VMOVDQU 160+16(DI), X10
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X4, X11
|
||||
VPADDQ X11, X5, X5
|
||||
|
||||
VMOVDQU 32+16(DI), X10
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x79; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X0, X11
|
||||
VPADDQ X11, X6, X6
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x71; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X1, X11
|
||||
VPADDQ X11, X7, X7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x69; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X2, X11
|
||||
VPADDQ X11, X8, X8
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X3, X11
|
||||
VPADDQ X11, X9, X9
|
||||
|
||||
VMOVDQU 192+16(DI), X10
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X3, X11
|
||||
VPADDQ X11, X5, X5
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X4, X11
|
||||
VPADDQ X11, X6, X6
|
||||
|
||||
VMOVDQU 64+16(DI), X10
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x79; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X0, X11
|
||||
VPADDQ X11, X7, X7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x71; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X1, X11
|
||||
VPADDQ X11, X8, X8
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x69; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X2, X11
|
||||
VPADDQ X11, X9, X9
|
||||
|
||||
VMOVDQU 224+16(DI), X10
|
||||
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x69; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X2, X11
|
||||
VPADDQ X11, X5, X5
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X3, X11
|
||||
VPADDQ X11, X6, X6
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X4, X11
|
||||
VPADDQ X11, X7, X7
|
||||
|
||||
VMOVDQU 96+16(DI), X10
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x79; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X0, X11
|
||||
VPADDQ X11, X8, X8
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x71; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X1, X11
|
||||
VPADDQ X11, X9, X9
|
||||
|
||||
VMOVDQU 256+16(DI), X10
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x71; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X1, X11
|
||||
VPADDQ X11, X5, X5
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x69; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X2, X11
|
||||
VPADDQ X11, X6, X6
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X3, X11
|
||||
VPADDQ X11, X7, X7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X4, X11
|
||||
VPADDQ X11, X8, X8
|
||||
|
||||
VMOVDQU 128+16(DI), X10
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x79; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X0, X11
|
||||
VPADDQ X11, X9, X9
|
||||
|
||||
// X12 = 26-bit limb mask for the carry chain below.
VMOVDQU 0(R8), X12
|
||||
|
||||
// Carry propagation over X5..X9; the overflow of the top limb X9 is
// multiplied by 5 (x + (x << 2)) and added to the bottom limb X5.
VPSRLQ $26, X8, X10
|
||||
VPADDQ X10, X9, X9
|
||||
VPAND X12, X8, X8
|
||||
VPSRLQ $26, X9, X10
|
||||
VPSLLQ $2, X10, X11
|
||||
VPADDQ X11, X10, X10
|
||||
VPADDQ X10, X5, X5
|
||||
VPAND X12, X9, X9
|
||||
VPSRLQ $26, X5, X10
|
||||
VPAND X12, X5, X5
|
||||
VPADDQ X10, X6, X6
|
||||
VPSRLQ $26, X6, X10
|
||||
VPAND X12, X6, X6
|
||||
VPADDQ X10, X7, X7
|
||||
VPSRLQ $26, X7, X10
|
||||
VPAND X12, X7, X7
|
||||
VPADDQ X10, X8, X8
|
||||
VPSRLQ $26, X8, X10
|
||||
VPAND X12, X8, X8
|
||||
VPADDQ X10, X9, X9
|
||||
|
||||
// Store the second-stage limbs into the low 16 bytes of rows 0..4.
VMOVDQU X5, 0(DI)
|
||||
VMOVDQU X6, 32(DI)
|
||||
VMOVDQU X7, 64(DI)
|
||||
VMOVDQU X8, 96(DI)
|
||||
VMOVDQU X9, 128(DI)
|
||||
|
||||
// 5x multiples of limbs 1..4, stored into the low 16 bytes of rows 5..8.
VPSLLQ $2, X6, X1
|
||||
VPSLLQ $2, X7, X2
|
||||
VPSLLQ $2, X8, X3
|
||||
VPSLLQ $2, X9, X4
|
||||
|
||||
VPADDQ X1, X6, X1
|
||||
VPADDQ X2, X7, X2
|
||||
VPADDQ X3, X8, X3
|
||||
VPADDQ X4, X9, X4
|
||||
|
||||
VMOVDQU X1, 160(DI)
|
||||
VMOVDQU X2, 192(DI)
|
||||
VMOVDQU X3, 224(DI)
|
||||
VMOVDQU X4, 256(DI)
|
||||
|
||||
RET
|
||||
|
||||
TEXT ·updateAVX2(SB), $0-24
|
||||
MOVQ state+0(FP), DI
|
||||
MOVQ msg+8(FP), SI
|
||||
MOVQ msg_len+16(FP), DX
|
||||
|
||||
MOVD 304(DI), X0
|
||||
MOVD 308(DI), X1
|
||||
MOVD 312(DI), X2
|
||||
MOVD 316(DI), X3
|
||||
MOVD 320(DI), X4
|
||||
|
||||
MOVQ $addMaskAVX2<>(SB), R12
|
||||
MOVQ $oneBit<>(SB), R13
|
||||
MOVQ $fixPermutation<>(SB), R15
|
||||
VZEROUPPER
|
||||
|
||||
VMOVDQA (R12), Y12
|
||||
|
||||
CMPQ DX, $128
|
||||
JB BETWEEN_0_AND_128
|
||||
|
||||
AT_LEAST_128:
|
||||
VMOVDQU 32*0(SI), Y9
|
||||
VMOVDQU 32*1(SI), Y10
|
||||
ADDQ $64, SI
|
||||
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x35; BYTE $0x6c; BYTE $0xfa // VPUNPCKLQDQ Y10,Y9,Y7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ Y10,Y9,Y8
|
||||
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xff; BYTE $0xd8 // VPERMQ $216,Y7,Y7
|
||||
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0; BYTE $0xd8 // VPERMQ $216,Y8,Y8
|
||||
|
||||
VPSRLQ $26, Y7, Y9
|
||||
VPAND Y12, Y7, Y7
|
||||
VPADDQ Y7, Y0, Y0
|
||||
|
||||
VPSRLQ $26, Y9, Y7
|
||||
VPAND Y12, Y9, Y9
|
||||
VPADDQ Y9, Y1, Y1
|
||||
|
||||
VPSLLQ $12, Y8, Y9
|
||||
VPXOR Y9, Y7, Y7
|
||||
VPAND Y12, Y7, Y7
|
||||
VPADDQ Y7, Y2, Y2
|
||||
|
||||
VPSRLQ $26, Y9, Y7
|
||||
VPSRLQ $40, Y8, Y9
|
||||
VPAND Y12, Y7, Y7
|
||||
VPXOR (R13), Y9, Y9
|
||||
VPADDQ Y7, Y3, Y3
|
||||
VPADDQ Y9, Y4, Y4
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0x2f // VPBROADCASTQ 0(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xfd // VPMULUDQ Y5, Y0, Y7
|
||||
BYTE $0xc5; BYTE $0x75; BYTE $0xf4; BYTE $0xc5 // VPMULUDQ Y5, Y1, Y8
|
||||
BYTE $0xc5; BYTE $0x6d; BYTE $0xf4; BYTE $0xcd // VPMULUDQ Y5, Y2, Y9
|
||||
BYTE $0xc5; BYTE $0x65; BYTE $0xf4; BYTE $0xd5 // VPMULUDQ Y5, Y3, Y10
|
||||
BYTE $0xc5; BYTE $0x5d; BYTE $0xf4; BYTE $0xdd // VPMULUDQ Y5, Y4, Y11
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0xaf; BYTE $0xa0; BYTE $0x00; BYTE $0x00; BYTE $0x00 // VPBROADCASTQ 160(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0x6f; BYTE $0x20 // VPBROADCASTQ 32(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0xaf; BYTE $0xc0; BYTE $0x00; BYTE $0x00; BYTE $0x00 // VPBROADCASTQ 192(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0x6f; BYTE $0x40 // VPBROADCASTQ 64(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0xaf; BYTE $0xe0; BYTE $0x00; BYTE $0x00; BYTE $0x00 // VPBROADCASTQ 224(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0x6f; BYTE $0x60 // VPBROADCASTQ 96(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0xaf; BYTE $0x00; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VPBROADCASTQ 256(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2,Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0xaf; BYTE $0x80; BYTE $0x00; BYTE $0x00; BYTE $0x00 // VPBROADCASTQ 128(DI),Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VPSRLQ $26, Y10, Y5
|
||||
VPADDQ Y5, Y11, Y11
|
||||
VPAND Y12, Y10, Y10
|
||||
|
||||
VPSRLQ $26, Y11, Y5
|
||||
VPSLLQ $2, Y5, Y6
|
||||
VPADDQ Y6, Y5, Y5
|
||||
VPADDQ Y5, Y7, Y7
|
||||
VPAND Y12, Y11, Y11
|
||||
|
||||
VPSRLQ $26, Y7, Y5
|
||||
VPAND Y12, Y7, Y0
|
||||
VPADDQ Y5, Y8, Y8
|
||||
VPSRLQ $26, Y8, Y5
|
||||
VPAND Y12, Y8, Y1
|
||||
VPADDQ Y5, Y9, Y9
|
||||
VPSRLQ $26, Y9, Y5
|
||||
VPAND Y12, Y9, Y2
|
||||
VPADDQ Y5, Y10, Y10
|
||||
VPSRLQ $26, Y10, Y5
|
||||
VPAND Y12, Y10, Y3
|
||||
VPADDQ Y5, Y11, Y4
|
||||
|
||||
SUBQ $64, DX
|
||||
CMPQ DX, $128
|
||||
JAE AT_LEAST_128
|
||||
|
||||
BETWEEN_0_AND_128:
|
||||
CMPQ DX, $64
|
||||
JB BETWEEN_0_AND_64
|
||||
|
||||
VMOVDQU 32*0(SI), Y9
|
||||
VMOVDQU 32*1(SI), Y10
|
||||
ADDQ $64, SI
|
||||
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x35; BYTE $0x6c; BYTE $0xfa // VPUNPCKLQDQ Y10, Y9, Y7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ Y10, Y9, Y8
|
||||
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xff; BYTE $0xd8 // VPERMQ $216, Y7, Y7
|
||||
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0; BYTE $0xd8 // VPERMQ $216, Y8, Y8
|
||||
|
||||
VPSRLQ $26, Y7, Y9
|
||||
VPAND Y12, Y7, Y7
|
||||
VPADDQ Y7, Y0, Y0
|
||||
|
||||
VPSRLQ $26, Y9, Y7
|
||||
VPAND Y12, Y9, Y9
|
||||
VPADDQ Y9, Y1, Y1
|
||||
|
||||
VPSLLQ $12, Y8, Y9
|
||||
VPXOR Y9, Y7, Y7
|
||||
VPAND Y12, Y7, Y7
|
||||
VPADDQ Y7, Y2, Y2
|
||||
|
||||
VPSRLQ $26, Y9, Y7
|
||||
VPSRLQ $40, Y8, Y9
|
||||
VPAND Y12, Y7, Y7
|
||||
VPXOR (R13), Y9, Y9
|
||||
VPADDQ Y7, Y3, Y3
|
||||
VPADDQ Y9, Y4, Y4
|
||||
|
||||
VMOVDQU 0(DI), Y5
|
||||
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xfd // VPMULUDQ Y5, Y0, Y7
|
||||
BYTE $0xc5; BYTE $0x75; BYTE $0xf4; BYTE $0xc5 // VPMULUDQ Y5, Y1, Y8
|
||||
BYTE $0xc5; BYTE $0x6d; BYTE $0xf4; BYTE $0xcd // VPMULUDQ Y5, Y2, Y9
|
||||
BYTE $0xc5; BYTE $0x65; BYTE $0xf4; BYTE $0xd5 // VPMULUDQ Y5, Y3, Y10
|
||||
BYTE $0xc5; BYTE $0x5d; BYTE $0xf4; BYTE $0xdd // VPMULUDQ Y5, Y4, Y11
|
||||
|
||||
VMOVDQU 160(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
VMOVDQU 32(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VMOVDQU 192(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
|
||||
VMOVDQU 64(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VMOVDQU 224(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
|
||||
VMOVDQU 96(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VMOVDQU 256(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
|
||||
VMOVDQU 128(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VPSRLQ $26, Y10, Y5
|
||||
VPADDQ Y5, Y11, Y11
|
||||
VPAND Y12, Y10, Y10
|
||||
VPSRLQ $26, Y11, Y5
|
||||
VPSLLQ $2, Y5, Y6
|
||||
VPADDQ Y6, Y5, Y5
|
||||
VPADDQ Y5, Y7, Y7
|
||||
VPAND Y12, Y11, Y11
|
||||
VPSRLQ $26, Y7, Y5
|
||||
VPAND Y12, Y7, Y0
|
||||
VPADDQ Y5, Y8, Y8
|
||||
VPSRLQ $26, Y8, Y5
|
||||
VPAND Y12, Y8, Y1
|
||||
VPADDQ Y5, Y9, Y9
|
||||
VPSRLQ $26, Y9, Y5
|
||||
VPAND Y12, Y9, Y2
|
||||
VPADDQ Y5, Y10, Y10
|
||||
VPSRLQ $26, Y10, Y5
|
||||
VPAND Y12, Y10, Y3
|
||||
VPADDQ Y5, Y11, Y4
|
||||
|
||||
VPSRLDQ $8, Y0, Y7
|
||||
VPSRLDQ $8, Y1, Y8
|
||||
VPSRLDQ $8, Y2, Y9
|
||||
VPSRLDQ $8, Y3, Y10
|
||||
VPSRLDQ $8, Y4, Y11
|
||||
|
||||
VPADDQ Y7, Y0, Y0
|
||||
VPADDQ Y8, Y1, Y1
|
||||
VPADDQ Y9, Y2, Y2
|
||||
VPADDQ Y10, Y3, Y3
|
||||
VPADDQ Y11, Y4, Y4
|
||||
|
||||
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xf8; BYTE $0xaa // VPERMQ $170, Y0, Y7
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xc1; BYTE $0xaa // VPERMQ $170, Y1, Y8
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xca; BYTE $0xaa // VPERMQ $170, Y2, Y9
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xd3; BYTE $0xaa // VPERMQ $170, Y3, Y10
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xdc; BYTE $0xaa // VPERMQ $170, Y4, Y11
|
||||
|
||||
VPADDQ Y7, Y0, Y0
|
||||
VPADDQ Y8, Y1, Y1
|
||||
VPADDQ Y9, Y2, Y2
|
||||
VPADDQ Y10, Y3, Y3
|
||||
VPADDQ Y11, Y4, Y4
|
||||
SUBQ $64, DX
|
||||
|
||||
BETWEEN_0_AND_64:
|
||||
TESTQ DX, DX
|
||||
JZ DONE
|
||||
|
||||
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0xc0 // VMOVQ X0, X0
|
||||
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0xc9 // VMOVQ X1, X1
|
||||
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0xd2 // VMOVQ X2, X2
|
||||
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0xdb // VMOVQ X3, X3
|
||||
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0xe4 // VMOVQ X4, X4
|
||||
|
||||
MOVQ (R13), BX
|
||||
MOVQ SP, AX
|
||||
TESTQ $15, DX
|
||||
JZ FULL_BLOCKS
|
||||
|
||||
SUBQ $64, SP
|
||||
VPXOR Y7, Y7, Y7
|
||||
VMOVDQU Y7, (SP)
|
||||
VMOVDQU Y7, 32(SP)
|
||||
|
||||
XORQ BX, BX
|
||||
|
||||
FLUSH_BUFFER:
|
||||
MOVB (SI)(BX*1), CX
|
||||
MOVB CX, (SP)(BX*1)
|
||||
INCQ BX
|
||||
CMPQ DX, BX
|
||||
JNE FLUSH_BUFFER
|
||||
|
||||
MOVB $1, (SP)(BX*1)
|
||||
XORQ BX, BX
|
||||
MOVQ SP, SI
|
||||
|
||||
FULL_BLOCKS:
|
||||
CMPQ DX, $16
|
||||
JA AT_LEAST_16
|
||||
|
||||
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0x3e // VMOVQ 8*0(SI), X7
|
||||
BYTE $0xc5; BYTE $0x7a; BYTE $0x7e; BYTE $0x46; BYTE $0x08 // VMOVQ 8*1(SI), X8
|
||||
BYTE $0xc4; BYTE $0x61; BYTE $0xf9; BYTE $0x6e; BYTE $0xf3 // VMOVQ BX ,X14
|
||||
VMOVDQA (R15), Y13
|
||||
JMP MULTIPLY
|
||||
|
||||
AT_LEAST_16:
|
||||
CMPQ DX, $32
|
||||
JA AT_LEAST_32
|
||||
VMOVDQU 16*0(SI), X9
|
||||
VMOVDQU 16*1(SI), X10
|
||||
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x7a; BYTE $0x7e; BYTE $0x75; BYTE $0x00 // VMOVQ (R13), X14
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0xf3; BYTE $0x01 // VPINSRQ $1,BX, X14, X14
|
||||
VMOVDQA 32(R15), Y13
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x35; BYTE $0x6c; BYTE $0xfa // VPUNPCKLQDQ Y10, Y9, Y7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ Y10, Y9, Y8
|
||||
JMP MULTIPLY
|
||||
|
||||
AT_LEAST_32:
|
||||
CMPQ DX, $48
|
||||
JA AT_LEAST_48
|
||||
VMOVDQU 32*0(SI), Y9
|
||||
VMOVDQU 32*1(SI), X10
|
||||
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x7a; BYTE $0x7e; BYTE $0x75; BYTE $0x00 // VMOVQ 0(R13), X14
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0xf3; BYTE $0x01 // VPINSRQ $1, BX, X14, X14
|
||||
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xf6; BYTE $0xc4 // VPERMQ $196, Y14, Y14
|
||||
VMOVDQA 64(R15), Y13
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x35; BYTE $0x6c; BYTE $0xfa // VPUNPCKLQDQ Y10, Y9, Y7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ Y10, Y9, Y8
|
||||
JMP MULTIPLY
|
||||
|
||||
AT_LEAST_48:
|
||||
VMOVDQU 32*0(SI), Y9
|
||||
VMOVDQU 32*1(SI), Y10
|
||||
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x7a; BYTE $0x7e; BYTE $0x75; BYTE $0x00 // VMOVQ (R13),X14
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0xf3; BYTE $0x01 // VPINSRQ $1,BX,X14,X14
|
||||
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xf6; BYTE $0x40 // VPERMQ $64,Y14,Y14
|
||||
VMOVDQA 96(R15), Y13
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x35; BYTE $0x6c; BYTE $0xfa // VPUNPCKLQDQ Y10, Y9, Y7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ Y10, Y9, Y8
|
||||
|
||||
MULTIPLY:
|
||||
MOVQ AX, SP
|
||||
|
||||
VPSRLQ $26, Y7, Y9
|
||||
VPAND Y12, Y7, Y7
|
||||
VPADDQ Y7, Y0, Y0
|
||||
|
||||
VPSRLQ $26, Y9, Y7
|
||||
VPAND Y12, Y9, Y9
|
||||
VPADDQ Y9, Y1, Y1
|
||||
|
||||
VPSLLQ $12, Y8, Y9
|
||||
VPXOR Y9, Y7, Y7
|
||||
VPAND Y12, Y7, Y7
|
||||
VPADDQ Y7, Y2, Y2
|
||||
|
||||
VPSRLQ $26, Y9, Y7
|
||||
VPSRLQ $40, Y8, Y9
|
||||
VPAND Y12, Y7, Y7
|
||||
VPXOR Y14, Y9, Y9
|
||||
VPADDQ Y7, Y3, Y3
|
||||
VPADDQ Y9, Y4, Y4
|
||||
|
||||
VMOVDQU 0(DI), Y5
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xfd // VPMULUDQ Y5, Y0, Y7
|
||||
BYTE $0xc5; BYTE $0x75; BYTE $0xf4; BYTE $0xc5 // VPMULUDQ Y5, Y1, Y8
|
||||
BYTE $0xc5; BYTE $0x6d; BYTE $0xf4; BYTE $0xcd // VPMULUDQ Y5, Y2, Y9
|
||||
BYTE $0xc5; BYTE $0x65; BYTE $0xf4; BYTE $0xd5 // VPMULUDQ Y5, Y3, Y10
|
||||
BYTE $0xc5; BYTE $0x5d; BYTE $0xf4; BYTE $0xdd // VPMULUDQ Y5, Y4, Y11
|
||||
|
||||
VMOVDQU 160(DI), Y5
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
VMOVDQU 32(DI), Y5
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VMOVDQU 192(DI), Y5
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
|
||||
VMOVDQU 64(DI), Y5
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VMOVDQU 224(DI), Y5
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
|
||||
VMOVDQU 96(DI), Y5
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VMOVDQU 256(DI), Y5
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1,Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
|
||||
VMOVDQU 128(DI), Y5
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VPSRLQ $26, Y10, Y5
|
||||
VPADDQ Y5, Y11, Y11
|
||||
VPAND Y12, Y10, Y10
|
||||
VPSRLQ $26, Y11, Y5
|
||||
VPSLLQ $2, Y5, Y6
|
||||
VPADDQ Y6, Y5, Y5
|
||||
VPADDQ Y5, Y7, Y7
|
||||
VPAND Y12, Y11, Y11
|
||||
VPSRLQ $26, Y7, Y5
|
||||
VPAND Y12, Y7, Y0
|
||||
VPADDQ Y5, Y8, Y8
|
||||
VPSRLQ $26, Y8, Y5
|
||||
VPAND Y12, Y8, Y1
|
||||
VPADDQ Y5, Y9, Y9
|
||||
VPSRLQ $26, Y9, Y5
|
||||
VPAND Y12, Y9, Y2
|
||||
VPADDQ Y5, Y10, Y10
|
||||
VPSRLQ $26, Y10, Y5
|
||||
VPAND Y12, Y10, Y3
|
||||
VPADDQ Y5, Y11, Y4
|
||||
|
||||
VPSRLDQ $8, Y0, Y7
|
||||
VPSRLDQ $8, Y1, Y8
|
||||
VPSRLDQ $8, Y2, Y9
|
||||
VPSRLDQ $8, Y3, Y10
|
||||
VPSRLDQ $8, Y4, Y11
|
||||
|
||||
VPADDQ Y7, Y0, Y0
|
||||
VPADDQ Y8, Y1, Y1
|
||||
VPADDQ Y9, Y2, Y2
|
||||
VPADDQ Y10, Y3, Y3
|
||||
VPADDQ Y11, Y4, Y4
|
||||
|
||||
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xf8; BYTE $0xaa // VPERMQ $170, Y0, Y7
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xc1; BYTE $0xaa // VPERMQ $170, Y1, Y8
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xca; BYTE $0xaa // VPERMQ $170, Y2, Y9
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xd3; BYTE $0xaa // VPERMQ $170, Y3, Y10
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xdc; BYTE $0xaa // VPERMQ $170, Y4, Y11
|
||||
|
||||
VPADDQ Y7, Y0, Y0
|
||||
VPADDQ Y8, Y1, Y1
|
||||
VPADDQ Y9, Y2, Y2
|
||||
VPADDQ Y10, Y3, Y3
|
||||
VPADDQ Y11, Y4, Y4
|
||||
|
||||
DONE:
|
||||
VZEROUPPER
|
||||
MOVD X0, 304(DI)
|
||||
MOVD X1, 308(DI)
|
||||
MOVD X2, 312(DI)
|
||||
MOVD X3, 316(DI)
|
||||
MOVD X4, 320(DI)
|
||||
RET
|
||||
|
||||
// func finalizeAVX2(tag *[TagSize]byte, state *[512]byte)
//
// Produces the 16-byte tag from the accumulator words kept at state+304..320:
// one carry pass over the five words, packing into two 64-bit words, then a
// 128-bit addition of the 16 bytes stored at state+288.
//
// NOTE(review): no conditional subtraction of p = 2^130-5 is visible in this
// routine - confirm that the single carry pass is sufficient for all inputs.
// NOTE(review): the first argument is named "out" here but "tag" in the Go
// prototype - go vet (asmdecl) may flag this.
TEXT ·finalizeAVX2(SB), $0-16
	MOVQ out+0(FP), SI
	MOVQ state+8(FP), DI

	VZEROUPPER

	// Load the five 32-bit accumulator words h0..h4 into X0..X4.
	BYTE $0xc5; BYTE $0xf9; BYTE $0x6e; BYTE $0x87; BYTE $0x30; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VMOVD 304(DI), X0
	BYTE $0xc5; BYTE $0xf9; BYTE $0x6e; BYTE $0x8f; BYTE $0x34; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VMOVD 308(DI), X1
	BYTE $0xc5; BYTE $0xf9; BYTE $0x6e; BYTE $0x97; BYTE $0x38; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VMOVD 312(DI), X2
	BYTE $0xc5; BYTE $0xf9; BYTE $0x6e; BYTE $0x9f; BYTE $0x3c; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VMOVD 316(DI), X3
	BYTE $0xc5; BYTE $0xf9; BYTE $0x6e; BYTE $0xa7; BYTE $0x40; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VMOVD 320(DI), X4

	// X7 = mask applied after every 26-bit shift (defined outside this view;
	// presumably the 26-bit limb mask).
	VMOVDQU addMaskAVX2<>(SB), X7

	// c = h4 >> 26; h0 += 5*c (computed as c + 4*c); h4 &= mask
	VPSRLQ $26, X4, X5
	VPSLLQ $2, X5, X6
	VPADDQ X6, X5, X5
	VPADDQ X5, X0, X0
	VPAND  X7, X4, X4

	// Ripple the carry h0 -> h1 -> h2 -> h3 -> h4.
	VPSRLQ $26, X0, X5
	VPAND  X7, X0, X0
	VPADDQ X5, X1, X1
	VPSRLQ $26, X1, X5
	VPAND  X7, X1, X1
	VPADDQ X5, X2, X2
	VPSRLQ $26, X2, X5
	VPAND  X7, X2, X2
	VPADDQ X5, X3, X3
	VPSRLQ $26, X3, X5
	VPAND  X7, X3, X3
	VPADDQ X5, X4, X4

	// Pack the limbs: X0 = h0 | h1<<26 | h2<<52 (low 64 bits),
	//                 X1 = h2>>12 | h3<<14 | h4<<40 (high 64 bits).
	VPSLLQ $26, X1, X5
	VPXOR  X5, X0, X0
	VPSLLQ $52, X2, X5
	VPXOR  X5, X0, X0
	VPSRLQ $12, X2, X1
	VPSLLQ $14, X3, X5
	VPXOR  X5, X1, X1
	VPSLLQ $40, X4, X5
	VPXOR  X5, X1, X1

	VZEROUPPER

	MOVQ X0, AX
	MOVQ X1, BX

	// 128-bit add of the value stored at state+288, then write the tag.
	ADDQ 288(DI), AX
	ADCQ 288+8(DI), BX
	MOVQ AX, (SI)
	MOVQ BX, 8(SI)

	RET
|
197
vendor/github.com/aead/poly1305/poly1305_amd64.go
generated
vendored
Normal file
197
vendor/github.com/aead/poly1305/poly1305_amd64.go
generated
vendored
Normal file
|
@ -0,0 +1,197 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build amd64,!gccgo,!appengine
|
||||
|
||||
package poly1305
|
||||
|
||||
import (
|
||||
"io"
|
||||
)
|
||||
|
||||
var useAVX2 = supportsAVX2()
|
||||
|
||||
//go:noescape
|
||||
func supportsAVX2() bool
|
||||
|
||||
//go:noescape
|
||||
func initialize(state *[7]uint64, key *[32]byte)
|
||||
|
||||
//go:noescape
|
||||
func initializeAVX2(state *[512]byte, key *[32]byte)
|
||||
|
||||
//go:noescape
|
||||
func update(state *[7]uint64, msg []byte)
|
||||
|
||||
//go:noescape
|
||||
func updateAVX2(state *[512]byte, msg []byte)
|
||||
|
||||
//go:noescape
|
||||
func finalize(tag *[TagSize]byte, state *[7]uint64)
|
||||
|
||||
//go:noescape
|
||||
func finalizeAVX2(tag *[TagSize]byte, state *[512]byte)
|
||||
|
||||
// compiler asserts - check that poly1305Hash and poly1305HashAVX2 implements the hash interface
|
||||
var (
|
||||
_ (hash) = &poly1305Hash{}
|
||||
_ (hash) = &poly1305HashAVX2{}
|
||||
)
|
||||
|
||||
type hash interface {
|
||||
io.Writer
|
||||
|
||||
Sum(b []byte) []byte
|
||||
}
|
||||
|
||||
// Sum generates an authenticator for msg using a one-time key and returns the
|
||||
// 16-byte result. Authenticating two different messages with the same key allows
|
||||
// an attacker to forge messages at will.
|
||||
func Sum(msg []byte, key [32]byte) [TagSize]byte {
|
||||
if len(msg) == 0 {
|
||||
msg = []byte{}
|
||||
}
|
||||
var out [TagSize]byte
|
||||
if useAVX2 && len(msg) > 8*TagSize {
|
||||
var state [512]byte
|
||||
initializeAVX2(&state, &key)
|
||||
updateAVX2(&state, msg)
|
||||
finalizeAVX2(&out, &state)
|
||||
} else {
|
||||
var state [7]uint64 // := uint64{ h0, h1, h2, r0, r1, pad0, pad1 }
|
||||
initialize(&state, &key)
|
||||
update(&state, msg)
|
||||
finalize(&out, &state)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// New returns a Hash computing the poly1305 sum.
|
||||
// Notice that Poly1305 is insecure if one key is used twice.
|
||||
func New(key [32]byte) *Hash {
|
||||
if useAVX2 {
|
||||
h := new(poly1305HashAVX2)
|
||||
initializeAVX2(&(h.state), &key)
|
||||
return &Hash{h, false}
|
||||
}
|
||||
h := new(poly1305Hash)
|
||||
initialize(&(h.state), &key)
|
||||
return &Hash{h, false}
|
||||
}
|
||||
|
||||
// Hash implements the poly1305 authenticator.
|
||||
// Poly1305 cannot be used like common hash.Hash implementations,
|
||||
// because using a poly1305 key twice breaks its security.
|
||||
type Hash struct {
|
||||
hash
|
||||
|
||||
done bool
|
||||
}
|
||||
|
||||
// Size returns the number of bytes Sum will append.
|
||||
func (h *Hash) Size() int { return TagSize }
|
||||
|
||||
// Write adds more data to the running Poly1305 hash.
|
||||
// This function should return a non-nil error if a call
|
||||
// to Write happens after a call to Sum. So it is not possible
|
||||
// to compute the checksum and than add more data.
|
||||
func (h *Hash) Write(msg []byte) (int, error) {
|
||||
if h.done {
|
||||
return 0, errWriteAfterSum
|
||||
}
|
||||
return h.hash.Write(msg)
|
||||
}
|
||||
|
||||
// Sum appends the Poly1305 hash of the previously
|
||||
// processed data to b and returns the resulting slice.
|
||||
// It is safe to call this function multiple times.
|
||||
func (h *Hash) Sum(b []byte) []byte {
|
||||
b = h.hash.Sum(b)
|
||||
h.done = true
|
||||
return b
|
||||
}
|
||||
|
||||
type poly1305Hash struct {
|
||||
state [7]uint64 // := uint64{ h0, h1, h2, r0, r1, pad0, pad1 }
|
||||
|
||||
buf [TagSize]byte
|
||||
off int
|
||||
}
|
||||
|
||||
func (h *poly1305Hash) Write(p []byte) (n int, err error) {
|
||||
n = len(p)
|
||||
if h.off > 0 {
|
||||
dif := TagSize - h.off
|
||||
if n <= dif {
|
||||
h.off += copy(h.buf[h.off:], p)
|
||||
return n, nil
|
||||
}
|
||||
copy(h.buf[h.off:], p[:dif])
|
||||
update(&(h.state), h.buf[:])
|
||||
p = p[dif:]
|
||||
h.off = 0
|
||||
}
|
||||
// process full 16-byte blocks
|
||||
if nn := len(p) & (^(TagSize - 1)); nn > 0 {
|
||||
update(&(h.state), p[:nn])
|
||||
p = p[nn:]
|
||||
}
|
||||
if len(p) > 0 {
|
||||
h.off += copy(h.buf[h.off:], p)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (h *poly1305Hash) Sum(b []byte) []byte {
|
||||
var out [TagSize]byte
|
||||
state := h.state
|
||||
if h.off > 0 {
|
||||
update(&state, h.buf[:h.off])
|
||||
}
|
||||
finalize(&out, &state)
|
||||
return append(b, out[:]...)
|
||||
}
|
||||
|
||||
type poly1305HashAVX2 struct {
|
||||
// r[0] | r^2[0] | r[1] | r^2[1] | r[2] | r^2[2] | r[3] | r^2[3] | r[4] | r^2[4] | r[1]*5 | r^2[1]*5 | r[2]*5 | r^2[2]*5 r[3]*5 | r^2[3]*5 r[4]*5 | r^2[4]*5
|
||||
state [512]byte
|
||||
|
||||
buffer [8 * TagSize]byte
|
||||
offset int
|
||||
}
|
||||
|
||||
func (h *poly1305HashAVX2) Write(p []byte) (n int, err error) {
|
||||
n = len(p)
|
||||
if h.offset > 0 {
|
||||
remaining := 8*TagSize - h.offset
|
||||
if n <= remaining {
|
||||
h.offset += copy(h.buffer[h.offset:], p)
|
||||
return n, nil
|
||||
}
|
||||
copy(h.buffer[h.offset:], p[:remaining])
|
||||
updateAVX2(&h.state, h.buffer[:])
|
||||
p = p[remaining:]
|
||||
h.offset = 0
|
||||
}
|
||||
// process full 8*16-byte blocks
|
||||
if nn := len(p) & (^(8*TagSize - 1)); nn > 0 {
|
||||
updateAVX2(&h.state, p[:nn])
|
||||
p = p[nn:]
|
||||
}
|
||||
if len(p) > 0 {
|
||||
h.offset += copy(h.buffer[:], p)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (h *poly1305HashAVX2) Sum(b []byte) []byte {
|
||||
var out [TagSize]byte
|
||||
state := h.state
|
||||
|
||||
if h.offset > 0 {
|
||||
updateAVX2(&state, h.buffer[:h.offset])
|
||||
}
|
||||
finalizeAVX2(&out, &state)
|
||||
return append(b, out[:]...)
|
||||
}
|
155
vendor/github.com/aead/poly1305/poly1305_amd64.s
generated
vendored
Normal file
155
vendor/github.com/aead/poly1305/poly1305_amd64.s
generated
vendored
Normal file
|
@ -0,0 +1,155 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build amd64,!gccgo,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// poly1305Mask is the 16-byte constant that initialize ANDs with the first
// half of the key before storing it as r.
DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
GLOBL ·poly1305Mask<>(SB), RODATA, $16
|
||||
|
||||
// POLY1305_ADD adds the 16 bytes at msg plus an extra 1 in the third word to
// the accumulator h2:h1:h0 (i.e. h += block + 2^128) and advances msg by 16.
#define POLY1305_ADD(msg, h0, h1, h2) \
	ADDQ 0(msg), h0; \
	ADCQ 8(msg), h1; \
	ADCQ $1, h2; \
	LEAQ 16(msg), msg
|
||||
|
||||
// POLY1305_MUL multiplies the accumulator h2:h1:h0 by r1:r0 and folds the
// result back into h2:h1:h0.
//
// The first two groups build the product in t3:t2:t1:t0 (h0 is reused as a
// scratch register for one carry word). The last group keeps the low two bits
// of t2 in h2 and adds the remaining high part twice: once still shifted left
// by two (masked with ~3) and once shifted right by two, i.e. 5 * (high >> 2).
// AX and DX are clobbered by MULQ.
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
	MOVQ r0, AX; \
	MULQ h0; \
	MOVQ AX, t0; \
	MOVQ DX, t1; \
	MOVQ r0, AX; \
	MULQ h1; \
	ADDQ AX, t1; \
	ADCQ $0, DX; \
	MOVQ r0, t2; \
	IMULQ h2, t2; \
	ADDQ DX, t2; \
	\
	MOVQ r1, AX; \
	MULQ h0; \
	ADDQ AX, t1; \
	ADCQ $0, DX; \
	MOVQ DX, h0; \
	MOVQ r1, t3; \
	IMULQ h2, t3; \
	MOVQ r1, AX; \
	MULQ h1; \
	ADDQ AX, t2; \
	ADCQ DX, t3; \
	ADDQ h0, t2; \
	ADCQ $0, t3; \
	\
	MOVQ t0, h0; \
	MOVQ t1, h1; \
	MOVQ t2, h2; \
	ANDQ $3, h2; \
	MOVQ t2, t0; \
	ANDQ $0XFFFFFFFFFFFFFFFC, t0; \
	ADDQ t0, h0; \
	ADCQ t3, h1; \
	ADCQ $0, h2; \
	SHRQ $2, t3, t2; \
	SHRQ $2, t3; \
	ADDQ t2, h0; \
	ADCQ t3, h1; \
	ADCQ $0, h2
|
||||
|
||||
// func update(state *[7]uint64, msg []byte)
//
// Absorbs msg into the accumulator held in state[0..2] using r from
// state[3..4]. Full 16-byte blocks go through LOOP. A trailing block of
// 1..15 bytes is assembled in CX:BX with a 1 byte appended directly after the
// message bytes and is then multiplied like any other block.
TEXT ·update(SB), $0-32
	MOVQ state+0(FP), DI
	MOVQ msg_base+8(FP), SI
	MOVQ msg_len+16(FP), R15

	MOVQ 0(DI), R8   // h0
	MOVQ 8(DI), R9   // h1
	MOVQ 16(DI), R10 // h2
	MOVQ 24(DI), R11 // r0
	MOVQ 32(DI), R12 // r1

	CMPQ R15, $16
	JB BYTES_BETWEEN_0_AND_15

LOOP:
	POLY1305_ADD(SI, R8, R9, R10)

MULTIPLY:
	POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
	SUBQ $16, R15
	CMPQ R15, $16
	JAE LOOP

BYTES_BETWEEN_0_AND_15:
	TESTQ R15, R15
	JZ DONE

	// CX:BX starts as 1; SI is moved to the end of the message so the bytes
	// can be shifted in from last to first.
	MOVQ $1, BX
	XORQ CX, CX
	XORQ R13, R13
	ADDQ R15, SI

FLUSH_BUFFER:
	SHLQ $8, BX, CX
	SHLQ $8, BX
	MOVB -1(SI), R13
	XORQ R13, BX
	DECQ SI
	DECQ R15
	JNZ FLUSH_BUFFER

	ADDQ BX, R8
	ADCQ CX, R9
	ADCQ $0, R10

	// Pretend exactly one block is left so that MULTIPLY falls through to
	// DONE after its length bookkeeping.
	MOVQ $16, R15
	JMP MULTIPLY

DONE:
	MOVQ R8, 0(DI)
	MOVQ R9, 8(DI)
	MOVQ R10, 16(DI)
	RET
|
||||
|
||||
// func initialize(state *[7]uint64, key *[32]byte)
//
// Stores key[0:16] ANDed with poly1305Mask at state+24 (r0, r1) and
// key[16:32] unchanged at state+40 (pad0, pad1).
TEXT ·initialize(SB), $0-16
	MOVQ state+0(FP), DI
	MOVQ key+8(FP), SI

	// state[0..2] (the accumulator) is not written here; the Go callers in
	// this package pass a freshly zeroed state.
	MOVOU 0(SI), X0
	MOVOU 16(SI), X1
	MOVOU ·poly1305Mask<>(SB), X2
	PAND X2, X0
	MOVOU X0, 24(DI)
	MOVOU X1, 40(DI)
	RET
|
||||
|
||||
// func finalize(tag *[TagSize]byte, state *[7]uint64)
//
// Subtracts 2^130-5 from the accumulator, keeps the original value when that
// subtraction borrows, adds the pad stored at state+40 and writes the low
// 128 bits to tag.
TEXT ·finalize(SB), $0-16
	MOVQ tag+0(FP), DI
	MOVQ state+8(FP), SI

	MOVQ 0(SI), AX
	MOVQ 8(SI), BX
	MOVQ 16(SI), CX

	// Keep a copy of the low 128 bits for the case h < p.
	MOVQ AX, R8
	MOVQ BX, R9

	// h - p as a 192-bit subtraction; carry set means h < p.
	SUBQ $0XFFFFFFFFFFFFFFFB, AX
	SBBQ $0XFFFFFFFFFFFFFFFF, BX
	SBBQ $3, CX
	CMOVQCS R8, AX
	CMOVQCS R9, BX

	// tag = (h + pad) mod 2^128
	ADDQ 40(SI), AX
	ADCQ 48(SI), BX

	MOVQ AX, 0(DI)
	MOVQ BX, 8(DI)
	RET
|
||||
|
||||
|
||||
// func supportsAVX2() bool
//
// Returns the low byte of runtime·support_avx2.
// NOTE(review): the load is 8 bytes wide although only the low byte is
// returned - confirm the width of the runtime variable.
TEXT ·supportsAVX2(SB), NOSPLIT, $0-1
	MOVQ runtime·support_avx2(SB), AX
	MOVB AX, ret+0(FP)
	RET
|
229
vendor/github.com/aead/poly1305/poly1305_ref.go
generated
vendored
Normal file
229
vendor/github.com/aead/poly1305/poly1305_ref.go
generated
vendored
Normal file
|
@ -0,0 +1,229 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build !amd64 gccgo appengine nacl
|
||||
|
||||
package poly1305
|
||||
|
||||
import "encoding/binary"
|
||||
|
||||
// Flags OR-ed into the top limb of a block by update.
const (
	// msgBlock sets bit 24 of the top limb; it is used for full 16-byte blocks.
	msgBlock uint32 = 1 << 24
	// finalBlock adds nothing; it is used for the last block, whose
	// terminating 1 byte is already part of the block data.
	finalBlock uint32 = 0
)
|
||||
|
||||
// Sum generates an authenticator for msg using a one-time key and returns the
|
||||
// 16-byte result. Authenticating two different messages with the same key allows
|
||||
// an attacker to forge messages at will.
|
||||
func Sum(msg []byte, key [32]byte) [TagSize]byte {
|
||||
var (
|
||||
h, r [5]uint32
|
||||
s [4]uint32
|
||||
)
|
||||
var out [TagSize]byte
|
||||
|
||||
initialize(&r, &s, &key)
|
||||
|
||||
// process full 16-byte blocks
|
||||
n := len(msg) & (^(TagSize - 1))
|
||||
if n > 0 {
|
||||
update(msg[:n], msgBlock, &h, &r)
|
||||
msg = msg[n:]
|
||||
}
|
||||
if len(msg) > 0 {
|
||||
var block [TagSize]byte
|
||||
off := copy(block[:], msg)
|
||||
block[off] = 1
|
||||
update(block[:], finalBlock, &h, &r)
|
||||
}
|
||||
finalize(&out, &h, &s)
|
||||
return out
|
||||
}
|
||||
|
||||
// New returns a hash.Hash computing the poly1305 sum.
|
||||
// Notice that Poly1305 is insecure if one key is used twice.
|
||||
func New(key [32]byte) *Hash {
|
||||
p := new(Hash)
|
||||
initialize(&(p.r), &(p.s), &key)
|
||||
return p
|
||||
}
|
||||
|
||||
// Hash implements a Poly1305 writer interface.
|
||||
// Poly1305 cannot be used like common hash.Hash implementations,
|
||||
// because using a poly1305 key twice breaks its security.
|
||||
// So poly1305.Hash does not support some kind of reset.
|
||||
type Hash struct {
|
||||
h, r [5]uint32
|
||||
s [4]uint32
|
||||
|
||||
buf [TagSize]byte
|
||||
off int
|
||||
done bool
|
||||
}
|
||||
|
||||
// Size returns the number of bytes Sum will append.
|
||||
func (p *Hash) Size() int { return TagSize }
|
||||
|
||||
// Write adds more data to the running Poly1305 hash.
|
||||
// This function should return a non-nil error if a call
|
||||
// to Write happens after a call to Sum. So it is not possible
|
||||
// to compute the checksum and than add more data.
|
||||
func (p *Hash) Write(msg []byte) (int, error) {
|
||||
if p.done {
|
||||
return 0, errWriteAfterSum
|
||||
}
|
||||
n := len(msg)
|
||||
|
||||
if p.off > 0 {
|
||||
dif := TagSize - p.off
|
||||
if n <= dif {
|
||||
p.off += copy(p.buf[p.off:], msg)
|
||||
return n, nil
|
||||
}
|
||||
copy(p.buf[p.off:], msg[:dif])
|
||||
msg = msg[dif:]
|
||||
update(p.buf[:], msgBlock, &(p.h), &(p.r))
|
||||
p.off = 0
|
||||
}
|
||||
|
||||
// process full 16-byte blocks
|
||||
if nn := len(msg) & (^(TagSize - 1)); nn > 0 {
|
||||
update(msg[:nn], msgBlock, &(p.h), &(p.r))
|
||||
msg = msg[nn:]
|
||||
}
|
||||
|
||||
if len(msg) > 0 {
|
||||
p.off += copy(p.buf[p.off:], msg)
|
||||
}
|
||||
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// Sum appends the Pol1305 hash of the previously
|
||||
// processed data to b and returns the resulting slice.
|
||||
// It is safe to call this function multiple times.
|
||||
func (p *Hash) Sum(b []byte) []byte {
|
||||
var out [TagSize]byte
|
||||
h := p.h
|
||||
|
||||
if p.off > 0 {
|
||||
var buf [TagSize]byte
|
||||
copy(buf[:], p.buf[:p.off])
|
||||
buf[p.off] = 1 // invariant: p.off < TagSize
|
||||
|
||||
update(buf[:], finalBlock, &h, &(p.r))
|
||||
}
|
||||
|
||||
finalize(&out, &h, &(p.s))
|
||||
p.done = true
|
||||
return append(b, out[:]...)
|
||||
}
|
||||
|
||||
// initialize splits key into its two halves: r receives key[0:16] as five
// clamped 26-bit limbs, s receives key[16:32] as four little-endian words.
func initialize(r *[5]uint32, s *[4]uint32, key *[32]byte) {
	le := binary.LittleEndian

	// Limb i starts at bit 26*i of the key, i.e. at byte 3*i shifted right
	// by 2*i bits; clamp[i] combines the 26-bit limb mask with the bits that
	// are cleared in r.
	clamp := [5]uint32{0x3ffffff, 0x3ffff03, 0x3ffc0ff, 0x3f03fff, 0x00fffff}
	for i := range r {
		r[i] = (le.Uint32(key[3*i:]) >> uint(2*i)) & clamp[i]
	}

	for i := range s {
		s[i] = le.Uint32(key[16+4*i:])
	}
}
|
||||
|
||||
func update(msg []byte, flag uint32, h, r *[5]uint32) {
|
||||
h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
|
||||
r0, r1, r2, r3, r4 := uint64(r[0]), uint64(r[1]), uint64(r[2]), uint64(r[3]), uint64(r[4])
|
||||
R1, R2, R3, R4 := r1*5, r2*5, r3*5, r4*5
|
||||
|
||||
for len(msg) > 0 {
|
||||
// h += msg
|
||||
h0 += binary.LittleEndian.Uint32(msg[0:]) & 0x3ffffff
|
||||
h1 += (binary.LittleEndian.Uint32(msg[3:]) >> 2) & 0x3ffffff
|
||||
h2 += (binary.LittleEndian.Uint32(msg[6:]) >> 4) & 0x3ffffff
|
||||
h3 += (binary.LittleEndian.Uint32(msg[9:]) >> 6) & 0x3ffffff
|
||||
h4 += (binary.LittleEndian.Uint32(msg[12:]) >> 8) | flag
|
||||
|
||||
// h *= r
|
||||
d0 := (uint64(h0) * r0) + (uint64(h1) * R4) + (uint64(h2) * R3) + (uint64(h3) * R2) + (uint64(h4) * R1)
|
||||
d1 := (d0 >> 26) + (uint64(h0) * r1) + (uint64(h1) * r0) + (uint64(h2) * R4) + (uint64(h3) * R3) + (uint64(h4) * R2)
|
||||
d2 := (d1 >> 26) + (uint64(h0) * r2) + (uint64(h1) * r1) + (uint64(h2) * r0) + (uint64(h3) * R4) + (uint64(h4) * R3)
|
||||
d3 := (d2 >> 26) + (uint64(h0) * r3) + (uint64(h1) * r2) + (uint64(h2) * r1) + (uint64(h3) * r0) + (uint64(h4) * R4)
|
||||
d4 := (d3 >> 26) + (uint64(h0) * r4) + (uint64(h1) * r3) + (uint64(h2) * r2) + (uint64(h3) * r1) + (uint64(h4) * r0)
|
||||
|
||||
// h %= p
|
||||
h0 = uint32(d0) & 0x3ffffff
|
||||
h1 = uint32(d1) & 0x3ffffff
|
||||
h2 = uint32(d2) & 0x3ffffff
|
||||
h3 = uint32(d3) & 0x3ffffff
|
||||
h4 = uint32(d4) & 0x3ffffff
|
||||
|
||||
h0 += uint32(d4>>26) * 5
|
||||
h1 += h0 >> 26
|
||||
h0 = h0 & 0x3ffffff
|
||||
|
||||
msg = msg[TagSize:]
|
||||
}
|
||||
|
||||
h[0], h[1], h[2], h[3], h[4] = h0, h1, h2, h3, h4
|
||||
}
|
||||
|
||||
func finalize(out *[TagSize]byte, h *[5]uint32, s *[4]uint32) {
|
||||
h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
|
||||
|
||||
// h %= p reduction
|
||||
h2 += h1 >> 26
|
||||
h1 &= 0x3ffffff
|
||||
h3 += h2 >> 26
|
||||
h2 &= 0x3ffffff
|
||||
h4 += h3 >> 26
|
||||
h3 &= 0x3ffffff
|
||||
h0 += 5 * (h4 >> 26)
|
||||
h4 &= 0x3ffffff
|
||||
h1 += h0 >> 26
|
||||
h0 &= 0x3ffffff
|
||||
|
||||
// h - p
|
||||
t0 := h0 + 5
|
||||
t1 := h1 + (t0 >> 26)
|
||||
t2 := h2 + (t1 >> 26)
|
||||
t3 := h3 + (t2 >> 26)
|
||||
t4 := h4 + (t3 >> 26) - (1 << 26)
|
||||
t0 &= 0x3ffffff
|
||||
t1 &= 0x3ffffff
|
||||
t2 &= 0x3ffffff
|
||||
t3 &= 0x3ffffff
|
||||
|
||||
// select h if h < p else h - p
|
||||
t_mask := (t4 >> 31) - 1
|
||||
h_mask := ^t_mask
|
||||
h0 = (h0 & h_mask) | (t0 & t_mask)
|
||||
h1 = (h1 & h_mask) | (t1 & t_mask)
|
||||
h2 = (h2 & h_mask) | (t2 & t_mask)
|
||||
h3 = (h3 & h_mask) | (t3 & t_mask)
|
||||
h4 = (h4 & h_mask) | (t4 & t_mask)
|
||||
|
||||
// h %= 2^128
|
||||
h0 |= h1 << 26
|
||||
h1 = ((h1 >> 6) | (h2 << 20))
|
||||
h2 = ((h2 >> 12) | (h3 << 14))
|
||||
h3 = ((h3 >> 18) | (h4 << 8))
|
||||
|
||||
// s: the s part of the key
|
||||
// tag = (h + s) % (2^128)
|
||||
t := uint64(h0) + uint64(s[0])
|
||||
h0 = uint32(t)
|
||||
t = uint64(h1) + uint64(s[1]) + (t >> 32)
|
||||
h1 = uint32(t)
|
||||
t = uint64(h2) + uint64(s[2]) + (t >> 32)
|
||||
h2 = uint32(t)
|
||||
t = uint64(h3) + uint64(s[3]) + (t >> 32)
|
||||
h3 = uint32(t)
|
||||
|
||||
binary.LittleEndian.PutUint32(out[0:], h0)
|
||||
binary.LittleEndian.PutUint32(out[4:], h1)
|
||||
binary.LittleEndian.PutUint32(out[8:], h2)
|
||||
binary.LittleEndian.PutUint32(out[12:], h3)
|
||||
}
|
157
vendor/github.com/aead/poly1305/poly1305_test.go
generated
vendored
Normal file
157
vendor/github.com/aead/poly1305/poly1305_test.go
generated
vendored
Normal file
|
@ -0,0 +1,157 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
package poly1305
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/hex"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// fromHex decodes the hexadecimal string s and panics if s is not valid hex.
// It is meant for building test vectors.
func fromHex(s string) []byte {
	decoded, err := hex.DecodeString(s)
	if err == nil {
		return decoded
	}
	panic(err)
}
|
||||
|
||||
var vectors = []struct {
|
||||
msg, key, tag []byte
|
||||
}{
|
||||
{
|
||||
[]byte("Hello world!"),
|
||||
[]byte("this is 32-byte key for Poly1305"),
|
||||
[]byte{0xa6, 0xf7, 0x45, 0x00, 0x8f, 0x81, 0xc9, 0x16, 0xa2, 0x0d, 0xcc, 0x74, 0xee, 0xf2, 0xb2, 0xf0},
|
||||
},
|
||||
{
|
||||
make([]byte, 32),
|
||||
[]byte("this is 32-byte key for Poly1305"),
|
||||
[]byte{0x49, 0xec, 0x78, 0x09, 0x0e, 0x48, 0x1e, 0xc6, 0xc2, 0x6b, 0x33, 0xb9, 0x1c, 0xcc, 0x03, 0x07},
|
||||
},
|
||||
{
|
||||
make([]byte, 2007),
|
||||
[]byte("this is 32-byte key for Poly1305"),
|
||||
[]byte{0xda, 0x84, 0xbc, 0xab, 0x02, 0x67, 0x6c, 0x38, 0xcd, 0xb0, 0x15, 0x60, 0x42, 0x74, 0xc2, 0xaa},
|
||||
},
|
||||
{
|
||||
make([]byte, 2007),
|
||||
make([]byte, 32),
|
||||
make([]byte, 16),
|
||||
},
|
||||
{
|
||||
// This test triggers an edge-case. See https://go-review.googlesource.com/#/c/30101/.
|
||||
[]byte{0x81, 0xd8, 0xb2, 0xe4, 0x6a, 0x25, 0x21, 0x3b, 0x58, 0xfe, 0xe4, 0x21, 0x3a, 0x2a, 0x28, 0xe9, 0x21, 0xc1, 0x2a, 0x96, 0x32, 0x51, 0x6d, 0x3b, 0x73, 0x27, 0x27, 0x27, 0xbe, 0xcf, 0x21, 0x29},
|
||||
[]byte{0x3b, 0x3a, 0x29, 0xe9, 0x3b, 0x21, 0x3a, 0x5c, 0x5c, 0x3b, 0x3b, 0x05, 0x3a, 0x3a, 0x8c, 0x0d},
|
||||
[]byte{0x6d, 0xc1, 0x8b, 0x8c, 0x34, 0x4c, 0xd7, 0x99, 0x27, 0x11, 0x8b, 0xbe, 0x84, 0xb7, 0xf3, 0x14},
|
||||
},
|
||||
// From: https://tools.ietf.org/html/rfc7539#section-2.5.2
|
||||
{
|
||||
fromHex("43727970746f6772617068696320466f72756d2052657365617263682047726f7570"),
|
||||
fromHex("85d6be7857556d337f4452fe42d506a80103808afb0db2fd4abff6af4149f51b"),
|
||||
fromHex("a8061dc1305136c6c22b8baf0c0127a9"),
|
||||
},
|
||||
}
|
||||
|
||||
func TestVectors(t *testing.T) {
|
||||
var key [32]byte
|
||||
|
||||
for i, v := range vectors {
|
||||
msg := v.msg
|
||||
copy(key[:], v.key)
|
||||
|
||||
out := Sum(msg, key)
|
||||
if !bytes.Equal(out[:], v.tag) {
|
||||
t.Errorf("Test vector %d : got: %x expected: %x", i, out[:], v.tag)
|
||||
}
|
||||
|
||||
h := New(key)
|
||||
h.Write(msg)
|
||||
tag := h.Sum(nil)
|
||||
if !bytes.Equal(tag[:], v.tag) {
|
||||
t.Errorf("Test vector %d : got: %x expected: %x", i, tag[:], v.tag)
|
||||
}
|
||||
|
||||
var mac [16]byte
|
||||
copy(mac[:], v.tag)
|
||||
if !Verify(&mac, msg, key) {
|
||||
t.Errorf("Test vector %d : Verify failed", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriteAfterSum(t *testing.T) {
|
||||
msg := make([]byte, 64)
|
||||
for i := range msg {
|
||||
h := New([32]byte{})
|
||||
|
||||
if _, err := h.Write(msg[:i]); err != nil {
|
||||
t.Fatalf("Iteration %d: poly1305.Hash returned unexpected error: %s", i, err)
|
||||
}
|
||||
h.Sum(nil)
|
||||
if _, err := h.Write(nil); err == nil {
|
||||
t.Fatalf("Iteration %d: poly1305.Hash returned no error for write after sum", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestWrite(t *testing.T) {
|
||||
var key [32]byte
|
||||
for i := range key {
|
||||
key[i] = byte(i)
|
||||
}
|
||||
|
||||
h := New(key)
|
||||
|
||||
var msg1 []byte
|
||||
msg0 := make([]byte, 64)
|
||||
for i := range msg0 {
|
||||
h.Write(msg0[:i])
|
||||
msg1 = append(msg1, msg0[:i]...)
|
||||
}
|
||||
|
||||
tag0 := h.Sum(nil)
|
||||
tag1 := Sum(msg1, key)
|
||||
|
||||
if !bytes.Equal(tag0[:], tag1[:]) {
|
||||
t.Fatalf("Sum differ from poly1305.Sum\n Sum: %s \n poly1305.Sum: %s", hex.EncodeToString(tag0[:]), hex.EncodeToString(tag1[:]))
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmarks
|
||||
|
||||
// BenchmarkSum_64 runs benchmarkSum with a message size of 64 bytes.
func BenchmarkSum_64(b *testing.B) {
	const size = 64
	benchmarkSum(b, size)
}
|
||||
// BenchmarkSum_256 runs benchmarkSum with a message size of 256 bytes.
func BenchmarkSum_256(b *testing.B) {
	const size = 256
	benchmarkSum(b, size)
}
|
||||
// BenchmarkSum_1K runs benchmarkSum with a message size of 1024 bytes.
func BenchmarkSum_1K(b *testing.B) {
	const size = 1024
	benchmarkSum(b, size)
}
|
||||
// BenchmarkSum_8K runs benchmarkSum with a message size of 8*1024 bytes.
func BenchmarkSum_8K(b *testing.B) {
	const size = 8 * 1024
	benchmarkSum(b, size)
}
|
||||
// BenchmarkWrite_64 runs benchmarkWrite with a message size of 64 bytes.
func BenchmarkWrite_64(b *testing.B) {
	const size = 64
	benchmarkWrite(b, size)
}
|
||||
// BenchmarkWrite_256 runs benchmarkWrite with a message size of 256 bytes.
func BenchmarkWrite_256(b *testing.B) {
	const size = 256
	benchmarkWrite(b, size)
}
|
||||
// BenchmarkWrite_1K runs benchmarkWrite with a message size of 1024 bytes.
func BenchmarkWrite_1K(b *testing.B) {
	const size = 1024
	benchmarkWrite(b, size)
}
|
||||
// BenchmarkWrite_8K runs benchmarkWrite with a message size of 8*1024 bytes.
func BenchmarkWrite_8K(b *testing.B) {
	const size = 8 * 1024
	benchmarkWrite(b, size)
}
|
||||
|
||||
func benchmarkSum(b *testing.B, size int) {
|
||||
var key [32]byte
|
||||
|
||||
msg := make([]byte, size)
|
||||
|
||||
b.SetBytes(int64(size))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
Sum(msg, key)
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkWrite(b *testing.B, size int) {
|
||||
var key [32]byte
|
||||
h := New(key)
|
||||
|
||||
msg := make([]byte, size)
|
||||
|
||||
b.SetBytes(int64(size))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
h.Write(msg)
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue