mirror of
https://github.com/DNSCrypt/dnscrypt-proxy.git
synced 2025-04-04 21:57:44 +03:00
Update xsecretbox again
This commit is contained in:
parent
8213a96cd5
commit
81692a3a80
28 changed files with 33 additions and 4244 deletions
21
vendor/github.com/aead/chacha20/LICENSE
generated
vendored
21
vendor/github.com/aead/chacha20/LICENSE
generated
vendored
|
@ -1,21 +0,0 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2016 Andreas Auernhammer
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
197
vendor/github.com/aead/chacha20/chacha/chacha.go
generated
vendored
197
vendor/github.com/aead/chacha20/chacha/chacha.go
generated
vendored
|
@ -1,197 +0,0 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Package chacha implements some low-level functions of the
|
||||
// ChaCha cipher family.
|
||||
package chacha // import "github.com/aead/chacha20/chacha"
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"math"
|
||||
)
|
||||
|
||||
const (
|
||||
// NonceSize is the size of the ChaCha20 nonce in bytes.
|
||||
NonceSize = 8
|
||||
|
||||
// INonceSize is the size of the IETF-ChaCha20 nonce in bytes.
|
||||
INonceSize = 12
|
||||
|
||||
// XNonceSize is the size of the XChaCha20 nonce in bytes.
|
||||
XNonceSize = 24
|
||||
|
||||
// KeySize is the size of the key in bytes.
|
||||
KeySize = 32
|
||||
)
|
||||
|
||||
var (
|
||||
useSSE2 bool
|
||||
useSSSE3 bool
|
||||
useAVX bool
|
||||
useAVX2 bool
|
||||
)
|
||||
|
||||
var (
|
||||
errKeySize = errors.New("chacha20/chacha: bad key length")
|
||||
errInvalidNonce = errors.New("chacha20/chacha: bad nonce length")
|
||||
)
|
||||
|
||||
func setup(state *[64]byte, nonce, key []byte) (err error) {
|
||||
if len(key) != KeySize {
|
||||
err = errKeySize
|
||||
return
|
||||
}
|
||||
var Nonce [16]byte
|
||||
switch len(nonce) {
|
||||
case NonceSize:
|
||||
copy(Nonce[8:], nonce)
|
||||
initialize(state, key, &Nonce)
|
||||
case INonceSize:
|
||||
copy(Nonce[4:], nonce)
|
||||
initialize(state, key, &Nonce)
|
||||
case XNonceSize:
|
||||
var tmpKey [32]byte
|
||||
var hNonce [16]byte
|
||||
|
||||
copy(hNonce[:], nonce[:16])
|
||||
copy(tmpKey[:], key)
|
||||
HChaCha20(&tmpKey, &hNonce, &tmpKey)
|
||||
copy(Nonce[8:], nonce[16:])
|
||||
initialize(state, tmpKey[:], &Nonce)
|
||||
|
||||
// BUG(aead): A "good" compiler will remove this (optimizations)
|
||||
// But using the provided key instead of tmpKey,
|
||||
// will change the key (-> probably confuses users)
|
||||
for i := range tmpKey {
|
||||
tmpKey[i] = 0
|
||||
}
|
||||
default:
|
||||
err = errInvalidNonce
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// XORKeyStream crypts bytes from src to dst using the given nonce and key.
|
||||
// The length of the nonce determinds the version of ChaCha20:
|
||||
// - NonceSize: ChaCha20/r with a 64 bit nonce and a 2^64 * 64 byte period.
|
||||
// - INonceSize: ChaCha20/r as defined in RFC 7539 and a 2^32 * 64 byte period.
|
||||
// - XNonceSize: XChaCha20/r with a 192 bit nonce and a 2^64 * 64 byte period.
|
||||
// The rounds argument specifies the number of rounds performed for keystream
|
||||
// generation - valid values are 8, 12 or 20. The src and dst may be the same slice
|
||||
// but otherwise should not overlap. If len(dst) < len(src) this function panics.
|
||||
// If the nonce is neither 64, 96 nor 192 bits long, this function panics.
|
||||
func XORKeyStream(dst, src, nonce, key []byte, rounds int) {
|
||||
if rounds != 20 && rounds != 12 && rounds != 8 {
|
||||
panic("chacha20/chacha: bad number of rounds")
|
||||
}
|
||||
if len(dst) < len(src) {
|
||||
panic("chacha20/chacha: dst buffer is to small")
|
||||
}
|
||||
if len(nonce) == INonceSize && uint64(len(src)) > (1<<38) {
|
||||
panic("chacha20/chacha: src is too large")
|
||||
}
|
||||
|
||||
var block, state [64]byte
|
||||
if err := setup(&state, nonce, key); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
xorKeyStream(dst, src, &block, &state, rounds)
|
||||
}
|
||||
|
||||
// Cipher implements ChaCha20/r (XChaCha20/r) for a given number of rounds r.
|
||||
type Cipher struct {
|
||||
state, block [64]byte
|
||||
off int
|
||||
rounds int // 20 for ChaCha20
|
||||
noncesize int
|
||||
}
|
||||
|
||||
// NewCipher returns a new *chacha.Cipher implementing the ChaCha20/r or XChaCha20/r
|
||||
// (r = 8, 12 or 20) stream cipher. The nonce must be unique for one key for all time.
|
||||
// The length of the nonce determinds the version of ChaCha20:
|
||||
// - NonceSize: ChaCha20/r with a 64 bit nonce and a 2^64 * 64 byte period.
|
||||
// - INonceSize: ChaCha20/r as defined in RFC 7539 and a 2^32 * 64 byte period.
|
||||
// - XNonceSize: XChaCha20/r with a 192 bit nonce and a 2^64 * 64 byte period.
|
||||
// If the nonce is neither 64, 96 nor 192 bits long, a non-nil error is returned.
|
||||
func NewCipher(nonce, key []byte, rounds int) (*Cipher, error) {
|
||||
if rounds != 20 && rounds != 12 && rounds != 8 {
|
||||
panic("chacha20/chacha: bad number of rounds")
|
||||
}
|
||||
|
||||
c := new(Cipher)
|
||||
if err := setup(&(c.state), nonce, key); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c.rounds = rounds
|
||||
|
||||
if len(nonce) == INonceSize {
|
||||
c.noncesize = INonceSize
|
||||
} else {
|
||||
c.noncesize = NonceSize
|
||||
}
|
||||
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// XORKeyStream crypts bytes from src to dst. Src and dst may be the same slice
|
||||
// but otherwise should not overlap. If len(dst) < len(src) the function panics.
|
||||
func (c *Cipher) XORKeyStream(dst, src []byte) {
|
||||
if len(dst) < len(src) {
|
||||
panic("chacha20/chacha: dst buffer is to small")
|
||||
}
|
||||
|
||||
if c.off > 0 {
|
||||
n := len(c.block[c.off:])
|
||||
if len(src) <= n {
|
||||
for i, v := range src {
|
||||
dst[i] = v ^ c.block[c.off]
|
||||
c.off++
|
||||
}
|
||||
if c.off == 64 {
|
||||
c.off = 0
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
for i, v := range c.block[c.off:] {
|
||||
dst[i] = src[i] ^ v
|
||||
}
|
||||
src = src[n:]
|
||||
dst = dst[n:]
|
||||
c.off = 0
|
||||
}
|
||||
|
||||
// check for counter overflow
|
||||
blocksToXOR := len(src) / 64
|
||||
if len(src)%64 != 0 {
|
||||
blocksToXOR++
|
||||
}
|
||||
var overflow bool
|
||||
if c.noncesize == INonceSize {
|
||||
overflow = binary.LittleEndian.Uint32(c.state[48:]) > math.MaxUint32-uint32(blocksToXOR)
|
||||
} else {
|
||||
overflow = binary.LittleEndian.Uint64(c.state[48:]) > math.MaxUint64-uint64(blocksToXOR)
|
||||
}
|
||||
if overflow {
|
||||
panic("chacha20/chacha: counter overflow")
|
||||
}
|
||||
|
||||
c.off += xorKeyStream(dst, src, &(c.block), &(c.state), c.rounds)
|
||||
}
|
||||
|
||||
// SetCounter skips ctr * 64 byte blocks. SetCounter(0) resets the cipher.
|
||||
// This function always skips the unused keystream of the current 64 byte block.
|
||||
func (c *Cipher) SetCounter(ctr uint64) {
|
||||
if c.noncesize == INonceSize {
|
||||
binary.LittleEndian.PutUint32(c.state[48:], uint32(ctr))
|
||||
} else {
|
||||
binary.LittleEndian.PutUint64(c.state[48:], ctr)
|
||||
}
|
||||
c.off = 0
|
||||
}
|
||||
|
||||
// HChaCha20 generates 32 pseudo-random bytes from a 128 bit nonce and a 256 bit secret key.
|
||||
// It can be used as a key-derivation-function (KDF).
|
||||
func HChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) { hChaCha20(out, nonce, key) }
|
406
vendor/github.com/aead/chacha20/chacha/chachaAVX2_amd64.s
generated
vendored
406
vendor/github.com/aead/chacha20/chacha/chachaAVX2_amd64.s
generated
vendored
|
@ -1,406 +0,0 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build amd64,!gccgo,!appengine,!nacl
|
||||
|
||||
#include "const.s"
|
||||
#include "macro.s"
|
||||
|
||||
#define TWO 0(SP)
|
||||
#define C16 32(SP)
|
||||
#define C8 64(SP)
|
||||
#define STATE_0 96(SP)
|
||||
#define STATE_1 128(SP)
|
||||
#define STATE_2 160(SP)
|
||||
#define STATE_3 192(SP)
|
||||
#define TMP_0 224(SP)
|
||||
#define TMP_1 256(SP)
|
||||
|
||||
// func xorKeyStreamAVX(dst, src []byte, block, state *[64]byte, rounds int) int
|
||||
TEXT ·xorKeyStreamAVX2(SB), 4, $320-80
|
||||
MOVQ dst_base+0(FP), DI
|
||||
MOVQ src_base+24(FP), SI
|
||||
MOVQ block+48(FP), BX
|
||||
MOVQ state+56(FP), AX
|
||||
MOVQ rounds+64(FP), DX
|
||||
MOVQ src_len+32(FP), CX
|
||||
|
||||
MOVQ SP, R8
|
||||
ADDQ $32, SP
|
||||
ANDQ $-32, SP
|
||||
|
||||
VMOVDQU 0(AX), Y2
|
||||
VMOVDQU 32(AX), Y3
|
||||
VPERM2I128 $0x22, Y2, Y0, Y0
|
||||
VPERM2I128 $0x33, Y2, Y1, Y1
|
||||
VPERM2I128 $0x22, Y3, Y2, Y2
|
||||
VPERM2I128 $0x33, Y3, Y3, Y3
|
||||
|
||||
TESTQ CX, CX
|
||||
JZ done
|
||||
|
||||
VMOVDQU ·one_AVX2<>(SB), Y4
|
||||
VPADDD Y4, Y3, Y3
|
||||
|
||||
VMOVDQA Y0, STATE_0
|
||||
VMOVDQA Y1, STATE_1
|
||||
VMOVDQA Y2, STATE_2
|
||||
VMOVDQA Y3, STATE_3
|
||||
|
||||
VMOVDQU ·rol16_AVX2<>(SB), Y4
|
||||
VMOVDQU ·rol8_AVX2<>(SB), Y5
|
||||
VMOVDQU ·two_AVX2<>(SB), Y6
|
||||
VMOVDQA Y4, Y14
|
||||
VMOVDQA Y5, Y15
|
||||
VMOVDQA Y4, C16
|
||||
VMOVDQA Y5, C8
|
||||
VMOVDQA Y6, TWO
|
||||
|
||||
CMPQ CX, $64
|
||||
JBE between_0_and_64
|
||||
CMPQ CX, $192
|
||||
JBE between_64_and_192
|
||||
CMPQ CX, $320
|
||||
JBE between_192_and_320
|
||||
CMPQ CX, $448
|
||||
JBE between_320_and_448
|
||||
|
||||
at_least_512:
|
||||
VMOVDQA Y0, Y4
|
||||
VMOVDQA Y1, Y5
|
||||
VMOVDQA Y2, Y6
|
||||
VPADDQ TWO, Y3, Y7
|
||||
VMOVDQA Y0, Y8
|
||||
VMOVDQA Y1, Y9
|
||||
VMOVDQA Y2, Y10
|
||||
VPADDQ TWO, Y7, Y11
|
||||
VMOVDQA Y0, Y12
|
||||
VMOVDQA Y1, Y13
|
||||
VMOVDQA Y2, Y14
|
||||
VPADDQ TWO, Y11, Y15
|
||||
|
||||
MOVQ DX, R9
|
||||
|
||||
chacha_loop_512:
|
||||
VMOVDQA Y8, TMP_0
|
||||
CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y8, C16, C8)
|
||||
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y8, C16, C8)
|
||||
VMOVDQA TMP_0, Y8
|
||||
VMOVDQA Y0, TMP_0
|
||||
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y0, C16, C8)
|
||||
CHACHA_QROUND_AVX(Y12, Y13, Y14, Y15, Y0, C16, C8)
|
||||
CHACHA_SHUFFLE_AVX(Y1, Y2, Y3)
|
||||
CHACHA_SHUFFLE_AVX(Y5, Y6, Y7)
|
||||
CHACHA_SHUFFLE_AVX(Y9, Y10, Y11)
|
||||
CHACHA_SHUFFLE_AVX(Y13, Y14, Y15)
|
||||
|
||||
CHACHA_QROUND_AVX(Y12, Y13, Y14, Y15, Y0, C16, C8)
|
||||
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y0, C16, C8)
|
||||
VMOVDQA TMP_0, Y0
|
||||
VMOVDQA Y8, TMP_0
|
||||
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y8, C16, C8)
|
||||
CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y8, C16, C8)
|
||||
VMOVDQA TMP_0, Y8
|
||||
CHACHA_SHUFFLE_AVX(Y3, Y2, Y1)
|
||||
CHACHA_SHUFFLE_AVX(Y7, Y6, Y5)
|
||||
CHACHA_SHUFFLE_AVX(Y11, Y10, Y9)
|
||||
CHACHA_SHUFFLE_AVX(Y15, Y14, Y13)
|
||||
SUBQ $2, R9
|
||||
JA chacha_loop_512
|
||||
|
||||
VMOVDQA Y12, TMP_0
|
||||
VMOVDQA Y13, TMP_1
|
||||
VPADDD STATE_0, Y0, Y0
|
||||
VPADDD STATE_1, Y1, Y1
|
||||
VPADDD STATE_2, Y2, Y2
|
||||
VPADDD STATE_3, Y3, Y3
|
||||
XOR_AVX2(DI, SI, 0, Y0, Y1, Y2, Y3, Y12, Y13)
|
||||
VMOVDQA STATE_0, Y0
|
||||
VMOVDQA STATE_1, Y1
|
||||
VMOVDQA STATE_2, Y2
|
||||
VMOVDQA STATE_3, Y3
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
VPADDD Y0, Y4, Y4
|
||||
VPADDD Y1, Y5, Y5
|
||||
VPADDD Y2, Y6, Y6
|
||||
VPADDD Y3, Y7, Y7
|
||||
XOR_AVX2(DI, SI, 128, Y4, Y5, Y6, Y7, Y12, Y13)
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
VPADDD Y0, Y8, Y8
|
||||
VPADDD Y1, Y9, Y9
|
||||
VPADDD Y2, Y10, Y10
|
||||
VPADDD Y3, Y11, Y11
|
||||
XOR_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13)
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
VPADDD TMP_0, Y0, Y12
|
||||
VPADDD TMP_1, Y1, Y13
|
||||
VPADDD Y2, Y14, Y14
|
||||
VPADDD Y3, Y15, Y15
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
CMPQ CX, $512
|
||||
JB less_than_512
|
||||
|
||||
XOR_AVX2(DI, SI, 384, Y12, Y13, Y14, Y15, Y4, Y5)
|
||||
VMOVDQA Y3, STATE_3
|
||||
ADDQ $512, SI
|
||||
ADDQ $512, DI
|
||||
SUBQ $512, CX
|
||||
CMPQ CX, $448
|
||||
JA at_least_512
|
||||
|
||||
TESTQ CX, CX
|
||||
JZ done
|
||||
|
||||
VMOVDQA C16, Y14
|
||||
VMOVDQA C8, Y15
|
||||
|
||||
CMPQ CX, $64
|
||||
JBE between_0_and_64
|
||||
CMPQ CX, $192
|
||||
JBE between_64_and_192
|
||||
CMPQ CX, $320
|
||||
JBE between_192_and_320
|
||||
JMP between_320_and_448
|
||||
|
||||
less_than_512:
|
||||
XOR_UPPER_AVX2(DI, SI, 384, Y12, Y13, Y14, Y15, Y4, Y5)
|
||||
EXTRACT_LOWER(BX, Y12, Y13, Y14, Y15, Y4)
|
||||
ADDQ $448, SI
|
||||
ADDQ $448, DI
|
||||
SUBQ $448, CX
|
||||
JMP finalize
|
||||
|
||||
between_320_and_448:
|
||||
VMOVDQA Y0, Y4
|
||||
VMOVDQA Y1, Y5
|
||||
VMOVDQA Y2, Y6
|
||||
VPADDQ TWO, Y3, Y7
|
||||
VMOVDQA Y0, Y8
|
||||
VMOVDQA Y1, Y9
|
||||
VMOVDQA Y2, Y10
|
||||
VPADDQ TWO, Y7, Y11
|
||||
|
||||
MOVQ DX, R9
|
||||
|
||||
chacha_loop_384:
|
||||
CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y13, Y14, Y15)
|
||||
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
|
||||
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
|
||||
CHACHA_SHUFFLE_AVX(Y1, Y2, Y3)
|
||||
CHACHA_SHUFFLE_AVX(Y5, Y6, Y7)
|
||||
CHACHA_SHUFFLE_AVX(Y9, Y10, Y11)
|
||||
CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y13, Y14, Y15)
|
||||
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
|
||||
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
|
||||
CHACHA_SHUFFLE_AVX(Y3, Y2, Y1)
|
||||
CHACHA_SHUFFLE_AVX(Y7, Y6, Y5)
|
||||
CHACHA_SHUFFLE_AVX(Y11, Y10, Y9)
|
||||
SUBQ $2, R9
|
||||
JA chacha_loop_384
|
||||
|
||||
VPADDD STATE_0, Y0, Y0
|
||||
VPADDD STATE_1, Y1, Y1
|
||||
VPADDD STATE_2, Y2, Y2
|
||||
VPADDD STATE_3, Y3, Y3
|
||||
XOR_AVX2(DI, SI, 0, Y0, Y1, Y2, Y3, Y12, Y13)
|
||||
VMOVDQA STATE_0, Y0
|
||||
VMOVDQA STATE_1, Y1
|
||||
VMOVDQA STATE_2, Y2
|
||||
VMOVDQA STATE_3, Y3
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
VPADDD Y0, Y4, Y4
|
||||
VPADDD Y1, Y5, Y5
|
||||
VPADDD Y2, Y6, Y6
|
||||
VPADDD Y3, Y7, Y7
|
||||
XOR_AVX2(DI, SI, 128, Y4, Y5, Y6, Y7, Y12, Y13)
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
VPADDD Y0, Y8, Y8
|
||||
VPADDD Y1, Y9, Y9
|
||||
VPADDD Y2, Y10, Y10
|
||||
VPADDD Y3, Y11, Y11
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
CMPQ CX, $384
|
||||
JB less_than_384
|
||||
|
||||
XOR_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13)
|
||||
SUBQ $384, CX
|
||||
TESTQ CX, CX
|
||||
JE done
|
||||
|
||||
ADDQ $384, SI
|
||||
ADDQ $384, DI
|
||||
JMP between_0_and_64
|
||||
|
||||
less_than_384:
|
||||
XOR_UPPER_AVX2(DI, SI, 256, Y8, Y9, Y10, Y11, Y12, Y13)
|
||||
EXTRACT_LOWER(BX, Y8, Y9, Y10, Y11, Y12)
|
||||
ADDQ $320, SI
|
||||
ADDQ $320, DI
|
||||
SUBQ $320, CX
|
||||
JMP finalize
|
||||
|
||||
between_192_and_320:
|
||||
VMOVDQA Y0, Y4
|
||||
VMOVDQA Y1, Y5
|
||||
VMOVDQA Y2, Y6
|
||||
VMOVDQA Y3, Y7
|
||||
VMOVDQA Y0, Y8
|
||||
VMOVDQA Y1, Y9
|
||||
VMOVDQA Y2, Y10
|
||||
VPADDQ TWO, Y3, Y11
|
||||
|
||||
MOVQ DX, R9
|
||||
|
||||
chacha_loop_256:
|
||||
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
|
||||
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
|
||||
CHACHA_SHUFFLE_AVX(Y5, Y6, Y7)
|
||||
CHACHA_SHUFFLE_AVX(Y9, Y10, Y11)
|
||||
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
|
||||
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
|
||||
CHACHA_SHUFFLE_AVX(Y7, Y6, Y5)
|
||||
CHACHA_SHUFFLE_AVX(Y11, Y10, Y9)
|
||||
SUBQ $2, R9
|
||||
JA chacha_loop_256
|
||||
|
||||
VPADDD Y0, Y4, Y4
|
||||
VPADDD Y1, Y5, Y5
|
||||
VPADDD Y2, Y6, Y6
|
||||
VPADDD Y3, Y7, Y7
|
||||
VPADDQ TWO, Y3, Y3
|
||||
XOR_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13)
|
||||
VPADDD Y0, Y8, Y8
|
||||
VPADDD Y1, Y9, Y9
|
||||
VPADDD Y2, Y10, Y10
|
||||
VPADDD Y3, Y11, Y11
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
CMPQ CX, $256
|
||||
JB less_than_256
|
||||
|
||||
XOR_AVX2(DI, SI, 128, Y8, Y9, Y10, Y11, Y12, Y13)
|
||||
SUBQ $256, CX
|
||||
TESTQ CX, CX
|
||||
JE done
|
||||
|
||||
ADDQ $256, SI
|
||||
ADDQ $256, DI
|
||||
JMP between_0_and_64
|
||||
|
||||
less_than_256:
|
||||
XOR_UPPER_AVX2(DI, SI, 128, Y8, Y9, Y10, Y11, Y12, Y13)
|
||||
EXTRACT_LOWER(BX, Y8, Y9, Y10, Y11, Y12)
|
||||
ADDQ $192, SI
|
||||
ADDQ $192, DI
|
||||
SUBQ $192, CX
|
||||
JMP finalize
|
||||
|
||||
between_64_and_192:
|
||||
VMOVDQA Y0, Y4
|
||||
VMOVDQA Y1, Y5
|
||||
VMOVDQA Y2, Y6
|
||||
VMOVDQA Y3, Y7
|
||||
|
||||
MOVQ DX, R9
|
||||
|
||||
chacha_loop_128:
|
||||
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
|
||||
CHACHA_SHUFFLE_AVX(Y5, Y6, Y7)
|
||||
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
|
||||
CHACHA_SHUFFLE_AVX(Y7, Y6, Y5)
|
||||
SUBQ $2, R9
|
||||
JA chacha_loop_128
|
||||
|
||||
VPADDD Y0, Y4, Y4
|
||||
VPADDD Y1, Y5, Y5
|
||||
VPADDD Y2, Y6, Y6
|
||||
VPADDD Y3, Y7, Y7
|
||||
VPADDQ TWO, Y3, Y3
|
||||
|
||||
CMPQ CX, $128
|
||||
JB less_than_128
|
||||
|
||||
XOR_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13)
|
||||
SUBQ $128, CX
|
||||
TESTQ CX, CX
|
||||
JE done
|
||||
|
||||
ADDQ $128, SI
|
||||
ADDQ $128, DI
|
||||
JMP between_0_and_64
|
||||
|
||||
less_than_128:
|
||||
XOR_UPPER_AVX2(DI, SI, 0, Y4, Y5, Y6, Y7, Y12, Y13)
|
||||
EXTRACT_LOWER(BX, Y4, Y5, Y6, Y7, Y13)
|
||||
ADDQ $64, SI
|
||||
ADDQ $64, DI
|
||||
SUBQ $64, CX
|
||||
JMP finalize
|
||||
|
||||
between_0_and_64:
|
||||
VMOVDQA X0, X4
|
||||
VMOVDQA X1, X5
|
||||
VMOVDQA X2, X6
|
||||
VMOVDQA X3, X7
|
||||
|
||||
MOVQ DX, R9
|
||||
|
||||
chacha_loop_64:
|
||||
CHACHA_QROUND_AVX(X4, X5, X6, X7, X13, X14, X15)
|
||||
CHACHA_SHUFFLE_AVX(X5, X6, X7)
|
||||
CHACHA_QROUND_AVX(X4, X5, X6, X7, X13, X14, X15)
|
||||
CHACHA_SHUFFLE_AVX(X7, X6, X5)
|
||||
SUBQ $2, R9
|
||||
JA chacha_loop_64
|
||||
|
||||
VPADDD X0, X4, X4
|
||||
VPADDD X1, X5, X5
|
||||
VPADDD X2, X6, X6
|
||||
VPADDD X3, X7, X7
|
||||
VMOVDQU ·one<>(SB), X0
|
||||
VPADDQ X0, X3, X3
|
||||
|
||||
CMPQ CX, $64
|
||||
JB less_than_64
|
||||
|
||||
XOR_AVX(DI, SI, 0, X4, X5, X6, X7, X13)
|
||||
SUBQ $64, CX
|
||||
JMP done
|
||||
|
||||
less_than_64:
|
||||
VMOVDQU X4, 0(BX)
|
||||
VMOVDQU X5, 16(BX)
|
||||
VMOVDQU X6, 32(BX)
|
||||
VMOVDQU X7, 48(BX)
|
||||
|
||||
finalize:
|
||||
XORQ R11, R11
|
||||
XORQ R12, R12
|
||||
MOVQ CX, BP
|
||||
|
||||
xor_loop:
|
||||
MOVB 0(SI), R11
|
||||
MOVB 0(BX), R12
|
||||
XORQ R11, R12
|
||||
MOVB R12, 0(DI)
|
||||
INCQ SI
|
||||
INCQ BX
|
||||
INCQ DI
|
||||
DECQ BP
|
||||
JA xor_loop
|
||||
|
||||
done:
|
||||
VMOVDQU X3, 48(AX)
|
||||
VZEROUPPER
|
||||
MOVQ R8, SP
|
||||
MOVQ CX, ret+72(FP)
|
||||
RET
|
||||
|
60
vendor/github.com/aead/chacha20/chacha/chacha_386.go
generated
vendored
60
vendor/github.com/aead/chacha20/chacha/chacha_386.go
generated
vendored
|
@ -1,60 +0,0 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build 386,!gccgo,!appengine,!nacl
|
||||
|
||||
package chacha
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
|
||||
"golang.org/x/sys/cpu"
|
||||
)
|
||||
|
||||
func init() {
|
||||
useSSE2 = cpu.X86.HasSSE2
|
||||
useSSSE3 = cpu.X86.HasSSSE3
|
||||
useAVX = false
|
||||
useAVX2 = false
|
||||
}
|
||||
|
||||
func initialize(state *[64]byte, key []byte, nonce *[16]byte) {
|
||||
binary.LittleEndian.PutUint32(state[0:], sigma[0])
|
||||
binary.LittleEndian.PutUint32(state[4:], sigma[1])
|
||||
binary.LittleEndian.PutUint32(state[8:], sigma[2])
|
||||
binary.LittleEndian.PutUint32(state[12:], sigma[3])
|
||||
copy(state[16:], key[:])
|
||||
copy(state[48:], nonce[:])
|
||||
}
|
||||
|
||||
// This function is implemented in chacha_386.s
|
||||
//go:noescape
|
||||
func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
|
||||
|
||||
// This function is implemented in chacha_386.s
|
||||
//go:noescape
|
||||
func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
|
||||
|
||||
// This function is implemented in chacha_386.s
|
||||
//go:noescape
|
||||
func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
|
||||
|
||||
func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
|
||||
switch {
|
||||
case useSSSE3:
|
||||
hChaCha20SSSE3(out, nonce, key)
|
||||
case useSSE2:
|
||||
hChaCha20SSE2(out, nonce, key)
|
||||
default:
|
||||
hChaCha20Generic(out, nonce, key)
|
||||
}
|
||||
}
|
||||
|
||||
func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
|
||||
if useSSE2 {
|
||||
return xorKeyStreamSSE2(dst, src, block, state, rounds)
|
||||
} else {
|
||||
return xorKeyStreamGeneric(dst, src, block, state, rounds)
|
||||
}
|
||||
}
|
163
vendor/github.com/aead/chacha20/chacha/chacha_386.s
generated
vendored
163
vendor/github.com/aead/chacha20/chacha/chacha_386.s
generated
vendored
|
@ -1,163 +0,0 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build 386,!gccgo,!appengine,!nacl
|
||||
|
||||
#include "const.s"
|
||||
#include "macro.s"
|
||||
|
||||
// FINALIZE xors len bytes from src and block using
|
||||
// the temp. registers t0 and t1 and writes the result
|
||||
// to dst.
|
||||
#define FINALIZE(dst, src, block, len, t0, t1) \
|
||||
XORL t0, t0; \
|
||||
XORL t1, t1; \
|
||||
FINALIZE_LOOP:; \
|
||||
MOVB 0(src), t0; \
|
||||
MOVB 0(block), t1; \
|
||||
XORL t0, t1; \
|
||||
MOVB t1, 0(dst); \
|
||||
INCL src; \
|
||||
INCL block; \
|
||||
INCL dst; \
|
||||
DECL len; \
|
||||
JG FINALIZE_LOOP \
|
||||
|
||||
#define Dst DI
|
||||
#define Nonce AX
|
||||
#define Key BX
|
||||
#define Rounds DX
|
||||
|
||||
// func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
|
||||
TEXT ·hChaCha20SSE2(SB), 4, $0-12
|
||||
MOVL out+0(FP), Dst
|
||||
MOVL nonce+4(FP), Nonce
|
||||
MOVL key+8(FP), Key
|
||||
|
||||
MOVOU ·sigma<>(SB), X0
|
||||
MOVOU 0*16(Key), X1
|
||||
MOVOU 1*16(Key), X2
|
||||
MOVOU 0*16(Nonce), X3
|
||||
MOVL $20, Rounds
|
||||
|
||||
chacha_loop:
|
||||
CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
|
||||
CHACHA_SHUFFLE_SSE(X1, X2, X3)
|
||||
CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
|
||||
CHACHA_SHUFFLE_SSE(X3, X2, X1)
|
||||
SUBL $2, Rounds
|
||||
JNZ chacha_loop
|
||||
|
||||
MOVOU X0, 0*16(Dst)
|
||||
MOVOU X3, 1*16(Dst)
|
||||
RET
|
||||
|
||||
// func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
|
||||
TEXT ·hChaCha20SSSE3(SB), 4, $0-12
|
||||
MOVL out+0(FP), Dst
|
||||
MOVL nonce+4(FP), Nonce
|
||||
MOVL key+8(FP), Key
|
||||
|
||||
MOVOU ·sigma<>(SB), X0
|
||||
MOVOU 0*16(Key), X1
|
||||
MOVOU 1*16(Key), X2
|
||||
MOVOU 0*16(Nonce), X3
|
||||
MOVL $20, Rounds
|
||||
|
||||
MOVOU ·rol16<>(SB), X5
|
||||
MOVOU ·rol8<>(SB), X6
|
||||
|
||||
chacha_loop:
|
||||
CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
|
||||
CHACHA_SHUFFLE_SSE(X1, X2, X3)
|
||||
CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
|
||||
CHACHA_SHUFFLE_SSE(X3, X2, X1)
|
||||
SUBL $2, Rounds
|
||||
JNZ chacha_loop
|
||||
|
||||
MOVOU X0, 0*16(Dst)
|
||||
MOVOU X3, 1*16(Dst)
|
||||
RET
|
||||
|
||||
#undef Dst
|
||||
#undef Nonce
|
||||
#undef Key
|
||||
#undef Rounds
|
||||
|
||||
#define State AX
|
||||
#define Dst DI
|
||||
#define Src SI
|
||||
#define Len DX
|
||||
#define Tmp0 BX
|
||||
#define Tmp1 BP
|
||||
|
||||
// func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
|
||||
TEXT ·xorKeyStreamSSE2(SB), 4, $0-40
|
||||
MOVL dst_base+0(FP), Dst
|
||||
MOVL src_base+12(FP), Src
|
||||
MOVL state+28(FP), State
|
||||
MOVL src_len+16(FP), Len
|
||||
MOVL $0, ret+36(FP) // Number of bytes written to the keystream buffer - 0 iff len mod 64 == 0
|
||||
|
||||
MOVOU 0*16(State), X0
|
||||
MOVOU 1*16(State), X1
|
||||
MOVOU 2*16(State), X2
|
||||
MOVOU 3*16(State), X3
|
||||
TESTL Len, Len
|
||||
JZ DONE
|
||||
|
||||
GENERATE_KEYSTREAM:
|
||||
MOVO X0, X4
|
||||
MOVO X1, X5
|
||||
MOVO X2, X6
|
||||
MOVO X3, X7
|
||||
MOVL rounds+32(FP), Tmp0
|
||||
|
||||
CHACHA_LOOP:
|
||||
CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
|
||||
CHACHA_SHUFFLE_SSE(X5, X6, X7)
|
||||
CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
|
||||
CHACHA_SHUFFLE_SSE(X7, X6, X5)
|
||||
SUBL $2, Tmp0
|
||||
JA CHACHA_LOOP
|
||||
|
||||
MOVOU 0*16(State), X0 // Restore X0 from state
|
||||
PADDL X0, X4
|
||||
PADDL X1, X5
|
||||
PADDL X2, X6
|
||||
PADDL X3, X7
|
||||
MOVOU ·one<>(SB), X0
|
||||
PADDQ X0, X3
|
||||
|
||||
CMPL Len, $64
|
||||
JL BUFFER_KEYSTREAM
|
||||
|
||||
XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X0)
|
||||
MOVOU 0*16(State), X0 // Restore X0 from state
|
||||
ADDL $64, Src
|
||||
ADDL $64, Dst
|
||||
SUBL $64, Len
|
||||
JZ DONE
|
||||
JMP GENERATE_KEYSTREAM // There is at least one more plaintext byte
|
||||
|
||||
BUFFER_KEYSTREAM:
|
||||
MOVL block+24(FP), State
|
||||
MOVOU X4, 0(State)
|
||||
MOVOU X5, 16(State)
|
||||
MOVOU X6, 32(State)
|
||||
MOVOU X7, 48(State)
|
||||
MOVL Len, ret+36(FP) // Number of bytes written to the keystream buffer - 0 < Len < 64
|
||||
FINALIZE(Dst, Src, State, Len, Tmp0, Tmp1)
|
||||
|
||||
DONE:
|
||||
MOVL state+28(FP), State
|
||||
MOVOU X3, 3*16(State)
|
||||
RET
|
||||
|
||||
#undef State
|
||||
#undef Dst
|
||||
#undef Src
|
||||
#undef Len
|
||||
#undef Tmp0
|
||||
#undef Tmp1
|
76
vendor/github.com/aead/chacha20/chacha/chacha_amd64.go
generated
vendored
76
vendor/github.com/aead/chacha20/chacha/chacha_amd64.go
generated
vendored
|
@ -1,76 +0,0 @@
|
|||
// Copyright (c) 2017 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build go1.7,amd64,!gccgo,!appengine,!nacl
|
||||
|
||||
package chacha
|
||||
|
||||
import "golang.org/x/sys/cpu"
|
||||
|
||||
func init() {
|
||||
useSSE2 = cpu.X86.HasSSE2
|
||||
useSSSE3 = cpu.X86.HasSSSE3
|
||||
useAVX = cpu.X86.HasAVX
|
||||
useAVX2 = cpu.X86.HasAVX2
|
||||
}
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func initialize(state *[64]byte, key []byte, nonce *[16]byte)
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
|
||||
|
||||
// This function is implemented in chachaAVX2_amd64.s
|
||||
//go:noescape
|
||||
func hChaCha20AVX(out *[32]byte, nonce *[16]byte, key *[32]byte)
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
|
||||
|
||||
// This function is implemented in chacha_amd64.s
|
||||
//go:noescape
|
||||
func xorKeyStreamAVX(dst, src []byte, block, state *[64]byte, rounds int) int
|
||||
|
||||
// This function is implemented in chachaAVX2_amd64.s
|
||||
//go:noescape
|
||||
func xorKeyStreamAVX2(dst, src []byte, block, state *[64]byte, rounds int) int
|
||||
|
||||
func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
|
||||
switch {
|
||||
case useAVX:
|
||||
hChaCha20AVX(out, nonce, key)
|
||||
case useSSSE3:
|
||||
hChaCha20SSSE3(out, nonce, key)
|
||||
case useSSE2:
|
||||
hChaCha20SSE2(out, nonce, key)
|
||||
default:
|
||||
hChaCha20Generic(out, nonce, key)
|
||||
}
|
||||
}
|
||||
|
||||
func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
|
||||
switch {
|
||||
case useAVX2:
|
||||
return xorKeyStreamAVX2(dst, src, block, state, rounds)
|
||||
case useAVX:
|
||||
return xorKeyStreamAVX(dst, src, block, state, rounds)
|
||||
case useSSSE3:
|
||||
return xorKeyStreamSSSE3(dst, src, block, state, rounds)
|
||||
case useSSE2:
|
||||
return xorKeyStreamSSE2(dst, src, block, state, rounds)
|
||||
default:
|
||||
return xorKeyStreamGeneric(dst, src, block, state, rounds)
|
||||
}
|
||||
}
|
1072
vendor/github.com/aead/chacha20/chacha/chacha_amd64.s
generated
vendored
1072
vendor/github.com/aead/chacha20/chacha/chacha_amd64.s
generated
vendored
File diff suppressed because it is too large
Load diff
319
vendor/github.com/aead/chacha20/chacha/chacha_generic.go
generated
vendored
319
vendor/github.com/aead/chacha20/chacha/chacha_generic.go
generated
vendored
|
@ -1,319 +0,0 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
package chacha
|
||||
|
||||
import "encoding/binary"
|
||||
|
||||
var sigma = [4]uint32{0x61707865, 0x3320646e, 0x79622d32, 0x6b206574}
|
||||
|
||||
func xorKeyStreamGeneric(dst, src []byte, block, state *[64]byte, rounds int) int {
|
||||
for len(src) >= 64 {
|
||||
chachaGeneric(block, state, rounds)
|
||||
|
||||
for i, v := range block {
|
||||
dst[i] = src[i] ^ v
|
||||
}
|
||||
src = src[64:]
|
||||
dst = dst[64:]
|
||||
}
|
||||
|
||||
n := len(src)
|
||||
if n > 0 {
|
||||
chachaGeneric(block, state, rounds)
|
||||
for i, v := range src {
|
||||
dst[i] = v ^ block[i]
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func chachaGeneric(dst *[64]byte, state *[64]byte, rounds int) {
|
||||
v00 := binary.LittleEndian.Uint32(state[0:])
|
||||
v01 := binary.LittleEndian.Uint32(state[4:])
|
||||
v02 := binary.LittleEndian.Uint32(state[8:])
|
||||
v03 := binary.LittleEndian.Uint32(state[12:])
|
||||
v04 := binary.LittleEndian.Uint32(state[16:])
|
||||
v05 := binary.LittleEndian.Uint32(state[20:])
|
||||
v06 := binary.LittleEndian.Uint32(state[24:])
|
||||
v07 := binary.LittleEndian.Uint32(state[28:])
|
||||
v08 := binary.LittleEndian.Uint32(state[32:])
|
||||
v09 := binary.LittleEndian.Uint32(state[36:])
|
||||
v10 := binary.LittleEndian.Uint32(state[40:])
|
||||
v11 := binary.LittleEndian.Uint32(state[44:])
|
||||
v12 := binary.LittleEndian.Uint32(state[48:])
|
||||
v13 := binary.LittleEndian.Uint32(state[52:])
|
||||
v14 := binary.LittleEndian.Uint32(state[56:])
|
||||
v15 := binary.LittleEndian.Uint32(state[60:])
|
||||
|
||||
s00, s01, s02, s03, s04, s05, s06, s07 := v00, v01, v02, v03, v04, v05, v06, v07
|
||||
s08, s09, s10, s11, s12, s13, s14, s15 := v08, v09, v10, v11, v12, v13, v14, v15
|
||||
|
||||
for i := 0; i < rounds; i += 2 {
|
||||
v00 += v04
|
||||
v12 ^= v00
|
||||
v12 = (v12 << 16) | (v12 >> 16)
|
||||
v08 += v12
|
||||
v04 ^= v08
|
||||
v04 = (v04 << 12) | (v04 >> 20)
|
||||
v00 += v04
|
||||
v12 ^= v00
|
||||
v12 = (v12 << 8) | (v12 >> 24)
|
||||
v08 += v12
|
||||
v04 ^= v08
|
||||
v04 = (v04 << 7) | (v04 >> 25)
|
||||
v01 += v05
|
||||
v13 ^= v01
|
||||
v13 = (v13 << 16) | (v13 >> 16)
|
||||
v09 += v13
|
||||
v05 ^= v09
|
||||
v05 = (v05 << 12) | (v05 >> 20)
|
||||
v01 += v05
|
||||
v13 ^= v01
|
||||
v13 = (v13 << 8) | (v13 >> 24)
|
||||
v09 += v13
|
||||
v05 ^= v09
|
||||
v05 = (v05 << 7) | (v05 >> 25)
|
||||
v02 += v06
|
||||
v14 ^= v02
|
||||
v14 = (v14 << 16) | (v14 >> 16)
|
||||
v10 += v14
|
||||
v06 ^= v10
|
||||
v06 = (v06 << 12) | (v06 >> 20)
|
||||
v02 += v06
|
||||
v14 ^= v02
|
||||
v14 = (v14 << 8) | (v14 >> 24)
|
||||
v10 += v14
|
||||
v06 ^= v10
|
||||
v06 = (v06 << 7) | (v06 >> 25)
|
||||
v03 += v07
|
||||
v15 ^= v03
|
||||
v15 = (v15 << 16) | (v15 >> 16)
|
||||
v11 += v15
|
||||
v07 ^= v11
|
||||
v07 = (v07 << 12) | (v07 >> 20)
|
||||
v03 += v07
|
||||
v15 ^= v03
|
||||
v15 = (v15 << 8) | (v15 >> 24)
|
||||
v11 += v15
|
||||
v07 ^= v11
|
||||
v07 = (v07 << 7) | (v07 >> 25)
|
||||
v00 += v05
|
||||
v15 ^= v00
|
||||
v15 = (v15 << 16) | (v15 >> 16)
|
||||
v10 += v15
|
||||
v05 ^= v10
|
||||
v05 = (v05 << 12) | (v05 >> 20)
|
||||
v00 += v05
|
||||
v15 ^= v00
|
||||
v15 = (v15 << 8) | (v15 >> 24)
|
||||
v10 += v15
|
||||
v05 ^= v10
|
||||
v05 = (v05 << 7) | (v05 >> 25)
|
||||
v01 += v06
|
||||
v12 ^= v01
|
||||
v12 = (v12 << 16) | (v12 >> 16)
|
||||
v11 += v12
|
||||
v06 ^= v11
|
||||
v06 = (v06 << 12) | (v06 >> 20)
|
||||
v01 += v06
|
||||
v12 ^= v01
|
||||
v12 = (v12 << 8) | (v12 >> 24)
|
||||
v11 += v12
|
||||
v06 ^= v11
|
||||
v06 = (v06 << 7) | (v06 >> 25)
|
||||
v02 += v07
|
||||
v13 ^= v02
|
||||
v13 = (v13 << 16) | (v13 >> 16)
|
||||
v08 += v13
|
||||
v07 ^= v08
|
||||
v07 = (v07 << 12) | (v07 >> 20)
|
||||
v02 += v07
|
||||
v13 ^= v02
|
||||
v13 = (v13 << 8) | (v13 >> 24)
|
||||
v08 += v13
|
||||
v07 ^= v08
|
||||
v07 = (v07 << 7) | (v07 >> 25)
|
||||
v03 += v04
|
||||
v14 ^= v03
|
||||
v14 = (v14 << 16) | (v14 >> 16)
|
||||
v09 += v14
|
||||
v04 ^= v09
|
||||
v04 = (v04 << 12) | (v04 >> 20)
|
||||
v03 += v04
|
||||
v14 ^= v03
|
||||
v14 = (v14 << 8) | (v14 >> 24)
|
||||
v09 += v14
|
||||
v04 ^= v09
|
||||
v04 = (v04 << 7) | (v04 >> 25)
|
||||
}
|
||||
|
||||
v00 += s00
|
||||
v01 += s01
|
||||
v02 += s02
|
||||
v03 += s03
|
||||
v04 += s04
|
||||
v05 += s05
|
||||
v06 += s06
|
||||
v07 += s07
|
||||
v08 += s08
|
||||
v09 += s09
|
||||
v10 += s10
|
||||
v11 += s11
|
||||
v12 += s12
|
||||
v13 += s13
|
||||
v14 += s14
|
||||
v15 += s15
|
||||
|
||||
s12++
|
||||
binary.LittleEndian.PutUint32(state[48:], s12)
|
||||
if s12 == 0 { // indicates overflow
|
||||
s13++
|
||||
binary.LittleEndian.PutUint32(state[52:], s13)
|
||||
}
|
||||
|
||||
binary.LittleEndian.PutUint32(dst[0:], v00)
|
||||
binary.LittleEndian.PutUint32(dst[4:], v01)
|
||||
binary.LittleEndian.PutUint32(dst[8:], v02)
|
||||
binary.LittleEndian.PutUint32(dst[12:], v03)
|
||||
binary.LittleEndian.PutUint32(dst[16:], v04)
|
||||
binary.LittleEndian.PutUint32(dst[20:], v05)
|
||||
binary.LittleEndian.PutUint32(dst[24:], v06)
|
||||
binary.LittleEndian.PutUint32(dst[28:], v07)
|
||||
binary.LittleEndian.PutUint32(dst[32:], v08)
|
||||
binary.LittleEndian.PutUint32(dst[36:], v09)
|
||||
binary.LittleEndian.PutUint32(dst[40:], v10)
|
||||
binary.LittleEndian.PutUint32(dst[44:], v11)
|
||||
binary.LittleEndian.PutUint32(dst[48:], v12)
|
||||
binary.LittleEndian.PutUint32(dst[52:], v13)
|
||||
binary.LittleEndian.PutUint32(dst[56:], v14)
|
||||
binary.LittleEndian.PutUint32(dst[60:], v15)
|
||||
}
|
||||
|
||||
func hChaCha20Generic(out *[32]byte, nonce *[16]byte, key *[32]byte) {
|
||||
v00 := sigma[0]
|
||||
v01 := sigma[1]
|
||||
v02 := sigma[2]
|
||||
v03 := sigma[3]
|
||||
v04 := binary.LittleEndian.Uint32(key[0:])
|
||||
v05 := binary.LittleEndian.Uint32(key[4:])
|
||||
v06 := binary.LittleEndian.Uint32(key[8:])
|
||||
v07 := binary.LittleEndian.Uint32(key[12:])
|
||||
v08 := binary.LittleEndian.Uint32(key[16:])
|
||||
v09 := binary.LittleEndian.Uint32(key[20:])
|
||||
v10 := binary.LittleEndian.Uint32(key[24:])
|
||||
v11 := binary.LittleEndian.Uint32(key[28:])
|
||||
v12 := binary.LittleEndian.Uint32(nonce[0:])
|
||||
v13 := binary.LittleEndian.Uint32(nonce[4:])
|
||||
v14 := binary.LittleEndian.Uint32(nonce[8:])
|
||||
v15 := binary.LittleEndian.Uint32(nonce[12:])
|
||||
|
||||
for i := 0; i < 20; i += 2 {
|
||||
v00 += v04
|
||||
v12 ^= v00
|
||||
v12 = (v12 << 16) | (v12 >> 16)
|
||||
v08 += v12
|
||||
v04 ^= v08
|
||||
v04 = (v04 << 12) | (v04 >> 20)
|
||||
v00 += v04
|
||||
v12 ^= v00
|
||||
v12 = (v12 << 8) | (v12 >> 24)
|
||||
v08 += v12
|
||||
v04 ^= v08
|
||||
v04 = (v04 << 7) | (v04 >> 25)
|
||||
v01 += v05
|
||||
v13 ^= v01
|
||||
v13 = (v13 << 16) | (v13 >> 16)
|
||||
v09 += v13
|
||||
v05 ^= v09
|
||||
v05 = (v05 << 12) | (v05 >> 20)
|
||||
v01 += v05
|
||||
v13 ^= v01
|
||||
v13 = (v13 << 8) | (v13 >> 24)
|
||||
v09 += v13
|
||||
v05 ^= v09
|
||||
v05 = (v05 << 7) | (v05 >> 25)
|
||||
v02 += v06
|
||||
v14 ^= v02
|
||||
v14 = (v14 << 16) | (v14 >> 16)
|
||||
v10 += v14
|
||||
v06 ^= v10
|
||||
v06 = (v06 << 12) | (v06 >> 20)
|
||||
v02 += v06
|
||||
v14 ^= v02
|
||||
v14 = (v14 << 8) | (v14 >> 24)
|
||||
v10 += v14
|
||||
v06 ^= v10
|
||||
v06 = (v06 << 7) | (v06 >> 25)
|
||||
v03 += v07
|
||||
v15 ^= v03
|
||||
v15 = (v15 << 16) | (v15 >> 16)
|
||||
v11 += v15
|
||||
v07 ^= v11
|
||||
v07 = (v07 << 12) | (v07 >> 20)
|
||||
v03 += v07
|
||||
v15 ^= v03
|
||||
v15 = (v15 << 8) | (v15 >> 24)
|
||||
v11 += v15
|
||||
v07 ^= v11
|
||||
v07 = (v07 << 7) | (v07 >> 25)
|
||||
v00 += v05
|
||||
v15 ^= v00
|
||||
v15 = (v15 << 16) | (v15 >> 16)
|
||||
v10 += v15
|
||||
v05 ^= v10
|
||||
v05 = (v05 << 12) | (v05 >> 20)
|
||||
v00 += v05
|
||||
v15 ^= v00
|
||||
v15 = (v15 << 8) | (v15 >> 24)
|
||||
v10 += v15
|
||||
v05 ^= v10
|
||||
v05 = (v05 << 7) | (v05 >> 25)
|
||||
v01 += v06
|
||||
v12 ^= v01
|
||||
v12 = (v12 << 16) | (v12 >> 16)
|
||||
v11 += v12
|
||||
v06 ^= v11
|
||||
v06 = (v06 << 12) | (v06 >> 20)
|
||||
v01 += v06
|
||||
v12 ^= v01
|
||||
v12 = (v12 << 8) | (v12 >> 24)
|
||||
v11 += v12
|
||||
v06 ^= v11
|
||||
v06 = (v06 << 7) | (v06 >> 25)
|
||||
v02 += v07
|
||||
v13 ^= v02
|
||||
v13 = (v13 << 16) | (v13 >> 16)
|
||||
v08 += v13
|
||||
v07 ^= v08
|
||||
v07 = (v07 << 12) | (v07 >> 20)
|
||||
v02 += v07
|
||||
v13 ^= v02
|
||||
v13 = (v13 << 8) | (v13 >> 24)
|
||||
v08 += v13
|
||||
v07 ^= v08
|
||||
v07 = (v07 << 7) | (v07 >> 25)
|
||||
v03 += v04
|
||||
v14 ^= v03
|
||||
v14 = (v14 << 16) | (v14 >> 16)
|
||||
v09 += v14
|
||||
v04 ^= v09
|
||||
v04 = (v04 << 12) | (v04 >> 20)
|
||||
v03 += v04
|
||||
v14 ^= v03
|
||||
v14 = (v14 << 8) | (v14 >> 24)
|
||||
v09 += v14
|
||||
v04 ^= v09
|
||||
v04 = (v04 << 7) | (v04 >> 25)
|
||||
}
|
||||
|
||||
binary.LittleEndian.PutUint32(out[0:], v00)
|
||||
binary.LittleEndian.PutUint32(out[4:], v01)
|
||||
binary.LittleEndian.PutUint32(out[8:], v02)
|
||||
binary.LittleEndian.PutUint32(out[12:], v03)
|
||||
binary.LittleEndian.PutUint32(out[16:], v12)
|
||||
binary.LittleEndian.PutUint32(out[20:], v13)
|
||||
binary.LittleEndian.PutUint32(out[24:], v14)
|
||||
binary.LittleEndian.PutUint32(out[28:], v15)
|
||||
}
|
33
vendor/github.com/aead/chacha20/chacha/chacha_ref.go
generated
vendored
33
vendor/github.com/aead/chacha20/chacha/chacha_ref.go
generated
vendored
|
@ -1,33 +0,0 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build !amd64,!386 gccgo appengine nacl
|
||||
|
||||
package chacha
|
||||
|
||||
import "encoding/binary"
|
||||
|
||||
func init() {
|
||||
useSSE2 = false
|
||||
useSSSE3 = false
|
||||
useAVX = false
|
||||
useAVX2 = false
|
||||
}
|
||||
|
||||
func initialize(state *[64]byte, key []byte, nonce *[16]byte) {
|
||||
binary.LittleEndian.PutUint32(state[0:], sigma[0])
|
||||
binary.LittleEndian.PutUint32(state[4:], sigma[1])
|
||||
binary.LittleEndian.PutUint32(state[8:], sigma[2])
|
||||
binary.LittleEndian.PutUint32(state[12:], sigma[3])
|
||||
copy(state[16:], key[:])
|
||||
copy(state[48:], nonce[:])
|
||||
}
|
||||
|
||||
func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
|
||||
return xorKeyStreamGeneric(dst, src, block, state, rounds)
|
||||
}
|
||||
|
||||
func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
|
||||
hChaCha20Generic(out, nonce, key)
|
||||
}
|
53
vendor/github.com/aead/chacha20/chacha/const.s
generated
vendored
53
vendor/github.com/aead/chacha20/chacha/const.s
generated
vendored
|
@ -1,53 +0,0 @@
|
|||
// Copyright (c) 2018 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build 386,!gccgo,!appengine,!nacl amd64,!gccgo,!appengine,!nacl
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
DATA ·sigma<>+0x00(SB)/4, $0x61707865
|
||||
DATA ·sigma<>+0x04(SB)/4, $0x3320646e
|
||||
DATA ·sigma<>+0x08(SB)/4, $0x79622d32
|
||||
DATA ·sigma<>+0x0C(SB)/4, $0x6b206574
|
||||
GLOBL ·sigma<>(SB), (NOPTR+RODATA), $16 // The 4 ChaCha initialization constants
|
||||
|
||||
// SSE2/SSE3/AVX constants
|
||||
|
||||
DATA ·one<>+0x00(SB)/8, $1
|
||||
DATA ·one<>+0x08(SB)/8, $0
|
||||
GLOBL ·one<>(SB), (NOPTR+RODATA), $16 // The constant 1 as 128 bit value
|
||||
|
||||
DATA ·rol16<>+0x00(SB)/8, $0x0504070601000302
|
||||
DATA ·rol16<>+0x08(SB)/8, $0x0D0C0F0E09080B0A
|
||||
GLOBL ·rol16<>(SB), (NOPTR+RODATA), $16 // The PSHUFB 16 bit left rotate constant
|
||||
|
||||
DATA ·rol8<>+0x00(SB)/8, $0x0605040702010003
|
||||
DATA ·rol8<>+0x08(SB)/8, $0x0E0D0C0F0A09080B
|
||||
GLOBL ·rol8<>(SB), (NOPTR+RODATA), $16 // The PSHUFB 8 bit left rotate constant
|
||||
|
||||
// AVX2 constants
|
||||
|
||||
DATA ·one_AVX2<>+0x00(SB)/8, $0
|
||||
DATA ·one_AVX2<>+0x08(SB)/8, $0
|
||||
DATA ·one_AVX2<>+0x10(SB)/8, $1
|
||||
DATA ·one_AVX2<>+0x18(SB)/8, $0
|
||||
GLOBL ·one_AVX2<>(SB), (NOPTR+RODATA), $32 // The constant 1 as 256 bit value
|
||||
|
||||
DATA ·two_AVX2<>+0x00(SB)/8, $2
|
||||
DATA ·two_AVX2<>+0x08(SB)/8, $0
|
||||
DATA ·two_AVX2<>+0x10(SB)/8, $2
|
||||
DATA ·two_AVX2<>+0x18(SB)/8, $0
|
||||
GLOBL ·two_AVX2<>(SB), (NOPTR+RODATA), $32
|
||||
|
||||
DATA ·rol16_AVX2<>+0x00(SB)/8, $0x0504070601000302
|
||||
DATA ·rol16_AVX2<>+0x08(SB)/8, $0x0D0C0F0E09080B0A
|
||||
DATA ·rol16_AVX2<>+0x10(SB)/8, $0x0504070601000302
|
||||
DATA ·rol16_AVX2<>+0x18(SB)/8, $0x0D0C0F0E09080B0A
|
||||
GLOBL ·rol16_AVX2<>(SB), (NOPTR+RODATA), $32 // The VPSHUFB 16 bit left rotate constant
|
||||
|
||||
DATA ·rol8_AVX2<>+0x00(SB)/8, $0x0605040702010003
|
||||
DATA ·rol8_AVX2<>+0x08(SB)/8, $0x0E0D0C0F0A09080B
|
||||
DATA ·rol8_AVX2<>+0x10(SB)/8, $0x0605040702010003
|
||||
DATA ·rol8_AVX2<>+0x18(SB)/8, $0x0E0D0C0F0A09080B
|
||||
GLOBL ·rol8_AVX2<>(SB), (NOPTR+RODATA), $32 // The VPSHUFB 8 bit left rotate constant
|
163
vendor/github.com/aead/chacha20/chacha/macro.s
generated
vendored
163
vendor/github.com/aead/chacha20/chacha/macro.s
generated
vendored
|
@ -1,163 +0,0 @@
|
|||
// Copyright (c) 2018 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build 386,!gccgo,!appengine,!nacl amd64,!gccgo,!appengine,!nacl
|
||||
|
||||
// ROTL_SSE rotates all 4 32 bit values of the XMM register v
|
||||
// left by n bits using SSE2 instructions (0 <= n <= 32).
|
||||
// The XMM register t is used as a temp. register.
|
||||
#define ROTL_SSE(n, t, v) \
|
||||
MOVO v, t; \
|
||||
PSLLL $n, t; \
|
||||
PSRLL $(32-n), v; \
|
||||
PXOR t, v
|
||||
|
||||
// ROTL_AVX rotates all 4/8 32 bit values of the AVX/AVX2 register v
|
||||
// left by n bits using AVX/AVX2 instructions (0 <= n <= 32).
|
||||
// The AVX/AVX2 register t is used as a temp. register.
|
||||
#define ROTL_AVX(n, t, v) \
|
||||
VPSLLD $n, v, t; \
|
||||
VPSRLD $(32-n), v, v; \
|
||||
VPXOR v, t, v
|
||||
|
||||
// CHACHA_QROUND_SSE2 performs a ChaCha quarter-round using the
|
||||
// 4 XMM registers v0, v1, v2 and v3. It uses only ROTL_SSE2 for
|
||||
// rotations. The XMM register t is used as a temp. register.
|
||||
#define CHACHA_QROUND_SSE2(v0, v1, v2, v3, t) \
|
||||
PADDL v1, v0; \
|
||||
PXOR v0, v3; \
|
||||
ROTL_SSE(16, t, v3); \
|
||||
PADDL v3, v2; \
|
||||
PXOR v2, v1; \
|
||||
ROTL_SSE(12, t, v1); \
|
||||
PADDL v1, v0; \
|
||||
PXOR v0, v3; \
|
||||
ROTL_SSE(8, t, v3); \
|
||||
PADDL v3, v2; \
|
||||
PXOR v2, v1; \
|
||||
ROTL_SSE(7, t, v1)
|
||||
|
||||
// CHACHA_QROUND_SSSE3 performs a ChaCha quarter-round using the
|
||||
// 4 XMM registers v0, v1, v2 and v3. It uses PSHUFB for 8/16 bit
|
||||
// rotations. The XMM register t is used as a temp. register.
|
||||
//
|
||||
// r16 holds the PSHUFB constant for a 16 bit left rotate.
|
||||
// r8 holds the PSHUFB constant for a 8 bit left rotate.
|
||||
#define CHACHA_QROUND_SSSE3(v0, v1, v2, v3, t, r16, r8) \
|
||||
PADDL v1, v0; \
|
||||
PXOR v0, v3; \
|
||||
PSHUFB r16, v3; \
|
||||
PADDL v3, v2; \
|
||||
PXOR v2, v1; \
|
||||
ROTL_SSE(12, t, v1); \
|
||||
PADDL v1, v0; \
|
||||
PXOR v0, v3; \
|
||||
PSHUFB r8, v3; \
|
||||
PADDL v3, v2; \
|
||||
PXOR v2, v1; \
|
||||
ROTL_SSE(7, t, v1)
|
||||
|
||||
// CHACHA_QROUND_AVX performs a ChaCha quarter-round using the
|
||||
// 4 AVX/AVX2 registers v0, v1, v2 and v3. It uses VPSHUFB for 8/16 bit
|
||||
// rotations. The AVX/AVX2 register t is used as a temp. register.
|
||||
//
|
||||
// r16 holds the VPSHUFB constant for a 16 bit left rotate.
|
||||
// r8 holds the VPSHUFB constant for a 8 bit left rotate.
|
||||
#define CHACHA_QROUND_AVX(v0, v1, v2, v3, t, r16, r8) \
|
||||
VPADDD v0, v1, v0; \
|
||||
VPXOR v3, v0, v3; \
|
||||
VPSHUFB r16, v3, v3; \
|
||||
VPADDD v2, v3, v2; \
|
||||
VPXOR v1, v2, v1; \
|
||||
ROTL_AVX(12, t, v1); \
|
||||
VPADDD v0, v1, v0; \
|
||||
VPXOR v3, v0, v3; \
|
||||
VPSHUFB r8, v3, v3; \
|
||||
VPADDD v2, v3, v2; \
|
||||
VPXOR v1, v2, v1; \
|
||||
ROTL_AVX(7, t, v1)
|
||||
|
||||
// CHACHA_SHUFFLE_SSE performs a ChaCha shuffle using the
|
||||
// 3 XMM registers v1, v2 and v3. The inverse shuffle is
|
||||
// performed by switching v1 and v3: CHACHA_SHUFFLE_SSE(v3, v2, v1).
|
||||
#define CHACHA_SHUFFLE_SSE(v1, v2, v3) \
|
||||
PSHUFL $0x39, v1, v1; \
|
||||
PSHUFL $0x4E, v2, v2; \
|
||||
PSHUFL $0x93, v3, v3
|
||||
|
||||
// CHACHA_SHUFFLE_AVX performs a ChaCha shuffle using the
|
||||
// 3 AVX/AVX2 registers v1, v2 and v3. The inverse shuffle is
|
||||
// performed by switching v1 and v3: CHACHA_SHUFFLE_AVX(v3, v2, v1).
|
||||
#define CHACHA_SHUFFLE_AVX(v1, v2, v3) \
|
||||
VPSHUFD $0x39, v1, v1; \
|
||||
VPSHUFD $0x4E, v2, v2; \
|
||||
VPSHUFD $0x93, v3, v3
|
||||
|
||||
// XOR_SSE extracts 4x16 byte vectors from src at
|
||||
// off, xors all vectors with the corresponding XMM
|
||||
// register (v0 - v3) and writes the result to dst
|
||||
// at off.
|
||||
// The XMM register t is used as a temp. register.
|
||||
#define XOR_SSE(dst, src, off, v0, v1, v2, v3, t) \
|
||||
MOVOU 0+off(src), t; \
|
||||
PXOR v0, t; \
|
||||
MOVOU t, 0+off(dst); \
|
||||
MOVOU 16+off(src), t; \
|
||||
PXOR v1, t; \
|
||||
MOVOU t, 16+off(dst); \
|
||||
MOVOU 32+off(src), t; \
|
||||
PXOR v2, t; \
|
||||
MOVOU t, 32+off(dst); \
|
||||
MOVOU 48+off(src), t; \
|
||||
PXOR v3, t; \
|
||||
MOVOU t, 48+off(dst)
|
||||
|
||||
// XOR_AVX extracts 4x16 byte vectors from src at
|
||||
// off, xors all vectors with the corresponding AVX
|
||||
// register (v0 - v3) and writes the result to dst
|
||||
// at off.
|
||||
// The XMM register t is used as a temp. register.
|
||||
#define XOR_AVX(dst, src, off, v0, v1, v2, v3, t) \
|
||||
VPXOR 0+off(src), v0, t; \
|
||||
VMOVDQU t, 0+off(dst); \
|
||||
VPXOR 16+off(src), v1, t; \
|
||||
VMOVDQU t, 16+off(dst); \
|
||||
VPXOR 32+off(src), v2, t; \
|
||||
VMOVDQU t, 32+off(dst); \
|
||||
VPXOR 48+off(src), v3, t; \
|
||||
VMOVDQU t, 48+off(dst)
|
||||
|
||||
#define XOR_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \
|
||||
VMOVDQU (0+off)(src), t0; \
|
||||
VPERM2I128 $32, v1, v0, t1; \
|
||||
VPXOR t0, t1, t0; \
|
||||
VMOVDQU t0, (0+off)(dst); \
|
||||
VMOVDQU (32+off)(src), t0; \
|
||||
VPERM2I128 $32, v3, v2, t1; \
|
||||
VPXOR t0, t1, t0; \
|
||||
VMOVDQU t0, (32+off)(dst); \
|
||||
VMOVDQU (64+off)(src), t0; \
|
||||
VPERM2I128 $49, v1, v0, t1; \
|
||||
VPXOR t0, t1, t0; \
|
||||
VMOVDQU t0, (64+off)(dst); \
|
||||
VMOVDQU (96+off)(src), t0; \
|
||||
VPERM2I128 $49, v3, v2, t1; \
|
||||
VPXOR t0, t1, t0; \
|
||||
VMOVDQU t0, (96+off)(dst)
|
||||
|
||||
#define XOR_UPPER_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \
|
||||
VMOVDQU (0+off)(src), t0; \
|
||||
VPERM2I128 $32, v1, v0, t1; \
|
||||
VPXOR t0, t1, t0; \
|
||||
VMOVDQU t0, (0+off)(dst); \
|
||||
VMOVDQU (32+off)(src), t0; \
|
||||
VPERM2I128 $32, v3, v2, t1; \
|
||||
VPXOR t0, t1, t0; \
|
||||
VMOVDQU t0, (32+off)(dst); \
|
||||
|
||||
#define EXTRACT_LOWER(dst, v0, v1, v2, v3, t0) \
|
||||
VPERM2I128 $49, v1, v0, t0; \
|
||||
VMOVDQU t0, 0(dst); \
|
||||
VPERM2I128 $49, v3, v2, t0; \
|
||||
VMOVDQU t0, 32(dst)
|
25
vendor/github.com/aead/poly1305/.gitignore
generated
vendored
25
vendor/github.com/aead/poly1305/.gitignore
generated
vendored
|
@ -1,25 +0,0 @@
|
|||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
.vscode
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
21
vendor/github.com/aead/poly1305/.travis.yml
generated
vendored
21
vendor/github.com/aead/poly1305/.travis.yml
generated
vendored
|
@ -1,21 +0,0 @@
|
|||
language: go
|
||||
|
||||
go:
|
||||
- "1.8.x"
|
||||
- "1.9.x"
|
||||
- "1.10.x"
|
||||
|
||||
env:
|
||||
- TRAVIS_GOARCH=amd64
|
||||
- TRAVIS_GOARCH=386
|
||||
|
||||
before_install:
|
||||
- export GOARCH=$TRAVIS_GOARCH
|
||||
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
|
||||
script:
|
||||
- diff -au <(gofmt -d .) <(printf "")
|
||||
- go test -v ./...
|
21
vendor/github.com/aead/poly1305/LICENSE
generated
vendored
21
vendor/github.com/aead/poly1305/LICENSE
generated
vendored
|
@ -1,21 +0,0 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2016 Andreas Auernhammer
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
48
vendor/github.com/aead/poly1305/README.md
generated
vendored
48
vendor/github.com/aead/poly1305/README.md
generated
vendored
|
@ -1,48 +0,0 @@
|
|||
[](https://godoc.org/github.com/aead/poly1305)
|
||||
|
||||
## The poly1305 message authentication code
|
||||
|
||||
Poly1305 is a fast, one-time authentication function created by Daniel J. Bernstein.
|
||||
It is infeasible for an attacker to generate an authenticator for a message without the key.
|
||||
However, a key must only be used for a single message. Authenticating two different messages
|
||||
with the same key allows an attacker to forge authenticators for other messages with the same key.
|
||||
|
||||
### Installation
|
||||
Install in your GOPATH: `go get -u github.com/aead/poly1305`
|
||||
|
||||
### Requirements
|
||||
All Go versions >= 1.7 are supported.
|
||||
|
||||
### Performance
|
||||
|
||||
#### AMD64
|
||||
Hardware: Intel i7-6500U 2.50GHz x 2
|
||||
System: Linux Ubuntu 16.04 - kernel: 4.4.0-62-generic
|
||||
Go version: 1.8.0
|
||||
|
||||
**AVX2**
|
||||
```
|
||||
name speed cpb
|
||||
Sum_64-4 1.60GB/s ± 0% 1.39
|
||||
Sum_256-4 2.32GB/s ± 1% 1.00
|
||||
Sum_1K-4 3.61GB/s ± 1% 0.65
|
||||
Sum_8K-4 4.20GB/s ± 1% 0.55
|
||||
Write_64-4 2.04GB/s ± 0% 1.14
|
||||
Write_256-4 3.50GB/s ± 2% 0.67
|
||||
Write_1K-4 4.08GB/s ± 2% 0.57
|
||||
Write_8K-4 4.25GB/s ± 2% 0.55
|
||||
```
|
||||
|
||||
**x64**
|
||||
|
||||
```
|
||||
name speed cpb
|
||||
Sum_64-4 1.60GB/s ± 1% 1.46
|
||||
Sum_256-4 2.11GB/s ± 3% 1.10
|
||||
Sum_1K-4 2.35GB/s ±13% 0.99
|
||||
Sum_8K-4 2.47GB/s ±13% 0.94
|
||||
Write_64-4 1.81GB/s ± 5% 1.29
|
||||
Write_256-4 2.24GB/s ± 4% 1.04
|
||||
Write_1K-4 2.55GB/s ± 0% 0.91
|
||||
Write_8K-4 2.63GB/s ± 0% 0.88
|
||||
```
|
30
vendor/github.com/aead/poly1305/poly1305.go
generated
vendored
30
vendor/github.com/aead/poly1305/poly1305.go
generated
vendored
|
@ -1,30 +0,0 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Package poly1305 implements Poly1305 one-time message authentication code
|
||||
// defined in RFC 7539..
|
||||
//
|
||||
// Poly1305 is a fast, one-time authentication function. It is infeasible for an
|
||||
// attacker to generate an authenticator for a message without the key.
|
||||
// However, a key must only be used for a single message. Authenticating two
|
||||
// different messages with the same key allows an attacker to forge
|
||||
// authenticators for other messages with the same key.
|
||||
package poly1305 // import "github.com/aead/poly1305"
|
||||
|
||||
import (
|
||||
"crypto/subtle"
|
||||
"errors"
|
||||
)
|
||||
|
||||
// TagSize is the size of the poly1305 authentication tag in bytes.
|
||||
const TagSize = 16
|
||||
|
||||
var errWriteAfterSum = errors.New("checksum already computed - adding more data is not allowed")
|
||||
|
||||
// Verify returns true if and only if the mac is a valid authenticator
|
||||
// for msg with the given key.
|
||||
func Verify(mac *[TagSize]byte, msg []byte, key [32]byte) bool {
|
||||
sum := Sum(msg, key)
|
||||
return subtle.ConstantTimeCompare(sum[:], mac[:]) == 1
|
||||
}
|
871
vendor/github.com/aead/poly1305/poly1305_AVX2_amd64.s
generated
vendored
871
vendor/github.com/aead/poly1305/poly1305_AVX2_amd64.s
generated
vendored
|
@ -1,871 +0,0 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// This code is inspired by the poly1305 AVX2 implementation by Shay Gueron, and Vlad Krasnov.
|
||||
|
||||
// +build amd64, !gccgo, !appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
DATA addMaskAVX2<>+0x00(SB)/8, $0x3FFFFFF
|
||||
DATA addMaskAVX2<>+0x08(SB)/8, $0x3FFFFFF
|
||||
DATA addMaskAVX2<>+0x10(SB)/8, $0x3FFFFFF
|
||||
DATA addMaskAVX2<>+0x18(SB)/8, $0x3FFFFFF
|
||||
GLOBL addMaskAVX2<>(SB), RODATA, $32
|
||||
|
||||
DATA poly1305MaskAVX2<>+0x00(SB)/8, $0xFFFFFFC0FFFFFFF
|
||||
DATA poly1305MaskAVX2<>+0x08(SB)/8, $0xFFFFFFC0FFFFFFF
|
||||
DATA poly1305MaskAVX2<>+0x10(SB)/8, $0xFFFFFFC0FFFFFFF
|
||||
DATA poly1305MaskAVX2<>+0x18(SB)/8, $0xFFFFFFC0FFFFFFF
|
||||
DATA poly1305MaskAVX2<>+0x20(SB)/8, $0xFFFFFFC0FFFFFFC
|
||||
DATA poly1305MaskAVX2<>+0x28(SB)/8, $0xFFFFFFC0FFFFFFC
|
||||
DATA poly1305MaskAVX2<>+0x30(SB)/8, $0xFFFFFFC0FFFFFFC
|
||||
DATA poly1305MaskAVX2<>+0x38(SB)/8, $0xFFFFFFC0FFFFFFC
|
||||
GLOBL poly1305MaskAVX2<>(SB), RODATA, $64
|
||||
|
||||
DATA oneBit<>+0x00(SB)/8, $0x1000000
|
||||
DATA oneBit<>+0x08(SB)/8, $0x1000000
|
||||
DATA oneBit<>+0x10(SB)/8, $0x1000000
|
||||
DATA oneBit<>+0x18(SB)/8, $0x1000000
|
||||
GLOBL oneBit<>(SB), RODATA, $32
|
||||
|
||||
DATA fixPermutation<>+0x00(SB)/4, $6
|
||||
DATA fixPermutation<>+0x04(SB)/4, $7
|
||||
DATA fixPermutation<>+0x08(SB)/4, $6
|
||||
DATA fixPermutation<>+0x0c(SB)/4, $7
|
||||
DATA fixPermutation<>+0x10(SB)/4, $6
|
||||
DATA fixPermutation<>+0x14(SB)/4, $7
|
||||
DATA fixPermutation<>+0x18(SB)/4, $6
|
||||
DATA fixPermutation<>+0x1c(SB)/4, $7
|
||||
DATA fixPermutation<>+0x20(SB)/4, $4
|
||||
DATA fixPermutation<>+0x24(SB)/4, $5
|
||||
DATA fixPermutation<>+0x28(SB)/4, $6
|
||||
DATA fixPermutation<>+0x2c(SB)/4, $7
|
||||
DATA fixPermutation<>+0x30(SB)/4, $6
|
||||
DATA fixPermutation<>+0x34(SB)/4, $7
|
||||
DATA fixPermutation<>+0x38(SB)/4, $6
|
||||
DATA fixPermutation<>+0x3c(SB)/4, $7
|
||||
DATA fixPermutation<>+0x40(SB)/4, $2
|
||||
DATA fixPermutation<>+0x44(SB)/4, $3
|
||||
DATA fixPermutation<>+0x48(SB)/4, $6
|
||||
DATA fixPermutation<>+0x4c(SB)/4, $7
|
||||
DATA fixPermutation<>+0x50(SB)/4, $4
|
||||
DATA fixPermutation<>+0x54(SB)/4, $5
|
||||
DATA fixPermutation<>+0x58(SB)/4, $6
|
||||
DATA fixPermutation<>+0x5c(SB)/4, $7
|
||||
DATA fixPermutation<>+0x60(SB)/4, $0
|
||||
DATA fixPermutation<>+0x64(SB)/4, $1
|
||||
DATA fixPermutation<>+0x68(SB)/4, $4
|
||||
DATA fixPermutation<>+0x6c(SB)/4, $5
|
||||
DATA fixPermutation<>+0x70(SB)/4, $2
|
||||
DATA fixPermutation<>+0x74(SB)/4, $3
|
||||
DATA fixPermutation<>+0x78(SB)/4, $6
|
||||
DATA fixPermutation<>+0x7c(SB)/4, $7
|
||||
GLOBL fixPermutation<>(SB), RODATA, $128
|
||||
|
||||
TEXT ·initializeAVX2(SB), $0-16
|
||||
MOVQ state+0(FP), DI
|
||||
MOVQ key+8(FP), SI
|
||||
|
||||
MOVQ $addMaskAVX2<>(SB), R8
|
||||
|
||||
MOVOU 16*1(SI), X10
|
||||
MOVOU X10, 288(DI)
|
||||
PXOR X10, X10
|
||||
MOVOU X10, 304(DI)
|
||||
|
||||
MOVD X10, 320(DI)
|
||||
MOVQ 8*0(SI), X5
|
||||
MOVQ 8*1(SI), X10
|
||||
|
||||
VZEROUPPER
|
||||
|
||||
MOVQ $poly1305MaskAVX2<>(SB), R9
|
||||
VPAND (R9), X5, X5
|
||||
VPAND 32(R9), X10, X10
|
||||
|
||||
VMOVDQU 0(R8), X0
|
||||
VPSRLQ $26, X5, X6
|
||||
VPAND X0, X5, X5
|
||||
VPSRLQ $26, X6, X7
|
||||
VPAND X0, X6, X6
|
||||
VPSLLQ $12, X10, X11
|
||||
VPXOR X11, X7, X7
|
||||
VPSRLQ $26, X7, X8
|
||||
VPSRLQ $40, X10, X9
|
||||
VPAND X0, X7, X7
|
||||
VPAND X0, X8, X8
|
||||
|
||||
BYTE $0xc5; BYTE $0xd1; BYTE $0xf4; BYTE $0xc5 // VPMULUDQ X5, X5, X0
|
||||
BYTE $0xc5; BYTE $0xd1; BYTE $0xf4; BYTE $0xce // VPMULUDQ X6, X5, X1
|
||||
BYTE $0xc5; BYTE $0xd1; BYTE $0xf4; BYTE $0xd7 // VPMULUDQ X7, X5, X2
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x51; BYTE $0xf4; BYTE $0xd8 // VPMULUDQ X8, X5, X3
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x51; BYTE $0xf4; BYTE $0xe1 // VPMULUDQ X9, X5, X4
|
||||
|
||||
VPSLLQ $1, X1, X1
|
||||
VPSLLQ $1, X2, X2
|
||||
BYTE $0xc5; BYTE $0x49; BYTE $0xf4; BYTE $0xd6 // VPMULUDQ X6, X6, X10
|
||||
VPADDQ X10, X2, X2
|
||||
BYTE $0xc5; BYTE $0x49; BYTE $0xf4; BYTE $0xd7 // VPMULUDQ X7, X6, X10
|
||||
VPADDQ X10, X3, X3
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x49; BYTE $0xf4; BYTE $0xd0 // VPMULUDQ X8, X6, X10
|
||||
VPADDQ X10, X4, X4
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x49; BYTE $0xf4; BYTE $0xe1 // VPMULUDQ X9, X6, X12
|
||||
VPSLLQ $1, X3, X3
|
||||
VPSLLQ $1, X4, X4
|
||||
BYTE $0xc5; BYTE $0x41; BYTE $0xf4; BYTE $0xd7 // VPMULUDQ X7, X7, X10
|
||||
VPADDQ X10, X4, X4
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x41; BYTE $0xf4; BYTE $0xd0 // VPMULUDQ X8, X7, X10
|
||||
VPADDQ X10, X12, X12
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x41; BYTE $0xf4; BYTE $0xe9 // VPMULUDQ X9, X7, X13
|
||||
VPSLLQ $1, X12, X12
|
||||
VPSLLQ $1, X13, X13
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x39; BYTE $0xf4; BYTE $0xd0 // VPMULUDQ X8, X8, X10
|
||||
VPADDQ X10, X13, X13
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x39; BYTE $0xf4; BYTE $0xf1 // VPMULUDQ X9, X8, X14
|
||||
VPSLLQ $1, X14, X14
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x31; BYTE $0xf4; BYTE $0xf9 // VPMULUDQ X9, X9, X15
|
||||
|
||||
VPSRLQ $26, X4, X10
|
||||
VPAND 0(R8), X4, X4
|
||||
VPADDQ X10, X12, X12
|
||||
|
||||
VPSLLQ $2, X12, X10
|
||||
VPADDQ X10, X12, X12
|
||||
VPSLLQ $2, X13, X10
|
||||
VPADDQ X10, X13, X13
|
||||
VPSLLQ $2, X14, X10
|
||||
VPADDQ X10, X14, X14
|
||||
VPSLLQ $2, X15, X10
|
||||
VPADDQ X10, X15, X15
|
||||
|
||||
VPADDQ X12, X0, X0
|
||||
VPADDQ X13, X1, X1
|
||||
VPADDQ X14, X2, X2
|
||||
VPADDQ X15, X3, X3
|
||||
|
||||
VPSRLQ $26, X0, X10
|
||||
VPAND 0(R8), X0, X0
|
||||
VPADDQ X10, X1, X1
|
||||
VPSRLQ $26, X1, X10
|
||||
VPAND 0(R8), X1, X1
|
||||
VPADDQ X10, X2, X2
|
||||
VPSRLQ $26, X2, X10
|
||||
VPAND 0(R8), X2, X2
|
||||
VPADDQ X10, X3, X3
|
||||
VPSRLQ $26, X3, X10
|
||||
VPAND 0(R8), X3, X3
|
||||
VPADDQ X10, X4, X4
|
||||
|
||||
BYTE $0xc5; BYTE $0xf9; BYTE $0x6c; BYTE $0xed // VPUNPCKLQDQ X5, X0, X5
|
||||
BYTE $0xc5; BYTE $0xf1; BYTE $0x6c; BYTE $0xf6 // VPUNPCKLQDQ X6, X1, X6
|
||||
BYTE $0xc5; BYTE $0xe9; BYTE $0x6c; BYTE $0xff // VPUNPCKLQDQ X7, X2, X7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0x6c; BYTE $0xc0 // VPUNPCKLQDQ X8, X3, X8
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0x6c; BYTE $0xc9 // VPUNPCKLQDQ X9, X4, X9
|
||||
|
||||
VMOVDQU X5, 0+16(DI)
|
||||
VMOVDQU X6, 32+16(DI)
|
||||
VMOVDQU X7, 64+16(DI)
|
||||
VMOVDQU X8, 96+16(DI)
|
||||
VMOVDQU X9, 128+16(DI)
|
||||
|
||||
VPSLLQ $2, X6, X1
|
||||
VPSLLQ $2, X7, X2
|
||||
VPSLLQ $2, X8, X3
|
||||
VPSLLQ $2, X9, X4
|
||||
|
||||
VPADDQ X1, X6, X1
|
||||
VPADDQ X2, X7, X2
|
||||
VPADDQ X3, X8, X3
|
||||
VPADDQ X4, X9, X4
|
||||
|
||||
VMOVDQU X1, 160+16(DI)
|
||||
VMOVDQU X2, 192+16(DI)
|
||||
VMOVDQU X3, 224+16(DI)
|
||||
VMOVDQU X4, 256+16(DI)
|
||||
|
||||
VPSHUFD $68, X5, X0
|
||||
VPSHUFD $68, X6, X1
|
||||
VPSHUFD $68, X7, X2
|
||||
VPSHUFD $68, X8, X3
|
||||
VPSHUFD $68, X9, X4
|
||||
|
||||
VMOVDQU 0+16(DI), X10
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x79; BYTE $0xf4; BYTE $0xea // VPMULUDQ X10, X0, X5
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x71; BYTE $0xf4; BYTE $0xf2 // VPMULUDQ X10, X1, X6
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x69; BYTE $0xf4; BYTE $0xfa // VPMULUDQ X10, X2, X7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0xf4; BYTE $0xc2 // VPMULUDQ X10, X3, X8
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0xf4; BYTE $0xca // VPMULUDQ X10, X4, X9
|
||||
|
||||
VMOVDQU 160+16(DI), X10
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X4, X11
|
||||
VPADDQ X11, X5, X5
|
||||
|
||||
VMOVDQU 32+16(DI), X10
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x79; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X0, X11
|
||||
VPADDQ X11, X6, X6
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x71; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X1, X11
|
||||
VPADDQ X11, X7, X7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x69; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X2, X11
|
||||
VPADDQ X11, X8, X8
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X3, X11
|
||||
VPADDQ X11, X9, X9
|
||||
|
||||
VMOVDQU 192+16(DI), X10
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X3, X11
|
||||
VPADDQ X11, X5, X5
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X4, X11
|
||||
VPADDQ X11, X6, X6
|
||||
|
||||
VMOVDQU 64+16(DI), X10
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x79; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X0, X11
|
||||
VPADDQ X11, X7, X7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x71; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X1, X11
|
||||
VPADDQ X11, X8, X8
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x69; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10 ,X2, X11
|
||||
VPADDQ X11, X9, X9
|
||||
|
||||
VMOVDQU 224+16(DI), X10
|
||||
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x69; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X2, X11
|
||||
VPADDQ X11, X5, X5
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X3, X11
|
||||
VPADDQ X11, X6, X6
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X4, X11
|
||||
VPADDQ X11, X7, X7
|
||||
|
||||
VMOVDQU 96+16(DI), X10
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x79; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X0, X11
|
||||
VPADDQ X11, X8, X8
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x71; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X1, X11
|
||||
VPADDQ X11, X9, X9
|
||||
|
||||
VMOVDQU 256+16(DI), X10
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x71; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X1, X11
|
||||
VPADDQ X11, X5, X5
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x69; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X2, X11
|
||||
VPADDQ X11, X6, X6
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x61; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X3, X11
|
||||
VPADDQ X11, X7, X7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x59; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X4, X11
|
||||
VPADDQ X11, X8, X8
|
||||
|
||||
VMOVDQU 128+16(DI), X10
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x79; BYTE $0xf4; BYTE $0xda // VPMULUDQ X10, X0, X11
|
||||
VPADDQ X11, X9, X9
|
||||
|
||||
VMOVDQU 0(R8), X12
|
||||
|
||||
VPSRLQ $26, X8, X10
|
||||
VPADDQ X10, X9, X9
|
||||
VPAND X12, X8, X8
|
||||
VPSRLQ $26, X9, X10
|
||||
VPSLLQ $2, X10, X11
|
||||
VPADDQ X11, X10, X10
|
||||
VPADDQ X10, X5, X5
|
||||
VPAND X12, X9, X9
|
||||
VPSRLQ $26, X5, X10
|
||||
VPAND X12, X5, X5
|
||||
VPADDQ X10, X6, X6
|
||||
VPSRLQ $26, X6, X10
|
||||
VPAND X12, X6, X6
|
||||
VPADDQ X10, X7, X7
|
||||
VPSRLQ $26, X7, X10
|
||||
VPAND X12, X7, X7
|
||||
VPADDQ X10, X8, X8
|
||||
VPSRLQ $26, X8, X10
|
||||
VPAND X12, X8, X8
|
||||
VPADDQ X10, X9, X9
|
||||
|
||||
VMOVDQU X5, 0(DI)
|
||||
VMOVDQU X6, 32(DI)
|
||||
VMOVDQU X7, 64(DI)
|
||||
VMOVDQU X8, 96(DI)
|
||||
VMOVDQU X9, 128(DI)
|
||||
|
||||
VPSLLQ $2, X6, X1
|
||||
VPSLLQ $2, X7, X2
|
||||
VPSLLQ $2, X8, X3
|
||||
VPSLLQ $2, X9, X4
|
||||
|
||||
VPADDQ X1, X6, X1
|
||||
VPADDQ X2, X7, X2
|
||||
VPADDQ X3, X8, X3
|
||||
VPADDQ X4, X9, X4
|
||||
|
||||
VMOVDQU X1, 160(DI)
|
||||
VMOVDQU X2, 192(DI)
|
||||
VMOVDQU X3, 224(DI)
|
||||
VMOVDQU X4, 256(DI)
|
||||
|
||||
RET
|
||||
|
||||
TEXT ·updateAVX2(SB), $0-24
|
||||
MOVQ state+0(FP), DI
|
||||
MOVQ msg+8(FP), SI
|
||||
MOVQ msg_len+16(FP), DX
|
||||
|
||||
MOVD 304(DI), X0
|
||||
MOVD 308(DI), X1
|
||||
MOVD 312(DI), X2
|
||||
MOVD 316(DI), X3
|
||||
MOVD 320(DI), X4
|
||||
|
||||
MOVQ $addMaskAVX2<>(SB), R12
|
||||
MOVQ $oneBit<>(SB), R13
|
||||
MOVQ $fixPermutation<>(SB), R15
|
||||
VZEROUPPER
|
||||
|
||||
VMOVDQA (R12), Y12
|
||||
|
||||
CMPQ DX, $128
|
||||
JB BETWEEN_0_AND_128
|
||||
|
||||
AT_LEAST_128:
|
||||
VMOVDQU 32*0(SI), Y9
|
||||
VMOVDQU 32*1(SI), Y10
|
||||
ADDQ $64, SI
|
||||
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x35; BYTE $0x6c; BYTE $0xfa // VPUNPCKLQDQ Y10,Y9,Y7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ Y10,Y9,Y8
|
||||
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xff; BYTE $0xd8 // VPERMQ $216,Y7,Y7
|
||||
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0; BYTE $0xd8 // VPERMQ $216,Y8,Y8
|
||||
|
||||
VPSRLQ $26, Y7, Y9
|
||||
VPAND Y12, Y7, Y7
|
||||
VPADDQ Y7, Y0, Y0
|
||||
|
||||
VPSRLQ $26, Y9, Y7
|
||||
VPAND Y12, Y9, Y9
|
||||
VPADDQ Y9, Y1, Y1
|
||||
|
||||
VPSLLQ $12, Y8, Y9
|
||||
VPXOR Y9, Y7, Y7
|
||||
VPAND Y12, Y7, Y7
|
||||
VPADDQ Y7, Y2, Y2
|
||||
|
||||
VPSRLQ $26, Y9, Y7
|
||||
VPSRLQ $40, Y8, Y9
|
||||
VPAND Y12, Y7, Y7
|
||||
VPXOR (R13), Y9, Y9
|
||||
VPADDQ Y7, Y3, Y3
|
||||
VPADDQ Y9, Y4, Y4
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0x2f // VPBROADCASTQ 0(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xfd // VPMULUDQ Y5, Y0, Y7
|
||||
BYTE $0xc5; BYTE $0x75; BYTE $0xf4; BYTE $0xc5 // VPMULUDQ Y5, Y1, Y8
|
||||
BYTE $0xc5; BYTE $0x6d; BYTE $0xf4; BYTE $0xcd // VPMULUDQ Y5, Y2, Y9
|
||||
BYTE $0xc5; BYTE $0x65; BYTE $0xf4; BYTE $0xd5 // VPMULUDQ Y5, Y3, Y10
|
||||
BYTE $0xc5; BYTE $0x5d; BYTE $0xf4; BYTE $0xdd // VPMULUDQ Y5, Y4, Y11
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0xaf; BYTE $0xa0; BYTE $0x00; BYTE $0x00; BYTE $0x00 // VPBROADCASTQ 160(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0x6f; BYTE $0x20 // VPBROADCASTQ 32(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0xaf; BYTE $0xc0; BYTE $0x00; BYTE $0x00; BYTE $0x00 // VPBROADCASTQ 192(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0x6f; BYTE $0x40 // VPBROADCASTQ 64(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0xaf; BYTE $0xe0; BYTE $0x00; BYTE $0x00; BYTE $0x00 // VPBROADCASTQ 224(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0x6f; BYTE $0x60 // VPBROADCASTQ 96(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0xaf; BYTE $0x00; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VPBROADCASTQ 256(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2,Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x7d; BYTE $0x59; BYTE $0xaf; BYTE $0x80; BYTE $0x00; BYTE $0x00; BYTE $0x00 // VPBROADCASTQ 128(DI),Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VPSRLQ $26, Y10, Y5
|
||||
VPADDQ Y5, Y11, Y11
|
||||
VPAND Y12, Y10, Y10
|
||||
|
||||
VPSRLQ $26, Y11, Y5
|
||||
VPSLLQ $2, Y5, Y6
|
||||
VPADDQ Y6, Y5, Y5
|
||||
VPADDQ Y5, Y7, Y7
|
||||
VPAND Y12, Y11, Y11
|
||||
|
||||
VPSRLQ $26, Y7, Y5
|
||||
VPAND Y12, Y7, Y0
|
||||
VPADDQ Y5, Y8, Y8
|
||||
VPSRLQ $26, Y8, Y5
|
||||
VPAND Y12, Y8, Y1
|
||||
VPADDQ Y5, Y9, Y9
|
||||
VPSRLQ $26, Y9, Y5
|
||||
VPAND Y12, Y9, Y2
|
||||
VPADDQ Y5, Y10, Y10
|
||||
VPSRLQ $26, Y10, Y5
|
||||
VPAND Y12, Y10, Y3
|
||||
VPADDQ Y5, Y11, Y4
|
||||
|
||||
SUBQ $64, DX
|
||||
CMPQ DX, $128
|
||||
JAE AT_LEAST_128
|
||||
|
||||
BETWEEN_0_AND_128:
|
||||
CMPQ DX, $64
|
||||
JB BETWEEN_0_AND_64
|
||||
|
||||
VMOVDQU 32*0(SI), Y9
|
||||
VMOVDQU 32*1(SI), Y10
|
||||
ADDQ $64, SI
|
||||
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x35; BYTE $0x6c; BYTE $0xfa // VPUNPCKLQDQ Y10, Y9, Y7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ Y10, Y9, Y8
|
||||
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xff; BYTE $0xd8 // VPERMQ $216, Y7, Y7
|
||||
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xc0; BYTE $0xd8 // VPERMQ $216, Y8, Y8
|
||||
|
||||
VPSRLQ $26, Y7, Y9
|
||||
VPAND Y12, Y7, Y7
|
||||
VPADDQ Y7, Y0, Y0
|
||||
|
||||
VPSRLQ $26, Y9, Y7
|
||||
VPAND Y12, Y9, Y9
|
||||
VPADDQ Y9, Y1, Y1
|
||||
|
||||
VPSLLQ $12, Y8, Y9
|
||||
VPXOR Y9, Y7, Y7
|
||||
VPAND Y12, Y7, Y7
|
||||
VPADDQ Y7, Y2, Y2
|
||||
|
||||
VPSRLQ $26, Y9, Y7
|
||||
VPSRLQ $40, Y8, Y9
|
||||
VPAND Y12, Y7, Y7
|
||||
VPXOR (R13), Y9, Y9
|
||||
VPADDQ Y7, Y3, Y3
|
||||
VPADDQ Y9, Y4, Y4
|
||||
|
||||
VMOVDQU 0(DI), Y5
|
||||
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xfd // VPMULUDQ Y5, Y0, Y7
|
||||
BYTE $0xc5; BYTE $0x75; BYTE $0xf4; BYTE $0xc5 // VPMULUDQ Y5, Y1, Y8
|
||||
BYTE $0xc5; BYTE $0x6d; BYTE $0xf4; BYTE $0xcd // VPMULUDQ Y5, Y2, Y9
|
||||
BYTE $0xc5; BYTE $0x65; BYTE $0xf4; BYTE $0xd5 // VPMULUDQ Y5, Y3, Y10
|
||||
BYTE $0xc5; BYTE $0x5d; BYTE $0xf4; BYTE $0xdd // VPMULUDQ Y5, Y4, Y11
|
||||
|
||||
VMOVDQU 160(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
VMOVDQU 32(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VMOVDQU 192(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
|
||||
VMOVDQU 64(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VMOVDQU 224(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
|
||||
VMOVDQU 96(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VMOVDQU 256(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
|
||||
VMOVDQU 128(DI), Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VPSRLQ $26, Y10, Y5
|
||||
VPADDQ Y5, Y11, Y11
|
||||
VPAND Y12, Y10, Y10
|
||||
VPSRLQ $26, Y11, Y5
|
||||
VPSLLQ $2, Y5, Y6
|
||||
VPADDQ Y6, Y5, Y5
|
||||
VPADDQ Y5, Y7, Y7
|
||||
VPAND Y12, Y11, Y11
|
||||
VPSRLQ $26, Y7, Y5
|
||||
VPAND Y12, Y7, Y0
|
||||
VPADDQ Y5, Y8, Y8
|
||||
VPSRLQ $26, Y8, Y5
|
||||
VPAND Y12, Y8, Y1
|
||||
VPADDQ Y5, Y9, Y9
|
||||
VPSRLQ $26, Y9, Y5
|
||||
VPAND Y12, Y9, Y2
|
||||
VPADDQ Y5, Y10, Y10
|
||||
VPSRLQ $26, Y10, Y5
|
||||
VPAND Y12, Y10, Y3
|
||||
VPADDQ Y5, Y11, Y4
|
||||
|
||||
VPSRLDQ $8, Y0, Y7
|
||||
VPSRLDQ $8, Y1, Y8
|
||||
VPSRLDQ $8, Y2, Y9
|
||||
VPSRLDQ $8, Y3, Y10
|
||||
VPSRLDQ $8, Y4, Y11
|
||||
|
||||
VPADDQ Y7, Y0, Y0
|
||||
VPADDQ Y8, Y1, Y1
|
||||
VPADDQ Y9, Y2, Y2
|
||||
VPADDQ Y10, Y3, Y3
|
||||
VPADDQ Y11, Y4, Y4
|
||||
|
||||
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xf8; BYTE $0xaa // VPERMQ $170, Y0, Y7
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xc1; BYTE $0xaa // VPERMQ $170, Y1, Y8
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xca; BYTE $0xaa // VPERMQ $170, Y2, Y9
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xd3; BYTE $0xaa // VPERMQ $170, Y3, Y10
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xdc; BYTE $0xaa // VPERMQ $170, Y4, Y11
|
||||
|
||||
VPADDQ Y7, Y0, Y0
|
||||
VPADDQ Y8, Y1, Y1
|
||||
VPADDQ Y9, Y2, Y2
|
||||
VPADDQ Y10, Y3, Y3
|
||||
VPADDQ Y11, Y4, Y4
|
||||
SUBQ $64, DX
|
||||
|
||||
BETWEEN_0_AND_64:
|
||||
TESTQ DX, DX
|
||||
JZ DONE
|
||||
|
||||
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0xc0 // VMOVQ X0, X0
|
||||
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0xc9 // VMOVQ X1, X1
|
||||
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0xd2 // VMOVQ X2, X2
|
||||
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0xdb // VMOVQ X3, X3
|
||||
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0xe4 // VMOVQ X4, X4
|
||||
|
||||
MOVQ (R13), BX
|
||||
MOVQ SP, AX
|
||||
TESTQ $15, DX
|
||||
JZ FULL_BLOCKS
|
||||
|
||||
SUBQ $64, SP
|
||||
VPXOR Y7, Y7, Y7
|
||||
VMOVDQU Y7, (SP)
|
||||
VMOVDQU Y7, 32(SP)
|
||||
|
||||
XORQ BX, BX
|
||||
|
||||
FLUSH_BUFFER:
|
||||
MOVB (SI)(BX*1), CX
|
||||
MOVB CX, (SP)(BX*1)
|
||||
INCQ BX
|
||||
CMPQ DX, BX
|
||||
JNE FLUSH_BUFFER
|
||||
|
||||
MOVB $1, (SP)(BX*1)
|
||||
XORQ BX, BX
|
||||
MOVQ SP, SI
|
||||
|
||||
FULL_BLOCKS:
|
||||
CMPQ DX, $16
|
||||
JA AT_LEAST_16
|
||||
|
||||
BYTE $0xc5; BYTE $0xfa; BYTE $0x7e; BYTE $0x3e // VMOVQ 8*0(SI), X7
|
||||
BYTE $0xc5; BYTE $0x7a; BYTE $0x7e; BYTE $0x46; BYTE $0x08 // VMOVQ 8*1(SI), X8
|
||||
BYTE $0xc4; BYTE $0x61; BYTE $0xf9; BYTE $0x6e; BYTE $0xf3 // VMOVQ BX ,X14
|
||||
VMOVDQA (R15), Y13
|
||||
JMP MULTIPLY
|
||||
|
||||
AT_LEAST_16:
|
||||
CMPQ DX, $32
|
||||
JA AT_LEAST_32
|
||||
VMOVDQU 16*0(SI), X9
|
||||
VMOVDQU 16*1(SI), X10
|
||||
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x7a; BYTE $0x7e; BYTE $0x75; BYTE $0x00 // VMOVQ (R13), X14
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0xf3; BYTE $0x01 // VPINSRQ $1,BX, X14, X14
|
||||
VMOVDQA 32(R15), Y13
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x35; BYTE $0x6c; BYTE $0xfa // VPUNPCKLQDQ Y10, Y9, Y7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ Y10, Y9, Y8
|
||||
JMP MULTIPLY
|
||||
|
||||
AT_LEAST_32:
|
||||
CMPQ DX, $48
|
||||
JA AT_LEAST_48
|
||||
VMOVDQU 32*0(SI), Y9
|
||||
VMOVDQU 32*1(SI), X10
|
||||
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x7a; BYTE $0x7e; BYTE $0x75; BYTE $0x00 // VMOVQ 0(R13), X14
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0xf3; BYTE $0x01 // VPINSRQ $1, BX, X14, X14
|
||||
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xf6; BYTE $0xc4 // VPERMQ $196, Y14, Y14
|
||||
VMOVDQA 64(R15), Y13
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x35; BYTE $0x6c; BYTE $0xfa // VPUNPCKLQDQ Y10, Y9, Y7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ Y10, Y9, Y8
|
||||
JMP MULTIPLY
|
||||
|
||||
AT_LEAST_48:
|
||||
VMOVDQU 32*0(SI), Y9
|
||||
VMOVDQU 32*1(SI), Y10
|
||||
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x7a; BYTE $0x7e; BYTE $0x75; BYTE $0x00 // VMOVQ (R13),X14
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0xf3; BYTE $0x01 // VPINSRQ $1,BX,X14,X14
|
||||
BYTE $0xc4; BYTE $0x43; BYTE $0xfd; BYTE $0x00; BYTE $0xf6; BYTE $0x40 // VPERMQ $64,Y14,Y14
|
||||
VMOVDQA 96(R15), Y13
|
||||
BYTE $0xc4; BYTE $0xc1; BYTE $0x35; BYTE $0x6c; BYTE $0xfa // VPUNPCKLQDQ Y10, Y9, Y7
|
||||
BYTE $0xc4; BYTE $0x41; BYTE $0x35; BYTE $0x6d; BYTE $0xc2 // VPUNPCKHQDQ Y10, Y9, Y8
|
||||
|
||||
MULTIPLY:
|
||||
MOVQ AX, SP
|
||||
|
||||
VPSRLQ $26, Y7, Y9
|
||||
VPAND Y12, Y7, Y7
|
||||
VPADDQ Y7, Y0, Y0
|
||||
|
||||
VPSRLQ $26, Y9, Y7
|
||||
VPAND Y12, Y9, Y9
|
||||
VPADDQ Y9, Y1, Y1
|
||||
|
||||
VPSLLQ $12, Y8, Y9
|
||||
VPXOR Y9, Y7, Y7
|
||||
VPAND Y12, Y7, Y7
|
||||
VPADDQ Y7, Y2, Y2
|
||||
|
||||
VPSRLQ $26, Y9, Y7
|
||||
VPSRLQ $40, Y8, Y9
|
||||
VPAND Y12, Y7, Y7
|
||||
VPXOR Y14, Y9, Y9
|
||||
VPADDQ Y7, Y3, Y3
|
||||
VPADDQ Y9, Y4, Y4
|
||||
|
||||
VMOVDQU 0(DI), Y5
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xfd // VPMULUDQ Y5, Y0, Y7
|
||||
BYTE $0xc5; BYTE $0x75; BYTE $0xf4; BYTE $0xc5 // VPMULUDQ Y5, Y1, Y8
|
||||
BYTE $0xc5; BYTE $0x6d; BYTE $0xf4; BYTE $0xcd // VPMULUDQ Y5, Y2, Y9
|
||||
BYTE $0xc5; BYTE $0x65; BYTE $0xf4; BYTE $0xd5 // VPMULUDQ Y5, Y3, Y10
|
||||
BYTE $0xc5; BYTE $0x5d; BYTE $0xf4; BYTE $0xdd // VPMULUDQ Y5, Y4, Y11
|
||||
|
||||
VMOVDQU 160(DI), Y5
|
||||
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
VMOVDQU 32(DI), Y5
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VMOVDQU 192(DI), Y5
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
|
||||
VMOVDQU 64(DI), Y5
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VMOVDQU 224(DI), Y5
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
|
||||
VMOVDQU 96(DI), Y5
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VMOVDQU 256(DI), Y5
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xf5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y1,Y6
|
||||
VPADDQ Y6, Y7, Y7
|
||||
BYTE $0xc5; BYTE $0xed; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y2, Y6
|
||||
VPADDQ Y6, Y8, Y8
|
||||
BYTE $0xc5; BYTE $0xe5; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y3, Y6
|
||||
VPADDQ Y6, Y9, Y9
|
||||
BYTE $0xc5; BYTE $0xdd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y4, Y6
|
||||
VPADDQ Y6, Y10, Y10
|
||||
|
||||
VMOVDQU 128(DI), Y5
|
||||
BYTE $0xc4; BYTE $0xe2; BYTE $0x15; BYTE $0x36; BYTE $0xed // VPERMD Y5, Y13, Y5
|
||||
BYTE $0xc5; BYTE $0xfd; BYTE $0xf4; BYTE $0xf5 // VPMULUDQ Y5, Y0, Y6
|
||||
VPADDQ Y6, Y11, Y11
|
||||
|
||||
VPSRLQ $26, Y10, Y5
|
||||
VPADDQ Y5, Y11, Y11
|
||||
VPAND Y12, Y10, Y10
|
||||
VPSRLQ $26, Y11, Y5
|
||||
VPSLLQ $2, Y5, Y6
|
||||
VPADDQ Y6, Y5, Y5
|
||||
VPADDQ Y5, Y7, Y7
|
||||
VPAND Y12, Y11, Y11
|
||||
VPSRLQ $26, Y7, Y5
|
||||
VPAND Y12, Y7, Y0
|
||||
VPADDQ Y5, Y8, Y8
|
||||
VPSRLQ $26, Y8, Y5
|
||||
VPAND Y12, Y8, Y1
|
||||
VPADDQ Y5, Y9, Y9
|
||||
VPSRLQ $26, Y9, Y5
|
||||
VPAND Y12, Y9, Y2
|
||||
VPADDQ Y5, Y10, Y10
|
||||
VPSRLQ $26, Y10, Y5
|
||||
VPAND Y12, Y10, Y3
|
||||
VPADDQ Y5, Y11, Y4
|
||||
|
||||
VPSRLDQ $8, Y0, Y7
|
||||
VPSRLDQ $8, Y1, Y8
|
||||
VPSRLDQ $8, Y2, Y9
|
||||
VPSRLDQ $8, Y3, Y10
|
||||
VPSRLDQ $8, Y4, Y11
|
||||
|
||||
VPADDQ Y7, Y0, Y0
|
||||
VPADDQ Y8, Y1, Y1
|
||||
VPADDQ Y9, Y2, Y2
|
||||
VPADDQ Y10, Y3, Y3
|
||||
VPADDQ Y11, Y4, Y4
|
||||
|
||||
BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xf8; BYTE $0xaa // VPERMQ $170, Y0, Y7
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xc1; BYTE $0xaa // VPERMQ $170, Y1, Y8
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xca; BYTE $0xaa // VPERMQ $170, Y2, Y9
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xd3; BYTE $0xaa // VPERMQ $170, Y3, Y10
|
||||
BYTE $0xc4; BYTE $0x63; BYTE $0xfd; BYTE $0x00; BYTE $0xdc; BYTE $0xaa // VPERMQ $170, Y4, Y11
|
||||
|
||||
VPADDQ Y7, Y0, Y0
|
||||
VPADDQ Y8, Y1, Y1
|
||||
VPADDQ Y9, Y2, Y2
|
||||
VPADDQ Y10, Y3, Y3
|
||||
VPADDQ Y11, Y4, Y4
|
||||
|
||||
DONE:
|
||||
VZEROUPPER
|
||||
MOVD X0, 304(DI)
|
||||
MOVD X1, 308(DI)
|
||||
MOVD X2, 312(DI)
|
||||
MOVD X3, 316(DI)
|
||||
MOVD X4, 320(DI)
|
||||
RET
|
||||
|
||||
TEXT ·finalizeAVX2(SB), $0-16
|
||||
MOVQ out+0(FP), SI
|
||||
MOVQ state+8(FP), DI
|
||||
|
||||
VZEROUPPER
|
||||
|
||||
BYTE $0xc5; BYTE $0xf9; BYTE $0x6e; BYTE $0x87; BYTE $0x30; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VMOVD 304(DI), X0
|
||||
BYTE $0xc5; BYTE $0xf9; BYTE $0x6e; BYTE $0x8f; BYTE $0x34; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VMOVD 308(DI), X1
|
||||
BYTE $0xc5; BYTE $0xf9; BYTE $0x6e; BYTE $0x97; BYTE $0x38; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VMOVD 312(DI), X2
|
||||
BYTE $0xc5; BYTE $0xf9; BYTE $0x6e; BYTE $0x9f; BYTE $0x3c; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VMOVD 316(DI), X3
|
||||
BYTE $0xc5; BYTE $0xf9; BYTE $0x6e; BYTE $0xa7; BYTE $0x40; BYTE $0x01; BYTE $0x00; BYTE $0x00 // VMOVD 320(DI), X4
|
||||
|
||||
VMOVDQU addMaskAVX2<>(SB), X7
|
||||
|
||||
VPSRLQ $26, X4, X5
|
||||
VPSLLQ $2, X5, X6
|
||||
VPADDQ X6, X5, X5
|
||||
VPADDQ X5, X0, X0
|
||||
VPAND X7, X4, X4
|
||||
|
||||
VPSRLQ $26, X0, X5
|
||||
VPAND X7, X0, X0
|
||||
VPADDQ X5, X1, X1
|
||||
VPSRLQ $26, X1, X5
|
||||
VPAND X7, X1, X1
|
||||
VPADDQ X5, X2, X2
|
||||
VPSRLQ $26, X2, X5
|
||||
VPAND X7, X2, X2
|
||||
VPADDQ X5, X3, X3
|
||||
VPSRLQ $26, X3, X5
|
||||
VPAND X7, X3, X3
|
||||
VPADDQ X5, X4, X4
|
||||
|
||||
VPSLLQ $26, X1, X5
|
||||
VPXOR X5, X0, X0
|
||||
VPSLLQ $52, X2, X5
|
||||
VPXOR X5, X0, X0
|
||||
VPSRLQ $12, X2, X1
|
||||
VPSLLQ $14, X3, X5
|
||||
VPXOR X5, X1, X1
|
||||
VPSLLQ $40, X4, X5
|
||||
VPXOR X5, X1, X1
|
||||
|
||||
VZEROUPPER
|
||||
|
||||
MOVQ X0, AX
|
||||
MOVQ X1, BX
|
||||
|
||||
ADDQ 288(DI), AX
|
||||
ADCQ 288+8(DI), BX
|
||||
MOVQ AX, (SI)
|
||||
MOVQ BX, 8(SI)
|
||||
|
||||
RET
|
195
vendor/github.com/aead/poly1305/poly1305_amd64.go
generated
vendored
195
vendor/github.com/aead/poly1305/poly1305_amd64.go
generated
vendored
|
@ -1,195 +0,0 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build amd64, !gccgo, !appengine
|
||||
|
||||
package poly1305
|
||||
|
||||
import (
|
||||
"golang.org/x/sys/cpu"
|
||||
"io"
|
||||
)
|
||||
|
||||
var useAVX2 = cpu.X86.HasAVX2
|
||||
|
||||
//go:noescape
|
||||
func initialize(state *[7]uint64, key *[32]byte)
|
||||
|
||||
//go:noescape
|
||||
func initializeAVX2(state *[512]byte, key *[32]byte)
|
||||
|
||||
//go:noescape
|
||||
func update(state *[7]uint64, msg []byte)
|
||||
|
||||
//go:noescape
|
||||
func updateAVX2(state *[512]byte, msg []byte)
|
||||
|
||||
//go:noescape
|
||||
func finalize(tag *[TagSize]byte, state *[7]uint64)
|
||||
|
||||
//go:noescape
|
||||
func finalizeAVX2(tag *[TagSize]byte, state *[512]byte)
|
||||
|
||||
// compiler asserts - check that poly1305Hash and poly1305HashAVX2 implements the hash interface
|
||||
var (
|
||||
_ (hash) = &poly1305Hash{}
|
||||
_ (hash) = &poly1305HashAVX2{}
|
||||
)
|
||||
|
||||
type hash interface {
|
||||
io.Writer
|
||||
|
||||
Sum(b []byte) []byte
|
||||
}
|
||||
|
||||
// Sum generates an authenticator for msg using a one-time key and returns the
|
||||
// 16-byte result. Authenticating two different messages with the same key allows
|
||||
// an attacker to forge messages at will.
|
||||
func Sum(msg []byte, key [32]byte) [TagSize]byte {
|
||||
if len(msg) == 0 {
|
||||
msg = []byte{}
|
||||
}
|
||||
var out [TagSize]byte
|
||||
if useAVX2 && len(msg) > 8*TagSize {
|
||||
var state [512]byte
|
||||
initializeAVX2(&state, &key)
|
||||
updateAVX2(&state, msg)
|
||||
finalizeAVX2(&out, &state)
|
||||
} else {
|
||||
var state [7]uint64 // := uint64{ h0, h1, h2, r0, r1, pad0, pad1 }
|
||||
initialize(&state, &key)
|
||||
update(&state, msg)
|
||||
finalize(&out, &state)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// New returns a Hash computing the poly1305 sum.
|
||||
// Notice that Poly1305 is insecure if one key is used twice.
|
||||
func New(key [32]byte) *Hash {
|
||||
if useAVX2 {
|
||||
h := new(poly1305HashAVX2)
|
||||
initializeAVX2(&(h.state), &key)
|
||||
return &Hash{h, false}
|
||||
}
|
||||
h := new(poly1305Hash)
|
||||
initialize(&(h.state), &key)
|
||||
return &Hash{h, false}
|
||||
}
|
||||
|
||||
// Hash implements the poly1305 authenticator.
|
||||
// Poly1305 cannot be used like common hash.Hash implementations,
|
||||
// because using a poly1305 key twice breaks its security.
|
||||
type Hash struct {
|
||||
hash
|
||||
|
||||
done bool
|
||||
}
|
||||
|
||||
// Size returns the number of bytes Sum will append.
|
||||
func (h *Hash) Size() int { return TagSize }
|
||||
|
||||
// Write adds more data to the running Poly1305 hash.
|
||||
// This function should return a non-nil error if a call
|
||||
// to Write happens after a call to Sum. So it is not possible
|
||||
// to compute the checksum and than add more data.
|
||||
func (h *Hash) Write(msg []byte) (int, error) {
|
||||
if h.done {
|
||||
return 0, errWriteAfterSum
|
||||
}
|
||||
return h.hash.Write(msg)
|
||||
}
|
||||
|
||||
// Sum appends the Poly1305 hash of the previously
|
||||
// processed data to b and returns the resulting slice.
|
||||
// It is safe to call this function multiple times.
|
||||
func (h *Hash) Sum(b []byte) []byte {
|
||||
b = h.hash.Sum(b)
|
||||
h.done = true
|
||||
return b
|
||||
}
|
||||
|
||||
type poly1305Hash struct {
|
||||
state [7]uint64 // := uint64{ h0, h1, h2, r0, r1, pad0, pad1 }
|
||||
|
||||
buf [TagSize]byte
|
||||
off int
|
||||
}
|
||||
|
||||
func (h *poly1305Hash) Write(p []byte) (n int, err error) {
|
||||
n = len(p)
|
||||
if h.off > 0 {
|
||||
dif := TagSize - h.off
|
||||
if n <= dif {
|
||||
h.off += copy(h.buf[h.off:], p)
|
||||
return n, nil
|
||||
}
|
||||
copy(h.buf[h.off:], p[:dif])
|
||||
update(&(h.state), h.buf[:])
|
||||
p = p[dif:]
|
||||
h.off = 0
|
||||
}
|
||||
// process full 16-byte blocks
|
||||
if nn := len(p) & (^(TagSize - 1)); nn > 0 {
|
||||
update(&(h.state), p[:nn])
|
||||
p = p[nn:]
|
||||
}
|
||||
if len(p) > 0 {
|
||||
h.off += copy(h.buf[h.off:], p)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (h *poly1305Hash) Sum(b []byte) []byte {
|
||||
var out [TagSize]byte
|
||||
state := h.state
|
||||
if h.off > 0 {
|
||||
update(&state, h.buf[:h.off])
|
||||
}
|
||||
finalize(&out, &state)
|
||||
return append(b, out[:]...)
|
||||
}
|
||||
|
||||
type poly1305HashAVX2 struct {
|
||||
// r[0] | r^2[0] | r[1] | r^2[1] | r[2] | r^2[2] | r[3] | r^2[3] | r[4] | r^2[4] | r[1]*5 | r^2[1]*5 | r[2]*5 | r^2[2]*5 r[3]*5 | r^2[3]*5 r[4]*5 | r^2[4]*5
|
||||
state [512]byte
|
||||
|
||||
buffer [8 * TagSize]byte
|
||||
offset int
|
||||
}
|
||||
|
||||
func (h *poly1305HashAVX2) Write(p []byte) (n int, err error) {
|
||||
n = len(p)
|
||||
if h.offset > 0 {
|
||||
remaining := 8*TagSize - h.offset
|
||||
if n <= remaining {
|
||||
h.offset += copy(h.buffer[h.offset:], p)
|
||||
return n, nil
|
||||
}
|
||||
copy(h.buffer[h.offset:], p[:remaining])
|
||||
updateAVX2(&h.state, h.buffer[:])
|
||||
p = p[remaining:]
|
||||
h.offset = 0
|
||||
}
|
||||
// process full 8*16-byte blocks
|
||||
if nn := len(p) & (^(8*TagSize - 1)); nn > 0 {
|
||||
updateAVX2(&h.state, p[:nn])
|
||||
p = p[nn:]
|
||||
}
|
||||
if len(p) > 0 {
|
||||
h.offset += copy(h.buffer[:], p)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (h *poly1305HashAVX2) Sum(b []byte) []byte {
|
||||
var out [TagSize]byte
|
||||
state := h.state
|
||||
|
||||
if h.offset > 0 {
|
||||
updateAVX2(&state, h.buffer[:h.offset])
|
||||
}
|
||||
finalizeAVX2(&out, &state)
|
||||
return append(b, out[:]...)
|
||||
}
|
155
vendor/github.com/aead/poly1305/poly1305_amd64.s
generated
vendored
155
vendor/github.com/aead/poly1305/poly1305_amd64.s
generated
vendored
|
@ -1,155 +0,0 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build amd64, !gccgo, !appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
|
||||
DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
|
||||
GLOBL ·poly1305Mask<>(SB), RODATA, $16
|
||||
|
||||
#define POLY1305_ADD(msg, h0, h1, h2) \
|
||||
ADDQ 0(msg), h0; \
|
||||
ADCQ 8(msg), h1; \
|
||||
ADCQ $1, h2; \
|
||||
LEAQ 16(msg), msg
|
||||
|
||||
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
|
||||
MOVQ r0, AX; \
|
||||
MULQ h0; \
|
||||
MOVQ AX, t0; \
|
||||
MOVQ DX, t1; \
|
||||
MOVQ r0, AX; \
|
||||
MULQ h1; \
|
||||
ADDQ AX, t1; \
|
||||
ADCQ $0, DX; \
|
||||
MOVQ r0, t2; \
|
||||
IMULQ h2, t2; \
|
||||
ADDQ DX, t2; \
|
||||
\
|
||||
MOVQ r1, AX; \
|
||||
MULQ h0; \
|
||||
ADDQ AX, t1; \
|
||||
ADCQ $0, DX; \
|
||||
MOVQ DX, h0; \
|
||||
MOVQ r1, t3; \
|
||||
IMULQ h2, t3; \
|
||||
MOVQ r1, AX; \
|
||||
MULQ h1; \
|
||||
ADDQ AX, t2; \
|
||||
ADCQ DX, t3; \
|
||||
ADDQ h0, t2; \
|
||||
ADCQ $0, t3; \
|
||||
\
|
||||
MOVQ t0, h0; \
|
||||
MOVQ t1, h1; \
|
||||
MOVQ t2, h2; \
|
||||
ANDQ $3, h2; \
|
||||
MOVQ t2, t0; \
|
||||
ANDQ $0XFFFFFFFFFFFFFFFC, t0; \
|
||||
ADDQ t0, h0; \
|
||||
ADCQ t3, h1; \
|
||||
ADCQ $0, h2; \
|
||||
SHRQ $2, t3, t2; \
|
||||
SHRQ $2, t3; \
|
||||
ADDQ t2, h0; \
|
||||
ADCQ t3, h1; \
|
||||
ADCQ $0, h2
|
||||
|
||||
// func update(state *[7]uint64, msg []byte)
|
||||
TEXT ·update(SB), $0-32
|
||||
MOVQ state+0(FP), DI
|
||||
MOVQ msg_base+8(FP), SI
|
||||
MOVQ msg_len+16(FP), R15
|
||||
|
||||
MOVQ 0(DI), R8 // h0
|
||||
MOVQ 8(DI), R9 // h1
|
||||
MOVQ 16(DI), R10 // h2
|
||||
MOVQ 24(DI), R11 // r0
|
||||
MOVQ 32(DI), R12 // h1
|
||||
|
||||
CMPQ R15, $16
|
||||
JB BYTES_BETWEEN_0_AND_15
|
||||
|
||||
LOOP:
|
||||
POLY1305_ADD(SI, R8, R9, R10)
|
||||
|
||||
MULTIPLY:
|
||||
POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
|
||||
SUBQ $16, R15
|
||||
CMPQ R15, $16
|
||||
JAE LOOP
|
||||
|
||||
BYTES_BETWEEN_0_AND_15:
|
||||
TESTQ R15, R15
|
||||
JZ DONE
|
||||
MOVQ $1, BX
|
||||
XORQ CX, CX
|
||||
XORQ R13, R13
|
||||
ADDQ R15, SI
|
||||
|
||||
FLUSH_BUFFER:
|
||||
SHLQ $8, BX, CX
|
||||
SHLQ $8, BX
|
||||
MOVB -1(SI), R13
|
||||
XORQ R13, BX
|
||||
DECQ SI
|
||||
DECQ R15
|
||||
JNZ FLUSH_BUFFER
|
||||
|
||||
ADDQ BX, R8
|
||||
ADCQ CX, R9
|
||||
ADCQ $0, R10
|
||||
MOVQ $16, R15
|
||||
JMP MULTIPLY
|
||||
|
||||
DONE:
|
||||
MOVQ R8, 0(DI)
|
||||
MOVQ R9, 8(DI)
|
||||
MOVQ R10, 16(DI)
|
||||
RET
|
||||
|
||||
// func initialize(state *[7]uint64, key *[32]byte)
|
||||
TEXT ·initialize(SB), $0-16
|
||||
MOVQ state+0(FP), DI
|
||||
MOVQ key+8(FP), SI
|
||||
|
||||
// state[0...7] is initialized with zero
|
||||
MOVOU 0(SI), X0
|
||||
MOVOU 16(SI), X1
|
||||
MOVOU ·poly1305Mask<>(SB), X2
|
||||
PAND X2, X0
|
||||
MOVOU X0, 24(DI)
|
||||
MOVOU X1, 40(DI)
|
||||
RET
|
||||
|
||||
// func finalize(tag *[TagSize]byte, state *[7]uint64)
|
||||
TEXT ·finalize(SB), $0-16
|
||||
MOVQ tag+0(FP), DI
|
||||
MOVQ state+8(FP), SI
|
||||
|
||||
MOVQ 0(SI), AX
|
||||
MOVQ 8(SI), BX
|
||||
MOVQ 16(SI), CX
|
||||
MOVQ AX, R8
|
||||
MOVQ BX, R9
|
||||
SUBQ $0XFFFFFFFFFFFFFFFB, AX
|
||||
SBBQ $0XFFFFFFFFFFFFFFFF, BX
|
||||
SBBQ $3, CX
|
||||
CMOVQCS R8, AX
|
||||
CMOVQCS R9, BX
|
||||
ADDQ 40(SI), AX
|
||||
ADCQ 48(SI), BX
|
||||
|
||||
MOVQ AX, 0(DI)
|
||||
MOVQ BX, 8(DI)
|
||||
RET
|
||||
|
||||
|
||||
// func supportsAVX2() bool
|
||||
TEXT ·supportsAVX2(SB), 4, $0-1
|
||||
MOVQ runtime·support_avx2(SB), AX
|
||||
MOVB AX, ret+0(FP)
|
||||
RET
|
229
vendor/github.com/aead/poly1305/poly1305_ref.go
generated
vendored
229
vendor/github.com/aead/poly1305/poly1305_ref.go
generated
vendored
|
@ -1,229 +0,0 @@
|
|||
// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// +build !amd64 gccgo appengine nacl
|
||||
|
||||
package poly1305
|
||||
|
||||
import "encoding/binary"
|
||||
|
||||
const (
|
||||
msgBlock = uint32(1 << 24)
|
||||
finalBlock = uint32(0)
|
||||
)
|
||||
|
||||
// Sum generates an authenticator for msg using a one-time key and returns the
|
||||
// 16-byte result. Authenticating two different messages with the same key allows
|
||||
// an attacker to forge messages at will.
|
||||
func Sum(msg []byte, key [32]byte) [TagSize]byte {
|
||||
var (
|
||||
h, r [5]uint32
|
||||
s [4]uint32
|
||||
)
|
||||
var out [TagSize]byte
|
||||
|
||||
initialize(&r, &s, &key)
|
||||
|
||||
// process full 16-byte blocks
|
||||
n := len(msg) & (^(TagSize - 1))
|
||||
if n > 0 {
|
||||
update(msg[:n], msgBlock, &h, &r)
|
||||
msg = msg[n:]
|
||||
}
|
||||
if len(msg) > 0 {
|
||||
var block [TagSize]byte
|
||||
off := copy(block[:], msg)
|
||||
block[off] = 1
|
||||
update(block[:], finalBlock, &h, &r)
|
||||
}
|
||||
finalize(&out, &h, &s)
|
||||
return out
|
||||
}
|
||||
|
||||
// New returns a hash.Hash computing the poly1305 sum.
|
||||
// Notice that Poly1305 is insecure if one key is used twice.
|
||||
func New(key [32]byte) *Hash {
|
||||
p := new(Hash)
|
||||
initialize(&(p.r), &(p.s), &key)
|
||||
return p
|
||||
}
|
||||
|
||||
// Hash implements a Poly1305 writer interface.
|
||||
// Poly1305 cannot be used like common hash.Hash implementations,
|
||||
// because using a poly1305 key twice breaks its security.
|
||||
// So poly1305.Hash does not support some kind of reset.
|
||||
type Hash struct {
|
||||
h, r [5]uint32
|
||||
s [4]uint32
|
||||
|
||||
buf [TagSize]byte
|
||||
off int
|
||||
done bool
|
||||
}
|
||||
|
||||
// Size returns the number of bytes Sum will append.
|
||||
func (p *Hash) Size() int { return TagSize }
|
||||
|
||||
// Write adds more data to the running Poly1305 hash.
|
||||
// This function should return a non-nil error if a call
|
||||
// to Write happens after a call to Sum. So it is not possible
|
||||
// to compute the checksum and than add more data.
|
||||
func (p *Hash) Write(msg []byte) (int, error) {
|
||||
if p.done {
|
||||
return 0, errWriteAfterSum
|
||||
}
|
||||
n := len(msg)
|
||||
|
||||
if p.off > 0 {
|
||||
dif := TagSize - p.off
|
||||
if n <= dif {
|
||||
p.off += copy(p.buf[p.off:], msg)
|
||||
return n, nil
|
||||
}
|
||||
copy(p.buf[p.off:], msg[:dif])
|
||||
msg = msg[dif:]
|
||||
update(p.buf[:], msgBlock, &(p.h), &(p.r))
|
||||
p.off = 0
|
||||
}
|
||||
|
||||
// process full 16-byte blocks
|
||||
if nn := len(msg) & (^(TagSize - 1)); nn > 0 {
|
||||
update(msg[:nn], msgBlock, &(p.h), &(p.r))
|
||||
msg = msg[nn:]
|
||||
}
|
||||
|
||||
if len(msg) > 0 {
|
||||
p.off += copy(p.buf[p.off:], msg)
|
||||
}
|
||||
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// Sum appends the Pol1305 hash of the previously
|
||||
// processed data to b and returns the resulting slice.
|
||||
// It is safe to call this function multiple times.
|
||||
func (p *Hash) Sum(b []byte) []byte {
|
||||
var out [TagSize]byte
|
||||
h := p.h
|
||||
|
||||
if p.off > 0 {
|
||||
var buf [TagSize]byte
|
||||
copy(buf[:], p.buf[:p.off])
|
||||
buf[p.off] = 1 // invariant: p.off < TagSize
|
||||
|
||||
update(buf[:], finalBlock, &h, &(p.r))
|
||||
}
|
||||
|
||||
finalize(&out, &h, &(p.s))
|
||||
p.done = true
|
||||
return append(b, out[:]...)
|
||||
}
|
||||
|
||||
func initialize(r *[5]uint32, s *[4]uint32, key *[32]byte) {
|
||||
r[0] = binary.LittleEndian.Uint32(key[0:]) & 0x3ffffff
|
||||
r[1] = (binary.LittleEndian.Uint32(key[3:]) >> 2) & 0x3ffff03
|
||||
r[2] = (binary.LittleEndian.Uint32(key[6:]) >> 4) & 0x3ffc0ff
|
||||
r[3] = (binary.LittleEndian.Uint32(key[9:]) >> 6) & 0x3f03fff
|
||||
r[4] = (binary.LittleEndian.Uint32(key[12:]) >> 8) & 0x00fffff
|
||||
|
||||
s[0] = binary.LittleEndian.Uint32(key[16:])
|
||||
s[1] = binary.LittleEndian.Uint32(key[20:])
|
||||
s[2] = binary.LittleEndian.Uint32(key[24:])
|
||||
s[3] = binary.LittleEndian.Uint32(key[28:])
|
||||
}
|
||||
|
||||
func update(msg []byte, flag uint32, h, r *[5]uint32) {
|
||||
h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
|
||||
r0, r1, r2, r3, r4 := uint64(r[0]), uint64(r[1]), uint64(r[2]), uint64(r[3]), uint64(r[4])
|
||||
R1, R2, R3, R4 := r1*5, r2*5, r3*5, r4*5
|
||||
|
||||
for len(msg) >= TagSize {
|
||||
// h += msg
|
||||
h0 += binary.LittleEndian.Uint32(msg[0:]) & 0x3ffffff
|
||||
h1 += (binary.LittleEndian.Uint32(msg[3:]) >> 2) & 0x3ffffff
|
||||
h2 += (binary.LittleEndian.Uint32(msg[6:]) >> 4) & 0x3ffffff
|
||||
h3 += (binary.LittleEndian.Uint32(msg[9:]) >> 6) & 0x3ffffff
|
||||
h4 += (binary.LittleEndian.Uint32(msg[12:]) >> 8) | flag
|
||||
|
||||
// h *= r
|
||||
d0 := (uint64(h0) * r0) + (uint64(h1) * R4) + (uint64(h2) * R3) + (uint64(h3) * R2) + (uint64(h4) * R1)
|
||||
d1 := (d0 >> 26) + (uint64(h0) * r1) + (uint64(h1) * r0) + (uint64(h2) * R4) + (uint64(h3) * R3) + (uint64(h4) * R2)
|
||||
d2 := (d1 >> 26) + (uint64(h0) * r2) + (uint64(h1) * r1) + (uint64(h2) * r0) + (uint64(h3) * R4) + (uint64(h4) * R3)
|
||||
d3 := (d2 >> 26) + (uint64(h0) * r3) + (uint64(h1) * r2) + (uint64(h2) * r1) + (uint64(h3) * r0) + (uint64(h4) * R4)
|
||||
d4 := (d3 >> 26) + (uint64(h0) * r4) + (uint64(h1) * r3) + (uint64(h2) * r2) + (uint64(h3) * r1) + (uint64(h4) * r0)
|
||||
|
||||
// h %= p
|
||||
h0 = uint32(d0) & 0x3ffffff
|
||||
h1 = uint32(d1) & 0x3ffffff
|
||||
h2 = uint32(d2) & 0x3ffffff
|
||||
h3 = uint32(d3) & 0x3ffffff
|
||||
h4 = uint32(d4) & 0x3ffffff
|
||||
|
||||
h0 += uint32(d4>>26) * 5
|
||||
h1 += h0 >> 26
|
||||
h0 = h0 & 0x3ffffff
|
||||
|
||||
msg = msg[TagSize:]
|
||||
}
|
||||
|
||||
h[0], h[1], h[2], h[3], h[4] = h0, h1, h2, h3, h4
|
||||
}
|
||||
|
||||
func finalize(out *[TagSize]byte, h *[5]uint32, s *[4]uint32) {
|
||||
h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
|
||||
|
||||
// h %= p reduction
|
||||
h2 += h1 >> 26
|
||||
h1 &= 0x3ffffff
|
||||
h3 += h2 >> 26
|
||||
h2 &= 0x3ffffff
|
||||
h4 += h3 >> 26
|
||||
h3 &= 0x3ffffff
|
||||
h0 += 5 * (h4 >> 26)
|
||||
h4 &= 0x3ffffff
|
||||
h1 += h0 >> 26
|
||||
h0 &= 0x3ffffff
|
||||
|
||||
// h - p
|
||||
t0 := h0 + 5
|
||||
t1 := h1 + (t0 >> 26)
|
||||
t2 := h2 + (t1 >> 26)
|
||||
t3 := h3 + (t2 >> 26)
|
||||
t4 := h4 + (t3 >> 26) - (1 << 26)
|
||||
t0 &= 0x3ffffff
|
||||
t1 &= 0x3ffffff
|
||||
t2 &= 0x3ffffff
|
||||
t3 &= 0x3ffffff
|
||||
|
||||
// select h if h < p else h - p
|
||||
t_mask := (t4 >> 31) - 1
|
||||
h_mask := ^t_mask
|
||||
h0 = (h0 & h_mask) | (t0 & t_mask)
|
||||
h1 = (h1 & h_mask) | (t1 & t_mask)
|
||||
h2 = (h2 & h_mask) | (t2 & t_mask)
|
||||
h3 = (h3 & h_mask) | (t3 & t_mask)
|
||||
h4 = (h4 & h_mask) | (t4 & t_mask)
|
||||
|
||||
// h %= 2^128
|
||||
h0 |= h1 << 26
|
||||
h1 = ((h1 >> 6) | (h2 << 20))
|
||||
h2 = ((h2 >> 12) | (h3 << 14))
|
||||
h3 = ((h3 >> 18) | (h4 << 8))
|
||||
|
||||
// s: the s part of the key
|
||||
// tag = (h + s) % (2^128)
|
||||
t := uint64(h0) + uint64(s[0])
|
||||
h0 = uint32(t)
|
||||
t = uint64(h1) + uint64(s[1]) + (t >> 32)
|
||||
h1 = uint32(t)
|
||||
t = uint64(h2) + uint64(s[2]) + (t >> 32)
|
||||
h2 = uint32(t)
|
||||
t = uint64(h3) + uint64(s[3]) + (t >> 32)
|
||||
h3 = uint32(t)
|
||||
|
||||
binary.LittleEndian.PutUint32(out[0:], h0)
|
||||
binary.LittleEndian.PutUint32(out[4:], h1)
|
||||
binary.LittleEndian.PutUint32(out[8:], h2)
|
||||
binary.LittleEndian.PutUint32(out[12:], h3)
|
||||
}
|
4
vendor/github.com/jedisct1/xsecretbox/.travis.yml
generated
vendored
4
vendor/github.com/jedisct1/xsecretbox/.travis.yml
generated
vendored
|
@ -1,4 +0,0 @@
|
|||
sudo: false
|
||||
language: go
|
||||
go:
|
||||
- 1.x
|
9
vendor/github.com/jedisct1/xsecretbox/go.mod
generated
vendored
9
vendor/github.com/jedisct1/xsecretbox/go.mod
generated
vendored
|
@ -1,10 +1,5 @@
|
|||
module github.com/jedisct1/xsecretbox
|
||||
|
||||
go 1.15
|
||||
go 1.16
|
||||
|
||||
require (
|
||||
github.com/aead/chacha20 v0.0.0-20180709150244-8b13a72661da
|
||||
github.com/aead/poly1305 v0.0.0-20180717145839-3fee0db0b635
|
||||
golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad
|
||||
golang.org/x/sys v0.0.0-20201231184435-2d18734c6014 // indirect
|
||||
)
|
||||
require golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2
|
||||
|
|
23
vendor/github.com/jedisct1/xsecretbox/go.sum
generated
vendored
23
vendor/github.com/jedisct1/xsecretbox/go.sum
generated
vendored
|
@ -1,15 +1,8 @@
|
|||
github.com/aead/chacha20 v0.0.0-20180709150244-8b13a72661da h1:KjTM2ks9d14ZYCvmHS9iAKVt9AyzRSqNU1qabPih5BY=
|
||||
github.com/aead/chacha20 v0.0.0-20180709150244-8b13a72661da/go.mod h1:eHEWzANqSiWQsof+nXEI9bUVUyV6F53Fp89EuCh2EAA=
|
||||
github.com/aead/poly1305 v0.0.0-20180717145839-3fee0db0b635 h1:52m0LGchQBBVqJRyYYufQuIbVqRawmubW3OFGqK1ekw=
|
||||
github.com/aead/poly1305 v0.0.0-20180717145839-3fee0db0b635/go.mod h1:lmLxL+FV291OopO93Bwf9fQLQeLyt33VJRUg5VJ30us=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad h1:DN0cp81fZ3njFcrLCytUHRSUkqBjfTo4Tx9RJTWs0EY=
|
||||
golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037 h1:YyJpGZS1sBuBCzLAR1VEpK193GlqGZbnPFnPV/5Rsb4=
|
||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201231184435-2d18734c6014 h1:joucsQqXmyBVxViHCPFjG3hx8JzIFSaym3l3MM/Jsdg=
|
||||
golang.org/x/sys v0.0.0-20201231184435-2d18734c6014/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2 h1:It14KIkyBFYkHkwZ7k45minvA9aorojkyjGk9KJ5B/w=
|
||||
golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68 h1:nxC68pudNYkKU6jWhgrqdreuFiOQWj1Fs7T3VrH4Pjw=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
|
|
27
vendor/github.com/jedisct1/xsecretbox/sharedkey.go
generated
vendored
27
vendor/github.com/jedisct1/xsecretbox/sharedkey.go
generated
vendored
|
@ -2,27 +2,26 @@ package xsecretbox
|
|||
|
||||
import (
|
||||
crypto_rand "crypto/rand"
|
||||
"errors"
|
||||
|
||||
"github.com/aead/chacha20/chacha"
|
||||
"golang.org/x/crypto/chacha20"
|
||||
"golang.org/x/crypto/curve25519"
|
||||
)
|
||||
|
||||
// SharedKey computes a shared secret compatible with the one used by `crypto_box_xchacha20poly1305``
|
||||
func SharedKey(secretKey [32]byte, publicKey [32]byte) ([32]byte, error) {
|
||||
var sharedKey [32]byte
|
||||
curve25519.ScalarMult(&sharedKey, &secretKey, &publicKey)
|
||||
c := byte(0)
|
||||
for i := 0; i < 32; i++ {
|
||||
c |= sharedKey[i]
|
||||
dhKey, err := curve25519.X25519(secretKey[:], publicKey[:])
|
||||
var subKey []byte
|
||||
if err == nil {
|
||||
var nonce [16]byte
|
||||
subKey, err = chacha20.HChaCha20(dhKey[:], nonce[:])
|
||||
}
|
||||
if c == 0 {
|
||||
if _, err := crypto_rand.Read(sharedKey[:]); err != nil {
|
||||
return sharedKey, err
|
||||
var key [32]byte
|
||||
if err != nil {
|
||||
if _, err2 := crypto_rand.Read(key[:]); err != nil {
|
||||
return key, err2
|
||||
}
|
||||
return sharedKey, errors.New("weak public key")
|
||||
return key, err
|
||||
}
|
||||
var nonce [16]byte
|
||||
chacha.HChaCha20(&sharedKey, &nonce, &sharedKey)
|
||||
return sharedKey, nil
|
||||
copy(key[:], subKey)
|
||||
return key, nil
|
||||
}
|
||||
|
|
12
vendor/github.com/jedisct1/xsecretbox/xsecretbox.go
generated
vendored
12
vendor/github.com/jedisct1/xsecretbox/xsecretbox.go
generated
vendored
|
@ -4,8 +4,8 @@ import (
|
|||
"crypto/subtle"
|
||||
"errors"
|
||||
|
||||
"github.com/aead/chacha20/chacha"
|
||||
"github.com/aead/poly1305"
|
||||
"golang.org/x/crypto/chacha20"
|
||||
"golang.org/x/crypto/poly1305"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -27,7 +27,7 @@ func Seal(out, nonce, message, key []byte) []byte {
|
|||
}
|
||||
|
||||
var firstBlock [64]byte
|
||||
cipher, _ := chacha.NewCipher(nonce, key, 20)
|
||||
cipher, _ := chacha20.NewUnauthenticatedCipher(key, nonce)
|
||||
cipher.XORKeyStream(firstBlock[:], firstBlock[:])
|
||||
var polyKey [32]byte
|
||||
copy(polyKey[:], firstBlock[:32])
|
||||
|
@ -51,7 +51,7 @@ func Seal(out, nonce, message, key []byte) []byte {
|
|||
cipher.XORKeyStream(out, message)
|
||||
|
||||
var tag [TagSize]byte
|
||||
hash := poly1305.New(polyKey)
|
||||
hash := poly1305.New(&polyKey)
|
||||
hash.Write(ciphertext)
|
||||
hash.Sum(tag[:0])
|
||||
copy(tagOut, tag[:])
|
||||
|
@ -72,14 +72,14 @@ func Open(out, nonce, box, key []byte) ([]byte, error) {
|
|||
}
|
||||
|
||||
var firstBlock [64]byte
|
||||
cipher, _ := chacha.NewCipher(nonce, key, 20)
|
||||
cipher, _ := chacha20.NewUnauthenticatedCipher(key, nonce)
|
||||
cipher.XORKeyStream(firstBlock[:], firstBlock[:])
|
||||
var polyKey [32]byte
|
||||
copy(polyKey[:], firstBlock[:32])
|
||||
|
||||
var tag [TagSize]byte
|
||||
ciphertext := box[TagSize:]
|
||||
hash := poly1305.New(polyKey)
|
||||
hash := poly1305.New(&polyKey)
|
||||
hash.Write(ciphertext)
|
||||
hash.Sum(tag[:0])
|
||||
if subtle.ConstantTimeCompare(tag[:], box[:TagSize]) != 1 {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue