Update deps; especially for chacha20

This commit is contained in:
Frank Denis 2018-03-26 20:46:25 +02:00
parent 9224e79c59
commit 10f0503d50
23 changed files with 1785 additions and 1282 deletions

10
Gopkg.lock generated
View file

@ -23,7 +23,7 @@
branch = "master"
name = "github.com/aead/chacha20"
packages = ["chacha"]
revision = "c8d29375923a8e1d2a0f0dc0fc1d8a0aba5b97ba"
revision = "e0d4ab3067da29fbce5b60445bed6d54c41c3c62"
[[projects]]
branch = "master"
@ -122,7 +122,7 @@
branch = "master"
name = "github.com/kardianos/service"
packages = ["."]
revision = "0ab6efe2ea51f0531a8ceaaa33416b3aab844c28"
revision = "615a14ed75099c9eaac6949e22ac2341bf9d3197"
[[projects]]
name = "github.com/miekg/dns"
@ -148,7 +148,7 @@
"poly1305",
"salsa20/salsa"
]
revision = "374053ea96cb300f8671b8d3b07edeeb06e203b4"
revision = "88942b9c40a4c9d203b82b3731787b672d6e809b"
[[projects]]
branch = "master"
@ -160,7 +160,7 @@
"ipv4",
"ipv6"
]
revision = "24dd3780ca4f75fed9f321890729414a4b5d3f13"
revision = "6078986fec03a1dcc236c34816c71b0e05018fda"
[[projects]]
branch = "master"
@ -172,7 +172,7 @@
"windows/svc/eventlog",
"windows/svc/mgr"
]
revision = "2f1e207ee39ff70f3433e49c6eb52677a515e3b5"
revision = "13d03a9a82fba647c21a0ef8fba44a795d0f0835"
[[projects]]
name = "gopkg.in/natefinch/lumberjack.v2"

View file

@ -1,19 +1,23 @@
language: go
go:
- 1.5.3
- 1.6
- 1.7
- 1.8
- master
- "1.8.7"
- "1.9.4"
- "1.10"
env:
- ARCH=x86_64
- ARCH=i686
- TRAVIS_GOARCH=amd64
- TRAVIS_GOARCH=386
- TRAVIS_GOARCH=386
before_install:
- export GOARCH=$TRAVIS_GOARCH
branches:
only:
- master
script:
- diff -au <(gofmt -d .) <(printf "")
- go test -v ./...

View file

@ -1,16 +1,19 @@
[![Godoc Reference](https://godoc.org/github.com/aead/chacha20?status.svg)](https://godoc.org/github.com/aead/chacha20)
[![Build Status](https://travis-ci.org/aead/chacha20.svg?branch=master)](https://travis-ci.org/aead/chacha20)
[![Go Report Card](https://goreportcard.com/badge/aead/chacha20)](https://goreportcard.com/report/aead/chacha20)
## The ChaCha20 stream cipher
ChaCha is a stream cipher family created by Daniel J. Bernstein.
The most common ChaCha cipher is ChaCha20 (20 rounds). ChaCha20 is standardized in [RFC 7539](https://tools.ietf.org/html/rfc7539 "RFC 7539").
The most common ChaCha variant is ChaCha20 (20 rounds). ChaCha20 is
standardized in [RFC 7539](https://tools.ietf.org/html/rfc7539 "RFC 7539").
This package provides implementations of three ChaCha versions:
- ChaCha20 with a 64 bit nonce (can en/decrypt up to 2^64 * 64 bytes for one key-nonce combination)
- ChaCha20 with a 96 bit nonce (can en/decrypt up to 2^32 * 64 bytes ~ 256 GB for one key-nonce combination)
- XChaCha20 with a 192 bit nonce (can en/decrypt up to 2^64 * 64 bytes for one key-nonce combination)
Furthermore the chacha subpackage implements ChaCha20/12 and ChaCha20/8.
Furthermore the chacha sub package implements ChaCha20/12 and ChaCha20/8.
These versions use 12 or 8 rounds instead of 20.
But it's recommended to use ChaCha20 (with 20 rounds) - it will be fast enough for almost all purposes.
@ -18,8 +21,8 @@ But it's recommended to use ChaCha20 (with 20 rounds) - it will be fast enough f
Install in your GOPATH: `go get -u github.com/aead/chacha20`
### Requirements
All go versions >= 1.5.3 are supported.
Please notice, that the amd64 AVX2 asm implementation requires go1.7 or newer.
All go versions >= 1.8.7 are supported.
The code may also work on Go 1.7 but this is not tested.
### Performance

View file

@ -28,6 +28,7 @@ const (
var (
useSSE2 bool
useSSSE3 bool
useAVX bool
useAVX2 bool
)

View file

@ -2,111 +2,10 @@
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// +build go1.7,amd64,!gccgo,!appengine,!nacl
// +build amd64,!gccgo,!appengine,!nacl
#include "textflag.h"
DATA ·sigma_AVX<>+0x00(SB)/4, $0x61707865
DATA ·sigma_AVX<>+0x04(SB)/4, $0x3320646e
DATA ·sigma_AVX<>+0x08(SB)/4, $0x79622d32
DATA ·sigma_AVX<>+0x0C(SB)/4, $0x6b206574
GLOBL ·sigma_AVX<>(SB), (NOPTR+RODATA), $16
DATA ·one_AVX<>+0x00(SB)/8, $1
DATA ·one_AVX<>+0x08(SB)/8, $0
GLOBL ·one_AVX<>(SB), (NOPTR+RODATA), $16
DATA ·one_AVX2<>+0x00(SB)/8, $0
DATA ·one_AVX2<>+0x08(SB)/8, $0
DATA ·one_AVX2<>+0x10(SB)/8, $1
DATA ·one_AVX2<>+0x18(SB)/8, $0
GLOBL ·one_AVX2<>(SB), (NOPTR+RODATA), $32
DATA ·two_AVX2<>+0x00(SB)/8, $2
DATA ·two_AVX2<>+0x08(SB)/8, $0
DATA ·two_AVX2<>+0x10(SB)/8, $2
DATA ·two_AVX2<>+0x18(SB)/8, $0
GLOBL ·two_AVX2<>(SB), (NOPTR+RODATA), $32
DATA ·rol16_AVX2<>+0x00(SB)/8, $0x0504070601000302
DATA ·rol16_AVX2<>+0x08(SB)/8, $0x0D0C0F0E09080B0A
DATA ·rol16_AVX2<>+0x10(SB)/8, $0x0504070601000302
DATA ·rol16_AVX2<>+0x18(SB)/8, $0x0D0C0F0E09080B0A
GLOBL ·rol16_AVX2<>(SB), (NOPTR+RODATA), $32
DATA ·rol8_AVX2<>+0x00(SB)/8, $0x0605040702010003
DATA ·rol8_AVX2<>+0x08(SB)/8, $0x0E0D0C0F0A09080B
DATA ·rol8_AVX2<>+0x10(SB)/8, $0x0605040702010003
DATA ·rol8_AVX2<>+0x18(SB)/8, $0x0E0D0C0F0A09080B
GLOBL ·rol8_AVX2<>(SB), (NOPTR+RODATA), $32
#define ROTL(n, t, v) \
VPSLLD $n, v, t; \
VPSRLD $(32-n), v, v; \
VPXOR v, t, v
#define CHACHA_QROUND(v0, v1, v2, v3, t, c16, c8) \
VPADDD v0, v1, v0; \
VPXOR v3, v0, v3; \
VPSHUFB c16, v3, v3; \
VPADDD v2, v3, v2; \
VPXOR v1, v2, v1; \
ROTL(12, t, v1); \
VPADDD v0, v1, v0; \
VPXOR v3, v0, v3; \
VPSHUFB c8, v3, v3; \
VPADDD v2, v3, v2; \
VPXOR v1, v2, v1; \
ROTL(7, t, v1)
#define CHACHA_SHUFFLE(v1, v2, v3) \
VPSHUFD $0x39, v1, v1; \
VPSHUFD $0x4E, v2, v2; \
VPSHUFD $-109, v3, v3
#define XOR_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \
VMOVDQU (0+off)(src), t0; \
VPERM2I128 $32, v1, v0, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (0+off)(dst); \
VMOVDQU (32+off)(src), t0; \
VPERM2I128 $32, v3, v2, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (32+off)(dst); \
VMOVDQU (64+off)(src), t0; \
VPERM2I128 $49, v1, v0, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (64+off)(dst); \
VMOVDQU (96+off)(src), t0; \
VPERM2I128 $49, v3, v2, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (96+off)(dst)
#define XOR_UPPER_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \
VMOVDQU (0+off)(src), t0; \
VPERM2I128 $32, v1, v0, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (0+off)(dst); \
VMOVDQU (32+off)(src), t0; \
VPERM2I128 $32, v3, v2, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (32+off)(dst); \
#define EXTRACT_LOWER(dst, v0, v1, v2, v3, t0) \
VPERM2I128 $49, v1, v0, t0; \
VMOVDQU t0, 0(dst); \
VPERM2I128 $49, v3, v2, t0; \
VMOVDQU t0, 32(dst)
#define XOR_AVX(dst, src, off, v0, v1, v2, v3, t0) \
VPXOR 0+off(src), v0, t0; \
VMOVDQU t0, 0+off(dst); \
VPXOR 16+off(src), v1, t0; \
VMOVDQU t0, 16+off(dst); \
VPXOR 32+off(src), v2, t0; \
VMOVDQU t0, 32+off(dst); \
VPXOR 48+off(src), v3, t0; \
VMOVDQU t0, 48+off(dst)
#include "const.s"
#include "macro.s"
#define TWO 0(SP)
#define C16 32(SP)
@ -185,28 +84,28 @@ at_least_512:
chacha_loop_512:
VMOVDQA Y8, TMP_0
CHACHA_QROUND(Y0, Y1, Y2, Y3, Y8, C16, C8)
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y8, C16, C8)
CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y8, C16, C8)
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y8, C16, C8)
VMOVDQA TMP_0, Y8
VMOVDQA Y0, TMP_0
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y0, C16, C8)
CHACHA_QROUND(Y12, Y13, Y14, Y15, Y0, C16, C8)
CHACHA_SHUFFLE(Y1, Y2, Y3)
CHACHA_SHUFFLE(Y5, Y6, Y7)
CHACHA_SHUFFLE(Y9, Y10, Y11)
CHACHA_SHUFFLE(Y13, Y14, Y15)
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y0, C16, C8)
CHACHA_QROUND_AVX(Y12, Y13, Y14, Y15, Y0, C16, C8)
CHACHA_SHUFFLE_AVX(Y1, Y2, Y3)
CHACHA_SHUFFLE_AVX(Y5, Y6, Y7)
CHACHA_SHUFFLE_AVX(Y9, Y10, Y11)
CHACHA_SHUFFLE_AVX(Y13, Y14, Y15)
CHACHA_QROUND(Y12, Y13, Y14, Y15, Y0, C16, C8)
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y0, C16, C8)
CHACHA_QROUND_AVX(Y12, Y13, Y14, Y15, Y0, C16, C8)
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y0, C16, C8)
VMOVDQA TMP_0, Y0
VMOVDQA Y8, TMP_0
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y8, C16, C8)
CHACHA_QROUND(Y0, Y1, Y2, Y3, Y8, C16, C8)
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y8, C16, C8)
CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y8, C16, C8)
VMOVDQA TMP_0, Y8
CHACHA_SHUFFLE(Y3, Y2, Y1)
CHACHA_SHUFFLE(Y7, Y6, Y5)
CHACHA_SHUFFLE(Y11, Y10, Y9)
CHACHA_SHUFFLE(Y15, Y14, Y13)
CHACHA_SHUFFLE_AVX(Y3, Y2, Y1)
CHACHA_SHUFFLE_AVX(Y7, Y6, Y5)
CHACHA_SHUFFLE_AVX(Y11, Y10, Y9)
CHACHA_SHUFFLE_AVX(Y15, Y14, Y13)
SUBQ $2, R9
JA chacha_loop_512
@ -289,18 +188,18 @@ between_320_and_448:
MOVQ DX, R9
chacha_loop_384:
CHACHA_QROUND(Y0, Y1, Y2, Y3, Y13, Y14, Y15)
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
CHACHA_SHUFFLE(Y1, Y2, Y3)
CHACHA_SHUFFLE(Y5, Y6, Y7)
CHACHA_SHUFFLE(Y9, Y10, Y11)
CHACHA_QROUND(Y0, Y1, Y2, Y3, Y13, Y14, Y15)
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
CHACHA_SHUFFLE(Y3, Y2, Y1)
CHACHA_SHUFFLE(Y7, Y6, Y5)
CHACHA_SHUFFLE(Y11, Y10, Y9)
CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y13, Y14, Y15)
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
CHACHA_SHUFFLE_AVX(Y1, Y2, Y3)
CHACHA_SHUFFLE_AVX(Y5, Y6, Y7)
CHACHA_SHUFFLE_AVX(Y9, Y10, Y11)
CHACHA_QROUND_AVX(Y0, Y1, Y2, Y3, Y13, Y14, Y15)
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
CHACHA_SHUFFLE_AVX(Y3, Y2, Y1)
CHACHA_SHUFFLE_AVX(Y7, Y6, Y5)
CHACHA_SHUFFLE_AVX(Y11, Y10, Y9)
SUBQ $2, R9
JA chacha_loop_384
@ -361,14 +260,14 @@ between_192_and_320:
MOVQ DX, R9
chacha_loop_256:
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
CHACHA_SHUFFLE(Y5, Y6, Y7)
CHACHA_SHUFFLE(Y9, Y10, Y11)
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_QROUND(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
CHACHA_SHUFFLE(Y7, Y6, Y5)
CHACHA_SHUFFLE(Y11, Y10, Y9)
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
CHACHA_SHUFFLE_AVX(Y5, Y6, Y7)
CHACHA_SHUFFLE_AVX(Y9, Y10, Y11)
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_QROUND_AVX(Y8, Y9, Y10, Y11, Y13, Y14, Y15)
CHACHA_SHUFFLE_AVX(Y7, Y6, Y5)
CHACHA_SHUFFLE_AVX(Y11, Y10, Y9)
SUBQ $2, R9
JA chacha_loop_256
@ -413,10 +312,10 @@ between_64_and_192:
MOVQ DX, R9
chacha_loop_128:
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_SHUFFLE(Y5, Y6, Y7)
CHACHA_QROUND(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_SHUFFLE(Y7, Y6, Y5)
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_SHUFFLE_AVX(Y5, Y6, Y7)
CHACHA_QROUND_AVX(Y4, Y5, Y6, Y7, Y13, Y14, Y15)
CHACHA_SHUFFLE_AVX(Y7, Y6, Y5)
SUBQ $2, R9
JA chacha_loop_128
@ -455,10 +354,10 @@ between_0_and_64:
MOVQ DX, R9
chacha_loop_64:
CHACHA_QROUND(X4, X5, X6, X7, X13, X14, X15)
CHACHA_SHUFFLE(X5, X6, X7)
CHACHA_QROUND(X4, X5, X6, X7, X13, X14, X15)
CHACHA_SHUFFLE(X7, X6, X5)
CHACHA_QROUND_AVX(X4, X5, X6, X7, X13, X14, X15)
CHACHA_SHUFFLE_AVX(X5, X6, X7)
CHACHA_QROUND_AVX(X4, X5, X6, X7, X13, X14, X15)
CHACHA_SHUFFLE_AVX(X7, X6, X5)
SUBQ $2, R9
JA chacha_loop_64
@ -466,7 +365,7 @@ chacha_loop_64:
VPADDD X1, X5, X5
VPADDD X2, X6, X6
VPADDD X3, X7, X7
VMOVDQU ·one_AVX<>(SB), X0
VMOVDQU ·one<>(SB), X0
VPADDQ X0, X3, X3
CMPQ CX, $64
@ -505,38 +404,3 @@ done:
MOVQ CX, ret+72(FP)
RET
// func hChaCha20AVX(out *[32]byte, nonce *[16]byte, key *[32]byte)
TEXT ·hChaCha20AVX(SB), 4, $0-24
MOVQ out+0(FP), DI
MOVQ nonce+8(FP), AX
MOVQ key+16(FP), BX
VMOVDQU ·sigma_AVX<>(SB), X0
VMOVDQU 0(BX), X1
VMOVDQU 16(BX), X2
VMOVDQU 0(AX), X3
VMOVDQU ·rol16_AVX2<>(SB), X5
VMOVDQU ·rol8_AVX2<>(SB), X6
MOVQ $20, CX
chacha_loop:
CHACHA_QROUND(X0, X1, X2, X3, X4, X5, X6)
CHACHA_SHUFFLE(X1, X2, X3)
CHACHA_QROUND(X0, X1, X2, X3, X4, X5, X6)
CHACHA_SHUFFLE(X3, X2, X1)
SUBQ $2, CX
JNZ chacha_loop
VMOVDQU X0, 0(DI)
VMOVDQU X3, 16(DI)
VZEROUPPER
RET
// func supportsAVX2() bool
TEXT ·supportsAVX2(SB), 4, $0-1
MOVQ runtime·support_avx(SB), AX
MOVQ runtime·support_avx2(SB), BX
ANDQ AX, BX
MOVB BX, ret+0(FP)
RET

View file

@ -11,6 +11,7 @@ import "encoding/binary"
func init() {
useSSE2 = supportsSSE2()
useSSSE3 = supportsSSSE3()
useAVX = false
useAVX2 = false
}

View file

@ -4,239 +4,25 @@
// +build 386,!gccgo,!appengine,!nacl
#include "textflag.h"
DATA ·sigma<>+0x00(SB)/4, $0x61707865
DATA ·sigma<>+0x04(SB)/4, $0x3320646e
DATA ·sigma<>+0x08(SB)/4, $0x79622d32
DATA ·sigma<>+0x0C(SB)/4, $0x6b206574
GLOBL ·sigma<>(SB), (NOPTR+RODATA), $16
DATA ·one<>+0x00(SB)/8, $1
DATA ·one<>+0x08(SB)/8, $0
GLOBL ·one<>(SB), (NOPTR+RODATA), $16
DATA ·rol16<>+0x00(SB)/8, $0x0504070601000302
DATA ·rol16<>+0x08(SB)/8, $0x0D0C0F0E09080B0A
GLOBL ·rol16<>(SB), (NOPTR+RODATA), $16
DATA ·rol8<>+0x00(SB)/8, $0x0605040702010003
DATA ·rol8<>+0x08(SB)/8, $0x0E0D0C0F0A09080B
GLOBL ·rol8<>(SB), (NOPTR+RODATA), $16
#define ROTL_SSE2(n, t, v) \
MOVO v, t; \
PSLLL $n, t; \
PSRLL $(32-n), v; \
PXOR t, v
#define CHACHA_QROUND_SSE2(v0, v1, v2, v3, t0) \
PADDL v1, v0; \
PXOR v0, v3; \
ROTL_SSE2(16, t0, v3); \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE2(12, t0, v1); \
PADDL v1, v0; \
PXOR v0, v3; \
ROTL_SSE2(8, t0, v3); \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE2(7, t0, v1)
#define CHACHA_QROUND_SSSE3(v0, v1, v2, v3, t0, r16, r8) \
PADDL v1, v0; \
PXOR v0, v3; \
PSHUFB r16, v3; \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE2(12, t0, v1); \
PADDL v1, v0; \
PXOR v0, v3; \
PSHUFB r8, v3; \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE2(7, t0, v1)
#define CHACHA_SHUFFLE(v1, v2, v3) \
PSHUFL $0x39, v1, v1; \
PSHUFL $0x4E, v2, v2; \
PSHUFL $0x93, v3, v3
#define XOR(dst, src, off, v0, v1, v2, v3, t0) \
MOVOU 0+off(src), t0; \
PXOR v0, t0; \
MOVOU t0, 0+off(dst); \
MOVOU 16+off(src), t0; \
PXOR v1, t0; \
MOVOU t0, 16+off(dst); \
MOVOU 32+off(src), t0; \
PXOR v2, t0; \
MOVOU t0, 32+off(dst); \
MOVOU 48+off(src), t0; \
PXOR v3, t0; \
MOVOU t0, 48+off(dst)
#include "const.s"
#include "macro.s"
// FINALIZE xors len bytes from src and block using
// the temp. registers t0 and t1 and writes the result
// to dst.
#define FINALIZE(dst, src, block, len, t0, t1) \
XORL t0, t0; \
XORL t1, t1; \
finalize: \
MOVB 0(src), t0; \
MOVB 0(block), t1; \
XORL t0, t1; \
MOVB t1, 0(dst); \
INCL src; \
INCL block; \
INCL dst; \
DECL len; \
JA finalize \
// func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
TEXT ·xorKeyStreamSSE2(SB), 4, $0-40
MOVL dst_base+0(FP), DI
MOVL src_base+12(FP), SI
MOVL src_len+16(FP), CX
MOVL state+28(FP), AX
MOVL rounds+32(FP), DX
MOVOU 0(AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
TESTL CX, CX
JZ done
at_least_64:
MOVO X0, X4
MOVO X1, X5
MOVO X2, X6
MOVO X3, X7
MOVL DX, BX
chacha_loop:
CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
CHACHA_SHUFFLE(X5, X6, X7)
CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
CHACHA_SHUFFLE(X7, X6, X5)
SUBL $2, BX
JA chacha_loop
MOVOU 0(AX), X0
PADDL X0, X4
PADDL X1, X5
PADDL X2, X6
PADDL X3, X7
MOVOU ·one<>(SB), X0
PADDQ X0, X3
CMPL CX, $64
JB less_than_64
XOR(DI, SI, 0, X4, X5, X6, X7, X0)
MOVOU 0(AX), X0
ADDL $64, SI
ADDL $64, DI
SUBL $64, CX
JNZ at_least_64
less_than_64:
MOVL CX, BP
TESTL BP, BP
JZ done
MOVL block+24(FP), BX
MOVOU X4, 0(BX)
MOVOU X5, 16(BX)
MOVOU X6, 32(BX)
MOVOU X7, 48(BX)
FINALIZE(DI, SI, BX, BP, AX, DX)
done:
MOVL state+28(FP), AX
MOVOU X3, 48(AX)
MOVL CX, ret+36(FP)
RET
// func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
TEXT ·xorKeyStreamSSSE3(SB), 4, $64-40
MOVL dst_base+0(FP), DI
MOVL src_base+12(FP), SI
MOVL src_len+16(FP), CX
MOVL state+28(FP), AX
MOVL rounds+32(FP), DX
MOVOU 48(AX), X3
TESTL CX, CX
JZ done
MOVL SP, BP
ADDL $16, SP
ANDL $-16, SP
MOVOU ·one<>(SB), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVO X0, 0(SP)
MOVO X1, 16(SP)
MOVO X2, 32(SP)
MOVOU 0(AX), X0
MOVOU ·rol16<>(SB), X1
MOVOU ·rol8<>(SB), X2
at_least_64:
MOVO X0, X4
MOVO 16(SP), X5
MOVO 32(SP), X6
MOVO X3, X7
MOVL DX, BX
chacha_loop:
CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X0, X1, X2)
CHACHA_SHUFFLE(X5, X6, X7)
CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X0, X1, X2)
CHACHA_SHUFFLE(X7, X6, X5)
SUBL $2, BX
JA chacha_loop
MOVOU 0(AX), X0
PADDL X0, X4
PADDL 16(SP), X5
PADDL 32(SP), X6
PADDL X3, X7
PADDQ 0(SP), X3
CMPL CX, $64
JB less_than_64
XOR(DI, SI, 0, X4, X5, X6, X7, X0)
MOVOU 0(AX), X0
ADDL $64, SI
ADDL $64, DI
SUBL $64, CX
JNZ at_least_64
less_than_64:
MOVL BP, SP
MOVL CX, BP
TESTL BP, BP
JE done
MOVL block+24(FP), BX
MOVOU X4, 0(BX)
MOVOU X5, 16(BX)
MOVOU X6, 32(BX)
MOVOU X7, 48(BX)
FINALIZE(DI, SI, BX, BP, AX, DX)
done:
MOVL state+28(FP), AX
MOVOU X3, 48(AX)
MOVL CX, ret+36(FP)
RET
XORL t0, t0; \
XORL t1, t1; \
FINALIZE_LOOP:; \
MOVB 0(src), t0; \
MOVB 0(block), t1; \
XORL t0, t1; \
MOVB t1, 0(dst); \
INCL src; \
INCL block; \
INCL dst; \
DECL len; \
JA FINALIZE_LOOP \
// func supportsSSE2() bool
TEXT ·supportsSSE2(SB), NOSPLIT, $0-1
@ -258,54 +44,240 @@ TEXT ·supportsSSSE3(SB), NOSPLIT, $0-1
MOVB CX, ret+0(FP)
RET
#define Dst DI
#define Nonce AX
#define Key BX
#define Rounds CX
// func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
TEXT ·hChaCha20SSE2(SB), 4, $0-12
MOVL out+0(FP), DI
MOVL nonce+4(FP), AX
MOVL key+8(FP), BX
MOVL out+0(FP), Dst
MOVL nonce+4(FP), Nonce
MOVL key+8(FP), Key
MOVOU ·sigma<>(SB), X0
MOVOU 0(BX), X1
MOVOU 16(BX), X2
MOVOU 0(AX), X3
MOVL $20, CX
MOVOU 0*16(Key), X1
MOVOU 1*16(Key), X2
MOVOU 0*16(Nonce), X3
MOVL $20, Rounds
chacha_loop:
CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
CHACHA_SHUFFLE(X1, X2, X3)
CHACHA_SHUFFLE_SSE(X1, X2, X3)
CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
CHACHA_SHUFFLE(X3, X2, X1)
SUBL $2, CX
CHACHA_SHUFFLE_SSE(X3, X2, X1)
SUBL $2, Rounds
JNZ chacha_loop
MOVOU X0, 0(DI)
MOVOU X3, 16(DI)
MOVOU X0, 0*16(Dst)
MOVOU X3, 1*16(Dst)
RET
// func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
TEXT ·hChaCha20SSSE3(SB), 4, $0-12
MOVL out+0(FP), DI
MOVL nonce+4(FP), AX
MOVL key+8(FP), BX
MOVL out+0(FP), Dst
MOVL nonce+4(FP), Nonce
MOVL key+8(FP), Key
MOVOU ·sigma<>(SB), X0
MOVOU 0(BX), X1
MOVOU 16(BX), X2
MOVOU 0(AX), X3
MOVOU 0*16(Key), X1
MOVOU 1*16(Key), X2
MOVOU 0*16(Nonce), X3
MOVL $20, Rounds
MOVOU ·rol16<>(SB), X5
MOVOU ·rol8<>(SB), X6
MOVL $20, CX
chacha_loop:
CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
CHACHA_SHUFFLE(X1, X2, X3)
CHACHA_SHUFFLE_SSE(X1, X2, X3)
CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
CHACHA_SHUFFLE(X3, X2, X1)
SUBL $2, CX
CHACHA_SHUFFLE_SSE(X3, X2, X1)
SUBL $2, Rounds
JNZ chacha_loop
MOVOU X0, 0(DI)
MOVOU X3, 16(DI)
MOVOU X0, 0*16(Dst)
MOVOU X3, 1*16(Dst)
RET
#undef Dst
#undef Nonce
#undef Key
#undef Rounds
#define State AX
#define Dst DI
#define Src SI
#define Len CX
#define Rounds DX
#define Tmp0 BX
#define Tmp1 BP
// func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
TEXT ·xorKeyStreamSSE2(SB), 4, $0-40
MOVL dst_base+0(FP), Dst
MOVL src_base+12(FP), Src
MOVL src_len+16(FP), Len
MOVL state+28(FP), State
MOVL rounds+32(FP), Rounds
MOVOU 0*16(State), X0
MOVOU 1*16(State), X1
MOVOU 2*16(State), X2
MOVOU 3*16(State), X3
TESTL Len, Len
JZ DONE
GENERATE_KEYSTREAM:
MOVO X0, X4
MOVO X1, X5
MOVO X2, X6
MOVO X3, X7
MOVL Rounds, Tmp0
CHACHA_LOOP:
CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
CHACHA_SHUFFLE_SSE(X5, X6, X7)
CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
CHACHA_SHUFFLE_SSE(X7, X6, X5)
SUBL $2, Tmp0
JA CHACHA_LOOP
MOVOU 0*16(State), X0 // Restore X0 from state
PADDL X0, X4
PADDL X1, X5
PADDL X2, X6
PADDL X3, X7
MOVOU ·one<>(SB), X0
PADDQ X0, X3
CMPL Len, $64
JB BUFFER_KEYSTREAM
XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X0)
MOVOU 0*16(State), X0 // Restore X0 from state
ADDL $64, Src
ADDL $64, Dst
SUBL $64, Len
JZ DONE
JMP GENERATE_KEYSTREAM // There is at least one more plaintext byte
BUFFER_KEYSTREAM:
MOVL block+24(FP), State
MOVOU X4, 0(State)
MOVOU X5, 16(State)
MOVOU X6, 32(State)
MOVOU X7, 48(State)
MOVL Len, Rounds // Use Rounds as tmp. register for Len - we don't need Rounds anymore
FINALIZE(Dst, Src, State, Rounds, Tmp0, Tmp1)
DONE:
MOVL state+28(FP), State
MOVOU X3, 3*16(State)
MOVL Len, ret+36(FP) // Number of bytes written to the keystream buffer - 0 iff Len mod 64 == 0
RET
#undef State
#undef Dst
#undef Src
#undef Len
#undef Rounds
#undef Tmp0
#undef Tmp1
#define Dst DI
#define Src SI
#define Len CX
#define Rounds DX
#define State SP
#define Stack State
#define Tmp0 AX
#define Tmp1 BX
#define Tmp2 BP
// func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
TEXT ·xorKeyStreamSSSE3(SB), 4, $80-40
MOVL dst_base+0(FP), Dst
MOVL src_base+12(FP), Src
MOVL src_len+16(FP), Len
MOVL state+28(FP), Tmp0
MOVL rounds+32(FP), Rounds
MOVL Stack, Tmp2 // save stack pointer
ADDL $16, Stack // ensure 16 byte stack alignment
ANDL $-16, Stack
MOVOU 0*16(Tmp0), X0
MOVOU 1*16(Tmp0), X1
MOVOU 2*16(Tmp0), X2
MOVOU 3*16(Tmp0), X3
TESTL Len, Len
JZ DONE
MOVOU ·one<>(SB), X4
MOVO X0, 0*16(State)
MOVO X1, 1*16(State)
MOVO X2, 2*16(State)
MOVO X4, 3*16(Stack) // store constant on stack
MOVOU ·rol16<>(SB), X1
MOVOU ·rol8<>(SB), X2
GENERATE_KEYSTREAM:
MOVO 0*16(State), X4
MOVO 1*16(State), X5
MOVO 2*16(State), X6
MOVO X3, X7
MOVL Rounds, Tmp0
CHACHA_LOOP:
CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X0, X1, X2)
CHACHA_SHUFFLE_SSE(X5, X6, X7)
CHACHA_QROUND_SSSE3(X4, X5, X6, X7, X0, X1, X2)
CHACHA_SHUFFLE_SSE(X7, X6, X5)
SUBL $2, Tmp0
JA CHACHA_LOOP
PADDL 0*16(State), X4
PADDL 1*16(State), X5
PADDL 2*16(State), X6
PADDL X3, X7
PADDQ 3*16(Stack), X3
CMPL Len, $64
JB BUFFER_KEYSTREAM
XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X0)
ADDL $64, Src
ADDL $64, Dst
SUBL $64, Len
JZ DONE
JMP GENERATE_KEYSTREAM
BUFFER_KEYSTREAM:
MOVL Tmp2, Stack // restore stack pointer
MOVL Len, Tmp2
MOVL block+24(FP), Tmp1
MOVOU X4, 0*16(Tmp1)
MOVOU X5, 1*16(Tmp1)
MOVOU X6, 2*16(Tmp1)
MOVOU X7, 3*16(Tmp1)
FINALIZE(DI, SI, Tmp1, Tmp2, Tmp0, Rounds)// we don't need the number of rounds anymore
MOVL Stack, Tmp2 // set BP to SP so that DONE resets SP correctly
DONE:
MOVL Tmp2, Stack // restore stack pointer
MOVL state+28(FP), Tmp0
MOVOU X3, 3*16(Tmp0)
MOVL Len, ret+36(FP)
RET
#undef Dst
#undef Src
#undef Len
#undef Rounds
#undef State
#undef Stack
#undef Tmp0
#undef Tmp1
#undef Tmp2

View file

@ -9,6 +9,7 @@ package chacha
func init() {
useSSE2 = true
useSSSE3 = supportsSSSE3()
useAVX = supportsAVX()
useAVX2 = supportsAVX2() && false // disable until #16 is fixed
}
@ -20,7 +21,11 @@ func initialize(state *[64]byte, key []byte, nonce *[16]byte)
//go:noescape
func supportsSSSE3() bool
// This function is implemented in chachaAVX2_amd64.s
// This function is implemented in chacha_amd64.s
//go:noescape
func supportsAVX() bool
// This function is implemented in chacha_amd64.s
//go:noescape
func supportsAVX2() bool
@ -44,12 +49,16 @@ func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
//go:noescape
func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
// This function is implemented in chacha_amd64.s
//go:noescape
func xorKeyStreamAVX(dst, src []byte, block, state *[64]byte, rounds int) int
// This function is implemented in chachaAVX2_amd64.s
//go:noescape
func xorKeyStreamAVX2(dst, src []byte, block, state *[64]byte, rounds int) int
func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
if useAVX2 {
if useAVX {
hChaCha20AVX(out, nonce, key)
} else if useSSSE3 {
hChaCha20SSSE3(out, nonce, key)
@ -63,6 +72,8 @@ func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
if useAVX2 {
return xorKeyStreamAVX2(dst, src, block, state, rounds)
} else if useAVX {
return xorKeyStreamAVX(dst, src, block, state, rounds)
} else if useSSSE3 {
return xorKeyStreamSSSE3(dst, src, block, state, rounds)
} else if useSSE2 { // on amd64 this is always true - necessary for testing generic on amd64

File diff suppressed because it is too large Load diff

View file

@ -1,56 +0,0 @@
// Copyright (c) 2017 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// +build amd64,!gccgo,!appengine,!nacl,!go1.7
package chacha
func init() {
useSSE2 = true
useSSSE3 = supportsSSSE3()
useAVX2 = false
}
// This function is implemented in chacha_amd64.s
//go:noescape
func initialize(state *[64]byte, key []byte, nonce *[16]byte)
// This function is implemented in chacha_amd64.s
//go:noescape
func supportsSSSE3() bool
// This function is implemented in chacha_amd64.s
//go:noescape
func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
// This function is implemented in chacha_amd64.s
//go:noescape
func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
// This function is implemented in chacha_amd64.s
//go:noescape
func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
// This function is implemented in chacha_amd64.s
//go:noescape
func xorKeyStreamSSSE3(dst, src []byte, block, state *[64]byte, rounds int) int
func hChaCha20(out *[32]byte, nonce *[16]byte, key *[32]byte) {
if useSSSE3 {
hChaCha20SSSE3(out, nonce, key)
} else if useSSE2 { // on amd64 this is always true - used to test generic on amd64
hChaCha20SSE2(out, nonce, key)
} else {
hChaCha20Generic(out, nonce, key)
}
}
func xorKeyStream(dst, src []byte, block, state *[64]byte, rounds int) int {
if useSSSE3 {
return xorKeyStreamSSSE3(dst, src, block, state, rounds)
} else if useSSE2 { // on amd64 this is always true - used to test generic on amd64
return xorKeyStreamSSE2(dst, src, block, state, rounds)
}
return xorKeyStreamGeneric(dst, src, block, state, rounds)
}

View file

@ -8,6 +8,13 @@ package chacha
import "encoding/binary"
func init() {
useSSE2 = false
useSSSE3 = false
useAVX = false
useAVX2 = false
}
func initialize(state *[64]byte, key []byte, nonce *[16]byte) {
binary.LittleEndian.PutUint32(state[0:], sigma[0])
binary.LittleEndian.PutUint32(state[4:], sigma[1])

View file

@ -23,15 +23,20 @@ func fromHex(bits string) []byte {
}
func TestHChaCha20(t *testing.T) {
defer func(sse2, ssse3, avx2 bool) {
useSSE2, useSSSE3, useAVX2 = sse2, ssse3, avx2
}(useSSE2, useSSSE3, useAVX2)
defer func(sse2, ssse3, avx, avx2 bool) {
useSSE2, useSSSE3, useAVX, useAVX2 = sse2, ssse3, avx, avx2
}(useSSE2, useSSSE3, useAVX, useAVX2)
if useAVX2 {
t.Log("AVX2 version")
testHChaCha20(t)
useAVX2 = false
}
if useAVX {
t.Log("AVX version")
testIncremental(t, 5, 2049)
useAVX = false
}
if useSSSE3 {
t.Log("SSSE3 version")
testHChaCha20(t)
@ -47,15 +52,20 @@ func TestHChaCha20(t *testing.T) {
}
func TestVectors(t *testing.T) {
defer func(sse2, ssse3, avx2 bool) {
useSSE2, useSSSE3, useAVX2 = sse2, ssse3, avx2
}(useSSE2, useSSSE3, useAVX2)
defer func(sse2, ssse3, avx, avx2 bool) {
useSSE2, useSSSE3, useAVX, useAVX2 = sse2, ssse3, avx, avx2
}(useSSE2, useSSSE3, useAVX, useAVX2)
if useAVX2 {
t.Log("AVX2 version")
testVectors(t)
useAVX2 = false
}
if useAVX {
t.Log("AVX version")
testIncremental(t, 5, 2049)
useAVX = false
}
if useSSSE3 {
t.Log("SSSE3 version")
testVectors(t)
@ -71,15 +81,20 @@ func TestVectors(t *testing.T) {
}
func TestIncremental(t *testing.T) {
defer func(sse2, ssse3, avx2 bool) {
useSSE2, useSSSE3, useAVX2 = sse2, ssse3, avx2
}(useSSE2, useSSSE3, useAVX2)
defer func(sse2, ssse3, avx, avx2 bool) {
useSSE2, useSSSE3, useAVX, useAVX2 = sse2, ssse3, avx, avx2
}(useSSE2, useSSSE3, useAVX, useAVX2)
if useAVX2 {
t.Log("AVX2 version")
testIncremental(t, 5, 2049)
useAVX2 = false
}
if useAVX {
t.Log("AVX version")
testIncremental(t, 5, 2049)
useAVX = false
}
if useSSSE3 {
t.Log("SSSE3 version")
testIncremental(t, 5, 2049)
@ -131,7 +146,7 @@ func testVectors(t *testing.T) {
}
func testIncremental(t *testing.T, iter int, size int) {
sse2, ssse3, avx2 := useSSE2, useSSSE3, useAVX2
sse2, ssse3, avx, avx2 := useSSE2, useSSSE3, useAVX, useAVX2
msg, ref, stream := make([]byte, size), make([]byte, size), make([]byte, size)
for i := 0; i < iter; i++ {
@ -154,21 +169,21 @@ func testIncremental(t *testing.T, iter int, size int) {
}
for j := 0; j <= len(msg); j++ {
useSSE2, useSSSE3, useAVX2 = false, false, false
useSSE2, useSSSE3, useAVX, useAVX2 = false, false, false, false
XORKeyStream(ref[:j], msg[:j], nonce, key[:], 20)
useSSE2, useSSSE3, useAVX2 = sse2, ssse3, avx2
useSSE2, useSSSE3, useAVX, useAVX2 = sse2, ssse3, avx, avx2
XORKeyStream(stream[:j], msg[:j], nonce, key[:], 20)
if !bytes.Equal(ref[:j], stream[:j]) {
t.Fatalf("Iteration %d failed:\n Message length: %d\n\n got: %s\nwant: %s", i, j, toHex(stream[:j]), toHex(ref[:j]))
}
useSSE2, useSSSE3, useAVX2 = false, false, false
useSSE2, useSSSE3, useAVX, useAVX2 = false, false, false, false
c, _ := NewCipher(nonce, key[:], 20)
c.XORKeyStream(stream[:j], msg[:j])
useSSE2, useSSSE3, useAVX2 = sse2, ssse3, avx2
useSSE2, useSSSE3, useAVX, useAVX2 = sse2, ssse3, avx, avx2
c, _ = NewCipher(nonce, key[:], 20)
c.XORKeyStream(stream[:j], msg[:j])

53
vendor/github.com/aead/chacha20/chacha/const.s generated vendored Normal file
View file

@ -0,0 +1,53 @@
// Copyright (c) 2018 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// +build 386,!gccgo,!appengine,!nacl amd64,!gccgo,!appengine,!nacl
#include "textflag.h"
DATA ·sigma<>+0x00(SB)/4, $0x61707865
DATA ·sigma<>+0x04(SB)/4, $0x3320646e
DATA ·sigma<>+0x08(SB)/4, $0x79622d32
DATA ·sigma<>+0x0C(SB)/4, $0x6b206574
GLOBL ·sigma<>(SB), (NOPTR+RODATA), $16 // The 4 ChaCha initialization constants
// SSE2/SSE3/AVX constants
DATA ·one<>+0x00(SB)/8, $1
DATA ·one<>+0x08(SB)/8, $0
GLOBL ·one<>(SB), (NOPTR+RODATA), $16 // The constant 1 as 128 bit value
DATA ·rol16<>+0x00(SB)/8, $0x0504070601000302
DATA ·rol16<>+0x08(SB)/8, $0x0D0C0F0E09080B0A
GLOBL ·rol16<>(SB), (NOPTR+RODATA), $16 // The PSHUFB 16 bit left rotate constant
DATA ·rol8<>+0x00(SB)/8, $0x0605040702010003
DATA ·rol8<>+0x08(SB)/8, $0x0E0D0C0F0A09080B
GLOBL ·rol8<>(SB), (NOPTR+RODATA), $16 // The PSHUFB 8 bit left rotate constant
// AVX2 constants
DATA ·one_AVX2<>+0x00(SB)/8, $0
DATA ·one_AVX2<>+0x08(SB)/8, $0
DATA ·one_AVX2<>+0x10(SB)/8, $1
DATA ·one_AVX2<>+0x18(SB)/8, $0
GLOBL ·one_AVX2<>(SB), (NOPTR+RODATA), $32 // The constant 1 as 256 bit value
DATA ·two_AVX2<>+0x00(SB)/8, $2
DATA ·two_AVX2<>+0x08(SB)/8, $0
DATA ·two_AVX2<>+0x10(SB)/8, $2
DATA ·two_AVX2<>+0x18(SB)/8, $0
GLOBL ·two_AVX2<>(SB), (NOPTR+RODATA), $32
DATA ·rol16_AVX2<>+0x00(SB)/8, $0x0504070601000302
DATA ·rol16_AVX2<>+0x08(SB)/8, $0x0D0C0F0E09080B0A
DATA ·rol16_AVX2<>+0x10(SB)/8, $0x0504070601000302
DATA ·rol16_AVX2<>+0x18(SB)/8, $0x0D0C0F0E09080B0A
GLOBL ·rol16_AVX2<>(SB), (NOPTR+RODATA), $32 // The VPSHUFB 16 bit left rotate constant
DATA ·rol8_AVX2<>+0x00(SB)/8, $0x0605040702010003
DATA ·rol8_AVX2<>+0x08(SB)/8, $0x0E0D0C0F0A09080B
DATA ·rol8_AVX2<>+0x10(SB)/8, $0x0605040702010003
DATA ·rol8_AVX2<>+0x18(SB)/8, $0x0E0D0C0F0A09080B
GLOBL ·rol8_AVX2<>(SB), (NOPTR+RODATA), $32 // The VPSHUFB 8 bit left rotate constant

163
vendor/github.com/aead/chacha20/chacha/macro.s generated vendored Normal file
View file

@ -0,0 +1,163 @@
// Copyright (c) 2018 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
// +build 386,!gccgo,!appengine,!nacl amd64,!gccgo,!appengine,!nacl
// ROTL_SSE rotates all 4 32 bit values of the XMM register v
// left by n bits using SSE2 instructions (0 <= n <= 32).
// The XMM register t is used as a temp. register.
#define ROTL_SSE(n, t, v) \
MOVO v, t; \
PSLLL $n, t; \
PSRLL $(32-n), v; \
PXOR t, v
// ROTL_AVX rotates all 4/8 32 bit values of the AVX/AVX2 register v
// left by n bits using AVX/AVX2 instructions (0 <= n <= 32).
// The AVX/AVX2 register t is used as a temp. register.
#define ROTL_AVX(n, t, v) \
VPSLLD $n, v, t; \
VPSRLD $(32-n), v, v; \
VPXOR v, t, v
// CHACHA_QROUND_SSE2 performs a ChaCha quarter-round using the
// 4 XMM registers v0, v1, v2 and v3. It uses only ROTL_SSE2 for
// rotations. The XMM register t is used as a temp. register.
#define CHACHA_QROUND_SSE2(v0, v1, v2, v3, t) \
PADDL v1, v0; \
PXOR v0, v3; \
ROTL_SSE(16, t, v3); \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE(12, t, v1); \
PADDL v1, v0; \
PXOR v0, v3; \
ROTL_SSE(8, t, v3); \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE(7, t, v1)
// CHACHA_QROUND_SSSE3 performs a ChaCha quarter-round using the
// 4 XMM registers v0, v1, v2 and v3. It uses PSHUFB for 8/16 bit
// rotations. The XMM register t is used as a temp. register.
//
// r16 holds the PSHUFB constant for a 16 bit left rotate.
// r8 holds the PSHUFB constant for a 8 bit left rotate.
#define CHACHA_QROUND_SSSE3(v0, v1, v2, v3, t, r16, r8) \
PADDL v1, v0; \
PXOR v0, v3; \
PSHUFB r16, v3; \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE(12, t, v1); \
PADDL v1, v0; \
PXOR v0, v3; \
PSHUFB r8, v3; \
PADDL v3, v2; \
PXOR v2, v1; \
ROTL_SSE(7, t, v1)
// CHACHA_QROUND_AVX performs a ChaCha quarter-round using the
// 4 AVX/AVX2 registers v0, v1, v2 and v3. It uses VPSHUFB for 8/16 bit
// rotations. The AVX/AVX2 register t is used as a temp. register.
//
// r16 holds the VPSHUFB constant for a 16 bit left rotate.
// r8 holds the VPSHUFB constant for a 8 bit left rotate.
#define CHACHA_QROUND_AVX(v0, v1, v2, v3, t, r16, r8) \
VPADDD v0, v1, v0; \
VPXOR v3, v0, v3; \
VPSHUFB r16, v3, v3; \
VPADDD v2, v3, v2; \
VPXOR v1, v2, v1; \
ROTL_AVX(12, t, v1); \
VPADDD v0, v1, v0; \
VPXOR v3, v0, v3; \
VPSHUFB r8, v3, v3; \
VPADDD v2, v3, v2; \
VPXOR v1, v2, v1; \
ROTL_AVX(7, t, v1)
// CHACHA_SHUFFLE_SSE performs a ChaCha shuffle using the
// 3 XMM registers v1, v2 and v3. The inverse shuffle is
// performed by switching v1 and v3: CHACHA_SHUFFLE_SSE(v3, v2, v1).
#define CHACHA_SHUFFLE_SSE(v1, v2, v3) \
PSHUFL $0x39, v1, v1; \
PSHUFL $0x4E, v2, v2; \
PSHUFL $0x93, v3, v3
// CHACHA_SHUFFLE_AVX performs a ChaCha shuffle using the
// 3 AVX/AVX2 registers v1, v2 and v3. The inverse shuffle is
// performed by switching v1 and v3: CHACHA_SHUFFLE_AVX(v3, v2, v1).
#define CHACHA_SHUFFLE_AVX(v1, v2, v3) \
VPSHUFD $0x39, v1, v1; \
VPSHUFD $0x4E, v2, v2; \
VPSHUFD $0x93, v3, v3
// XOR_SSE extracts 4x16 byte vectors from src at
// off, xors all vectors with the corresponding XMM
// register (v0 - v3) and writes the result to dst
// at off.
// The XMM register t is used as a temp. register.
#define XOR_SSE(dst, src, off, v0, v1, v2, v3, t) \
MOVOU 0+off(src), t; \
PXOR v0, t; \
MOVOU t, 0+off(dst); \
MOVOU 16+off(src), t; \
PXOR v1, t; \
MOVOU t, 16+off(dst); \
MOVOU 32+off(src), t; \
PXOR v2, t; \
MOVOU t, 32+off(dst); \
MOVOU 48+off(src), t; \
PXOR v3, t; \
MOVOU t, 48+off(dst)
// XOR_AVX extracts 4x16 byte vectors from src at
// off, xors all vectors with the corresponding AVX
// register (v0 - v3) and writes the result to dst
// at off.
// The XMM register t is used as a temp. register.
#define XOR_AVX(dst, src, off, v0, v1, v2, v3, t) \
VPXOR 0+off(src), v0, t; \
VMOVDQU t, 0+off(dst); \
VPXOR 16+off(src), v1, t; \
VMOVDQU t, 16+off(dst); \
VPXOR 32+off(src), v2, t; \
VMOVDQU t, 32+off(dst); \
VPXOR 48+off(src), v3, t; \
VMOVDQU t, 48+off(dst)
#define XOR_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \
VMOVDQU (0+off)(src), t0; \
VPERM2I128 $32, v1, v0, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (0+off)(dst); \
VMOVDQU (32+off)(src), t0; \
VPERM2I128 $32, v3, v2, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (32+off)(dst); \
VMOVDQU (64+off)(src), t0; \
VPERM2I128 $49, v1, v0, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (64+off)(dst); \
VMOVDQU (96+off)(src), t0; \
VPERM2I128 $49, v3, v2, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (96+off)(dst)
#define XOR_UPPER_AVX2(dst, src, off, v0, v1, v2, v3, t0, t1) \
VMOVDQU (0+off)(src), t0; \
VPERM2I128 $32, v1, v0, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (0+off)(dst); \
VMOVDQU (32+off)(src), t0; \
VPERM2I128 $32, v3, v2, t1; \
VPXOR t0, t1, t0; \
VMOVDQU t0, (32+off)(dst); \
#define EXTRACT_LOWER(dst, v0, v1, v2, v3, t0) \
VPERM2I128 $49, v1, v0, t0; \
VMOVDQU t0, 0(dst); \
VPERM2I128 $49, v3, v2, t0; \
VMOVDQU t0, 32(dst)

View file

@ -5,7 +5,7 @@ sudo: required
go:
- 1.8.x
- 1.9.x
- "1.10"
- 1.10.x
- master
before_install:

View file

@ -21,6 +21,13 @@ func isUpstart() bool {
if _, err := os.Stat("/sbin/upstart-udev-bridge"); err == nil {
return true
}
if _, err := os.Stat("/sbin/init"); err == nil {
if out, err := exec.Command("/sbin/init", "--version").Output(); err == nil {
if strings.Contains(string(out), "init (upstart") {
return true
}
}
}
return false
}

View file

@ -1,5 +1,5 @@
// go generate gen.go
// GENERATED BY THE COMMAND ABOVE; DO NOT EDIT
// Code generated by the command above; DO NOT EDIT.
// Package iana provides protocol number resources managed by the Internet Assigned Numbers Authority (IANA).
package iana // import "golang.org/x/net/internal/iana"
@ -38,7 +38,7 @@ const (
CongestionExperienced = 0x3 // CE (Congestion Experienced)
)
// Protocol Numbers, Updated: 2016-06-22
// Protocol Numbers, Updated: 2017-10-13
const (
ProtocolIP = 0 // IPv4 encapsulation, pseudo protocol number
ProtocolHOPOPT = 0 // IPv6 Hop-by-Hop Option
@ -178,3 +178,50 @@ const (
ProtocolROHC = 142 // Robust Header Compression
ProtocolReserved = 255 // Reserved
)
// Address Family Numbers, Updated: 2016-10-25
const (
AddrFamilyIPv4 = 1 // IP (IP version 4)
AddrFamilyIPv6 = 2 // IP6 (IP version 6)
AddrFamilyNSAP = 3 // NSAP
AddrFamilyHDLC = 4 // HDLC (8-bit multidrop)
AddrFamilyBBN1822 = 5 // BBN 1822
AddrFamily802 = 6 // 802 (includes all 802 media plus Ethernet "canonical format")
AddrFamilyE163 = 7 // E.163
AddrFamilyE164 = 8 // E.164 (SMDS, Frame Relay, ATM)
AddrFamilyF69 = 9 // F.69 (Telex)
AddrFamilyX121 = 10 // X.121 (X.25, Frame Relay)
AddrFamilyIPX = 11 // IPX
AddrFamilyAppletalk = 12 // Appletalk
AddrFamilyDecnetIV = 13 // Decnet IV
AddrFamilyBanyanVines = 14 // Banyan Vines
AddrFamilyE164withSubaddress = 15 // E.164 with NSAP format subaddress
AddrFamilyDNS = 16 // DNS (Domain Name System)
AddrFamilyDistinguishedName = 17 // Distinguished Name
AddrFamilyASNumber = 18 // AS Number
AddrFamilyXTPoverIPv4 = 19 // XTP over IP version 4
AddrFamilyXTPoverIPv6 = 20 // XTP over IP version 6
AddrFamilyXTPnativemodeXTP = 21 // XTP native mode XTP
AddrFamilyFibreChannelWorldWidePortName = 22 // Fibre Channel World-Wide Port Name
AddrFamilyFibreChannelWorldWideNodeName = 23 // Fibre Channel World-Wide Node Name
AddrFamilyGWID = 24 // GWID
AddrFamilyL2VPN = 25 // AFI for L2VPN information
AddrFamilyMPLSTPSectionEndpointID = 26 // MPLS-TP Section Endpoint Identifier
AddrFamilyMPLSTPLSPEndpointID = 27 // MPLS-TP LSP Endpoint Identifier
AddrFamilyMPLSTPPseudowireEndpointID = 28 // MPLS-TP Pseudowire Endpoint Identifier
AddrFamilyMTIPv4 = 29 // MT IP: Multi-Topology IP version 4
AddrFamilyMTIPv6 = 30 // MT IPv6: Multi-Topology IP version 6
AddrFamilyEIGRPCommonServiceFamily = 16384 // EIGRP Common Service Family
AddrFamilyEIGRPIPv4ServiceFamily = 16385 // EIGRP IPv4 Service Family
AddrFamilyEIGRPIPv6ServiceFamily = 16386 // EIGRP IPv6 Service Family
AddrFamilyLISPCanonicalAddressFormat = 16387 // LISP Canonical Address Format (LCAF)
AddrFamilyBGPLS = 16388 // BGP-LS
AddrFamily48bitMAC = 16389 // 48-bit MAC
AddrFamily64bitMAC = 16390 // 64-bit MAC
AddrFamilyOUI = 16391 // OUI
AddrFamilyMACFinal24bits = 16392 // MAC/24
AddrFamilyMACFinal40bits = 16393 // MAC/40
AddrFamilyIPv6Initial64bits = 16394 // IPv6/64
AddrFamilyRBridgePortID = 16395 // RBridge Port ID
AddrFamilyTRILLNickname = 16396 // TRILL Nickname
)

View file

@ -39,12 +39,16 @@ var registries = []struct {
"https://www.iana.org/assignments/protocol-numbers/protocol-numbers.xml",
parseProtocolNumbers,
},
{
"http://www.iana.org/assignments/address-family-numbers/address-family-numbers.xml",
parseAddrFamilyNumbers,
},
}
func main() {
var bb bytes.Buffer
fmt.Fprintf(&bb, "// go generate gen.go\n")
fmt.Fprintf(&bb, "// GENERATED BY THE COMMAND ABOVE; DO NOT EDIT\n\n")
fmt.Fprintf(&bb, "// Code generated by the command above; DO NOT EDIT.\n\n")
fmt.Fprintf(&bb, "// Package iana provides protocol number resources managed by the Internet Assigned Numbers Authority (IANA).\n")
fmt.Fprintf(&bb, `package iana // import "golang.org/x/net/internal/iana"`+"\n\n")
for _, r := range registries {
@ -291,3 +295,93 @@ func (pn *protocolNumbers) escape() []canonProtocolRecord {
}
return prs
}
func parseAddrFamilyNumbers(w io.Writer, r io.Reader) error {
dec := xml.NewDecoder(r)
var afn addrFamilylNumbers
if err := dec.Decode(&afn); err != nil {
return err
}
afrs := afn.escape()
fmt.Fprintf(w, "// %s, Updated: %s\n", afn.Title, afn.Updated)
fmt.Fprintf(w, "const (\n")
for _, afr := range afrs {
if afr.Name == "" {
continue
}
fmt.Fprintf(w, "AddrFamily%s = %d", afr.Name, afr.Value)
fmt.Fprintf(w, "// %s\n", afr.Descr)
}
fmt.Fprintf(w, ")\n")
return nil
}
type addrFamilylNumbers struct {
XMLName xml.Name `xml:"registry"`
Title string `xml:"title"`
Updated string `xml:"updated"`
RegTitle string `xml:"registry>title"`
Note string `xml:"registry>note"`
Records []struct {
Value string `xml:"value"`
Descr string `xml:"description"`
} `xml:"registry>record"`
}
type canonAddrFamilyRecord struct {
Name string
Descr string
Value int
}
func (afn *addrFamilylNumbers) escape() []canonAddrFamilyRecord {
afrs := make([]canonAddrFamilyRecord, len(afn.Records))
sr := strings.NewReplacer(
"IP version 4", "IPv4",
"IP version 6", "IPv6",
"Identifier", "ID",
"-", "",
"-", "",
"/", "",
".", "",
" ", "",
)
for i, afr := range afn.Records {
if strings.Contains(afr.Descr, "Unassigned") ||
strings.Contains(afr.Descr, "Reserved") {
continue
}
afrs[i].Descr = afr.Descr
s := strings.TrimSpace(afr.Descr)
switch s {
case "IP (IP version 4)":
afrs[i].Name = "IPv4"
case "IP6 (IP version 6)":
afrs[i].Name = "IPv6"
case "AFI for L2VPN information":
afrs[i].Name = "L2VPN"
case "E.164 with NSAP format subaddress":
afrs[i].Name = "E164withSubaddress"
case "MT IP: Multi-Topology IP version 4":
afrs[i].Name = "MTIPv4"
case "MAC/24":
afrs[i].Name = "MACFinal24bits"
case "MAC/40":
afrs[i].Name = "MACFinal40bits"
case "IPv6/64":
afrs[i].Name = "IPv6Initial64bits"
default:
n := strings.Index(s, "(")
if n > 0 {
s = s[:n]
}
n = strings.Index(s, ":")
if n > 0 {
s = s[:n]
}
afrs[i].Name = sr.Replace(s)
}
afrs[i].Value, _ = strconv.Atoi(afr.Value)
}
return afrs
}

View file

@ -80,7 +80,7 @@ var registries = []struct {
func geniana() error {
var bb bytes.Buffer
fmt.Fprintf(&bb, "// go generate gen.go\n")
fmt.Fprintf(&bb, "// GENERATED BY THE COMMAND ABOVE; DO NOT EDIT\n\n")
fmt.Fprintf(&bb, "// Code generated by the command above; DO NOT EDIT.\n\n")
fmt.Fprintf(&bb, "package ipv4\n\n")
for _, r := range registries {
resp, err := http.Get(r.url)

10
vendor/golang.org/x/net/ipv4/iana.go generated vendored
View file

@ -1,9 +1,9 @@
// go generate gen.go
// GENERATED BY THE COMMAND ABOVE; DO NOT EDIT
// Code generated by the command above; DO NOT EDIT.
package ipv4
// Internet Control Message Protocol (ICMP) Parameters, Updated: 2013-04-19
// Internet Control Message Protocol (ICMP) Parameters, Updated: 2018-02-26
const (
ICMPTypeEchoReply ICMPType = 0 // Echo Reply
ICMPTypeDestinationUnreachable ICMPType = 3 // Destination Unreachable
@ -16,9 +16,11 @@ const (
ICMPTypeTimestamp ICMPType = 13 // Timestamp
ICMPTypeTimestampReply ICMPType = 14 // Timestamp Reply
ICMPTypePhoturis ICMPType = 40 // Photuris
ICMPTypeExtendedEchoRequest ICMPType = 42 // Extended Echo Request
ICMPTypeExtendedEchoReply ICMPType = 43 // Extended Echo Reply
)
// Internet Control Message Protocol (ICMP) Parameters, Updated: 2013-04-19
// Internet Control Message Protocol (ICMP) Parameters, Updated: 2018-02-26
var icmpTypes = map[ICMPType]string{
0: "echo reply",
3: "destination unreachable",
@ -31,4 +33,6 @@ var icmpTypes = map[ICMPType]string{
13: "timestamp",
14: "timestamp reply",
40: "photuris",
42: "extended echo request",
43: "extended echo reply",
}

View file

@ -80,7 +80,7 @@ var registries = []struct {
func geniana() error {
var bb bytes.Buffer
fmt.Fprintf(&bb, "// go generate gen.go\n")
fmt.Fprintf(&bb, "// GENERATED BY THE COMMAND ABOVE; DO NOT EDIT\n\n")
fmt.Fprintf(&bb, "// Code generated by the command above; DO NOT EDIT.\n\n")
fmt.Fprintf(&bb, "package ipv6\n\n")
for _, r := range registries {
resp, err := http.Get(r.url)

10
vendor/golang.org/x/net/ipv6/iana.go generated vendored
View file

@ -1,9 +1,9 @@
// go generate gen.go
// GENERATED BY THE COMMAND ABOVE; DO NOT EDIT
// Code generated by the command above; DO NOT EDIT.
package ipv6
// Internet Control Message Protocol version 6 (ICMPv6) Parameters, Updated: 2015-07-07
// Internet Control Message Protocol version 6 (ICMPv6) Parameters, Updated: 2018-03-09
const (
ICMPTypeDestinationUnreachable ICMPType = 1 // Destination Unreachable
ICMPTypePacketTooBig ICMPType = 2 // Packet Too Big
@ -40,9 +40,11 @@ const (
ICMPTypeDuplicateAddressRequest ICMPType = 157 // Duplicate Address Request
ICMPTypeDuplicateAddressConfirmation ICMPType = 158 // Duplicate Address Confirmation
ICMPTypeMPLControl ICMPType = 159 // MPL Control Message
ICMPTypeExtendedEchoRequest ICMPType = 160 // Extended Echo Request
ICMPTypeExtendedEchoReply ICMPType = 161 // Extended Echo Reply
)
// Internet Control Message Protocol version 6 (ICMPv6) Parameters, Updated: 2015-07-07
// Internet Control Message Protocol version 6 (ICMPv6) Parameters, Updated: 2018-03-09
var icmpTypes = map[ICMPType]string{
1: "destination unreachable",
2: "packet too big",
@ -79,4 +81,6 @@ var icmpTypes = map[ICMPType]string{
157: "duplicate address request",
158: "duplicate address confirmation",
159: "mpl control message",
160: "extended echo request",
161: "extended echo reply",
}

View file

@ -11,11 +11,14 @@
// system, set $GOOS and $GOARCH to the desired system. For example, if
// you want to view documentation for freebsd/arm on linux/amd64, set $GOOS
// to freebsd and $GOARCH to arm.
//
// The primary use of this package is inside other packages that provide a more
// portable interface to the system, such as "os", "time" and "net". Use
// those packages rather than this one if you can.
//
// For details of the functions and data types in this package consult
// the manuals for the appropriate operating system.
//
// These calls return err == nil to indicate success; otherwise
// err represents an operating system error describing the failure and
// holds a value of type syscall.Errno.