mirror of
https://github.com/refraction-networking/utls.git
synced 2025-04-07 05:57:34 +03:00
Simple single-byte loop count for now, to be further improved in future CLs.

Benchmark on linux/arm:

name               old time/op    new time/op    delta
CountSingle/10-4   122ns ± 0%     87ns ± 1%      -28.41%  (p=0.000 n=7+10)
CountSingle/32-4   242ns ± 0%     174ns ± 1%     -28.25%  (p=0.000 n=10+10)
CountSingle/4K-4   24.2µs ± 1%    15.6µs ± 1%    -35.42%  (p=0.000 n=10+10)
CountSingle/4M-4   29.6ms ± 1%    21.3ms ± 1%    -28.09%  (p=0.000 n=10+9)
CountSingle/64M-4  562ms ± 0%     414ms ± 1%     -26.23%  (p=0.000 n=8+10)

name               old speed      new speed      delta
CountSingle/10-4   81.7MB/s ± 1%  114.5MB/s ± 1%  +40.07%  (p=0.000 n=10+10)
CountSingle/32-4   132MB/s ± 0%   184MB/s ± 1%    +39.39%  (p=0.000 n=10+9)
CountSingle/4K-4   170MB/s ± 1%   263MB/s ± 1%    +54.86%  (p=0.000 n=10+10)
CountSingle/4M-4   142MB/s ± 1%   197MB/s ± 1%    +39.07%  (p=0.000 n=10+9)
CountSingle/64M-4  119MB/s ± 0%   162MB/s ± 1%    +35.55%  (p=0.000 n=8+10)

Updates #29001

Change-Id: I42a268215a62044286ec32b548d8e4b86b9570ee
Reviewed-on: https://go-review.googlesource.com/c/go/+/168319
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
43 lines
917 B
ArmAsm
43 lines
917 B
ArmAsm
// Copyright 2019 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
#include "go_asm.h"
|
|
#include "textflag.h"
|
|
|
|
// Count is a register-setup thunk: it copies its arguments from the caller's
// frame into the registers countbytebody<> expects and then branches there.
// countbytebody<> stores the result through R7 into this function's ret slot.
//
// NOTE(review): the Go declaration is not in this file. The frame layout
// ($0-20 with b_base+0, b_len+4, c+12, ret+16) is consistent with a slice
// argument b, a byte argument c and a word-sized result — confirm against
// the Go-side declaration.
TEXT ·Count(SB),NOSPLIT,$0-20
	MOVW	$ret+16(FP), R7	// R7 = address of the result slot (address, not value)
	MOVBU	c+12(FP), R2	// R2 = byte to look for, loaded as an unsigned byte
	MOVW	b_len+4(FP), R1	// R1 = number of bytes to scan
	MOVW	b_base+0(FP), R0	// R0 = pointer to the first byte
	B	countbytebody<>(SB)	// plain branch (no link): the body's RET returns to our caller
|
|
|
|
// CountString is a register-setup thunk: it copies its arguments from the
// caller's frame into the registers countbytebody<> expects and then branches
// there. countbytebody<> stores the result through R7 into this function's
// ret slot.
//
// NOTE(review): the Go declaration is not in this file. The frame layout
// ($0-16 with s_base+0, s_len+4, c+8, ret+12) is consistent with a string
// argument s, a byte argument c and a word-sized result — confirm against
// the Go-side declaration.
TEXT ·CountString(SB),NOSPLIT,$0-16
	MOVW	$ret+12(FP), R7	// R7 = address of the result slot (address, not value)
	MOVBU	c+8(FP), R2	// R2 = byte to look for, loaded as an unsigned byte
	MOVW	s_len+4(FP), R1	// R1 = number of bytes to scan
	MOVW	s_base+0(FP), R0	// R0 = pointer to the first byte
	B	countbytebody<>(SB)	// plain branch (no link): the body's RET returns to our caller
|
|
|
|
// countbytebody counts how many bytes in a buffer equal a given byte value,
// one byte per iteration, and stores the count through a pointer.
//
// Input:
//   R0: pointer to the first byte of the data
//   R1: length of the data in bytes
//   R2: byte value to count
//   R7: address where the result word is stored
//
// Output:
//   (R7) = number of bytes in the data equal to R2
//
// On exit:
//   R4 and R8 are clobbered (last byte read / running count).
//   For a non-zero length, R0 and R1 are also modified: both end up equal
//   to the address one past the last byte.
//   The condition flags are clobbered.
TEXT countbytebody<>(SB),NOSPLIT,$0
	CMP	$0, R1
	MOVW	$0, R8	// R8 = running match count; no .S suffix, so the flags from CMP survive
	B.EQ	store_count	// zero length: store 0 without reading from R0
	ADD	R0, R1	// R1 = R0 + length, i.e. one past the last byte
next_byte:
	MOVBU.P	1(R0), R4	// R4 = byte at R0, then R0 += 1 (post-increment)
	CMP	R4, R2
	ADD.EQ	$1, R8	// conditionally executed: count only when the byte matched
	CMP	R0, R1
	B.NE	next_byte	// loop until R0 reaches the end pointer
store_count:
	MOVW	R8, (R7)	// write the count to the caller-provided result slot
	RET
|