-
Notifications
You must be signed in to change notification settings - Fork 305
/
Copy pathmask_arm64.s
72 lines (63 loc) · 1.38 KB
/
mask_arm64.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#include "textflag.h"
// func maskAsm(b *byte, len int, key uint32)
TEXT ·maskAsm(SB), NOSPLIT, $0-28
// R0 = b
// R1 = len
// R3 = key (uint32)
// R2 = uint64(key)<<32 | uint64(key)
MOVD b_ptr+0(FP), R0
MOVD b_len+8(FP), R1
MOVWU key+16(FP), R3
MOVD R3, R2
ORR R2<<32, R2, R2
VDUP R2, V0.D2
CMP $64, R1
BLT less_than_64
loop_64:
VLD1 (R0), [V1.B16, V2.B16, V3.B16, V4.B16]
VEOR V1.B16, V0.B16, V1.B16
VEOR V2.B16, V0.B16, V2.B16
VEOR V3.B16, V0.B16, V3.B16
VEOR V4.B16, V0.B16, V4.B16
VST1.P [V1.B16, V2.B16, V3.B16, V4.B16], 64(R0)
SUBS $64, R1
CMP $64, R1
BGE loop_64
less_than_64:
CBZ R1, end
TBZ $5, R1, less_than_32
VLD1 (R0), [V1.B16, V2.B16]
VEOR V1.B16, V0.B16, V1.B16
VEOR V2.B16, V0.B16, V2.B16
VST1.P [V1.B16, V2.B16], 32(R0)
less_than_32:
TBZ $4, R1, less_than_16
LDP (R0), (R11, R12)
EOR R11, R2, R11
EOR R12, R2, R12
STP.P (R11, R12), 16(R0)
less_than_16:
TBZ $3, R1, less_than_8
MOVD (R0), R11
EOR R2, R11, R11
MOVD.P R11, 8(R0)
less_than_8:
TBZ $2, R1, less_than_4
MOVWU (R0), R11
EORW R2, R11, R11
MOVWU.P R11, 4(R0)
less_than_4:
TBZ $1, R1, less_than_2
MOVHU (R0), R11
EORW R3, R11, R11
MOVHU.P R11, 2(R0)
RORW $16, R3
less_than_2:
TBZ $0, R1, end
MOVBU (R0), R11
EORW R3, R11, R11
MOVBU.P R11, 1(R0)
RORW $8, R3
end:
MOVWU R3, ret+24(FP)
RET