mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-03-17 20:54:10 +00:00
crypto: arm64/aes-bs - yield NEON after every block of input
Avoid excessive scheduling delays under a preemptible kernel by yielding the NEON after every block of input.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
parent
0c8f838a52
commit
20ab633258
1 changed files with 168 additions and 133 deletions
|
@ -565,54 +565,61 @@ ENDPROC(aesbs_decrypt8)
|
||||||
* int blocks)
|
* int blocks)
|
||||||
*/
|
*/
|
||||||
.macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
|
.macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
|
||||||
stp x29, x30, [sp, #-16]!
|
frame_push 5
|
||||||
mov x29, sp
|
|
||||||
|
mov x19, x0
|
||||||
|
mov x20, x1
|
||||||
|
mov x21, x2
|
||||||
|
mov x22, x3
|
||||||
|
mov x23, x4
|
||||||
|
|
||||||
99: mov x5, #1
|
99: mov x5, #1
|
||||||
lsl x5, x5, x4
|
lsl x5, x5, x23
|
||||||
subs w4, w4, #8
|
subs w23, w23, #8
|
||||||
csel x4, x4, xzr, pl
|
csel x23, x23, xzr, pl
|
||||||
csel x5, x5, xzr, mi
|
csel x5, x5, xzr, mi
|
||||||
|
|
||||||
ld1 {v0.16b}, [x1], #16
|
ld1 {v0.16b}, [x20], #16
|
||||||
tbnz x5, #1, 0f
|
tbnz x5, #1, 0f
|
||||||
ld1 {v1.16b}, [x1], #16
|
ld1 {v1.16b}, [x20], #16
|
||||||
tbnz x5, #2, 0f
|
tbnz x5, #2, 0f
|
||||||
ld1 {v2.16b}, [x1], #16
|
ld1 {v2.16b}, [x20], #16
|
||||||
tbnz x5, #3, 0f
|
tbnz x5, #3, 0f
|
||||||
ld1 {v3.16b}, [x1], #16
|
ld1 {v3.16b}, [x20], #16
|
||||||
tbnz x5, #4, 0f
|
tbnz x5, #4, 0f
|
||||||
ld1 {v4.16b}, [x1], #16
|
ld1 {v4.16b}, [x20], #16
|
||||||
tbnz x5, #5, 0f
|
tbnz x5, #5, 0f
|
||||||
ld1 {v5.16b}, [x1], #16
|
ld1 {v5.16b}, [x20], #16
|
||||||
tbnz x5, #6, 0f
|
tbnz x5, #6, 0f
|
||||||
ld1 {v6.16b}, [x1], #16
|
ld1 {v6.16b}, [x20], #16
|
||||||
tbnz x5, #7, 0f
|
tbnz x5, #7, 0f
|
||||||
ld1 {v7.16b}, [x1], #16
|
ld1 {v7.16b}, [x20], #16
|
||||||
|
|
||||||
0: mov bskey, x2
|
0: mov bskey, x21
|
||||||
mov rounds, x3
|
mov rounds, x22
|
||||||
bl \do8
|
bl \do8
|
||||||
|
|
||||||
st1 {\o0\().16b}, [x0], #16
|
st1 {\o0\().16b}, [x19], #16
|
||||||
tbnz x5, #1, 1f
|
tbnz x5, #1, 1f
|
||||||
st1 {\o1\().16b}, [x0], #16
|
st1 {\o1\().16b}, [x19], #16
|
||||||
tbnz x5, #2, 1f
|
tbnz x5, #2, 1f
|
||||||
st1 {\o2\().16b}, [x0], #16
|
st1 {\o2\().16b}, [x19], #16
|
||||||
tbnz x5, #3, 1f
|
tbnz x5, #3, 1f
|
||||||
st1 {\o3\().16b}, [x0], #16
|
st1 {\o3\().16b}, [x19], #16
|
||||||
tbnz x5, #4, 1f
|
tbnz x5, #4, 1f
|
||||||
st1 {\o4\().16b}, [x0], #16
|
st1 {\o4\().16b}, [x19], #16
|
||||||
tbnz x5, #5, 1f
|
tbnz x5, #5, 1f
|
||||||
st1 {\o5\().16b}, [x0], #16
|
st1 {\o5\().16b}, [x19], #16
|
||||||
tbnz x5, #6, 1f
|
tbnz x5, #6, 1f
|
||||||
st1 {\o6\().16b}, [x0], #16
|
st1 {\o6\().16b}, [x19], #16
|
||||||
tbnz x5, #7, 1f
|
tbnz x5, #7, 1f
|
||||||
st1 {\o7\().16b}, [x0], #16
|
st1 {\o7\().16b}, [x19], #16
|
||||||
|
|
||||||
cbnz x4, 99b
|
cbz x23, 1f
|
||||||
|
cond_yield_neon
|
||||||
|
b 99b
|
||||||
|
|
||||||
1: ldp x29, x30, [sp], #16
|
1: frame_pop
|
||||||
ret
|
ret
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
@ -632,43 +639,49 @@ ENDPROC(aesbs_ecb_decrypt)
|
||||||
*/
|
*/
|
||||||
.align 4
|
.align 4
|
||||||
ENTRY(aesbs_cbc_decrypt)
|
ENTRY(aesbs_cbc_decrypt)
|
||||||
stp x29, x30, [sp, #-16]!
|
frame_push 6
|
||||||
mov x29, sp
|
|
||||||
|
mov x19, x0
|
||||||
|
mov x20, x1
|
||||||
|
mov x21, x2
|
||||||
|
mov x22, x3
|
||||||
|
mov x23, x4
|
||||||
|
mov x24, x5
|
||||||
|
|
||||||
99: mov x6, #1
|
99: mov x6, #1
|
||||||
lsl x6, x6, x4
|
lsl x6, x6, x23
|
||||||
subs w4, w4, #8
|
subs w23, w23, #8
|
||||||
csel x4, x4, xzr, pl
|
csel x23, x23, xzr, pl
|
||||||
csel x6, x6, xzr, mi
|
csel x6, x6, xzr, mi
|
||||||
|
|
||||||
ld1 {v0.16b}, [x1], #16
|
ld1 {v0.16b}, [x20], #16
|
||||||
mov v25.16b, v0.16b
|
mov v25.16b, v0.16b
|
||||||
tbnz x6, #1, 0f
|
tbnz x6, #1, 0f
|
||||||
ld1 {v1.16b}, [x1], #16
|
ld1 {v1.16b}, [x20], #16
|
||||||
mov v26.16b, v1.16b
|
mov v26.16b, v1.16b
|
||||||
tbnz x6, #2, 0f
|
tbnz x6, #2, 0f
|
||||||
ld1 {v2.16b}, [x1], #16
|
ld1 {v2.16b}, [x20], #16
|
||||||
mov v27.16b, v2.16b
|
mov v27.16b, v2.16b
|
||||||
tbnz x6, #3, 0f
|
tbnz x6, #3, 0f
|
||||||
ld1 {v3.16b}, [x1], #16
|
ld1 {v3.16b}, [x20], #16
|
||||||
mov v28.16b, v3.16b
|
mov v28.16b, v3.16b
|
||||||
tbnz x6, #4, 0f
|
tbnz x6, #4, 0f
|
||||||
ld1 {v4.16b}, [x1], #16
|
ld1 {v4.16b}, [x20], #16
|
||||||
mov v29.16b, v4.16b
|
mov v29.16b, v4.16b
|
||||||
tbnz x6, #5, 0f
|
tbnz x6, #5, 0f
|
||||||
ld1 {v5.16b}, [x1], #16
|
ld1 {v5.16b}, [x20], #16
|
||||||
mov v30.16b, v5.16b
|
mov v30.16b, v5.16b
|
||||||
tbnz x6, #6, 0f
|
tbnz x6, #6, 0f
|
||||||
ld1 {v6.16b}, [x1], #16
|
ld1 {v6.16b}, [x20], #16
|
||||||
mov v31.16b, v6.16b
|
mov v31.16b, v6.16b
|
||||||
tbnz x6, #7, 0f
|
tbnz x6, #7, 0f
|
||||||
ld1 {v7.16b}, [x1]
|
ld1 {v7.16b}, [x20]
|
||||||
|
|
||||||
0: mov bskey, x2
|
0: mov bskey, x21
|
||||||
mov rounds, x3
|
mov rounds, x22
|
||||||
bl aesbs_decrypt8
|
bl aesbs_decrypt8
|
||||||
|
|
||||||
ld1 {v24.16b}, [x5] // load IV
|
ld1 {v24.16b}, [x24] // load IV
|
||||||
|
|
||||||
eor v1.16b, v1.16b, v25.16b
|
eor v1.16b, v1.16b, v25.16b
|
||||||
eor v6.16b, v6.16b, v26.16b
|
eor v6.16b, v6.16b, v26.16b
|
||||||
|
@ -679,34 +692,36 @@ ENTRY(aesbs_cbc_decrypt)
|
||||||
eor v3.16b, v3.16b, v30.16b
|
eor v3.16b, v3.16b, v30.16b
|
||||||
eor v5.16b, v5.16b, v31.16b
|
eor v5.16b, v5.16b, v31.16b
|
||||||
|
|
||||||
st1 {v0.16b}, [x0], #16
|
st1 {v0.16b}, [x19], #16
|
||||||
mov v24.16b, v25.16b
|
mov v24.16b, v25.16b
|
||||||
tbnz x6, #1, 1f
|
tbnz x6, #1, 1f
|
||||||
st1 {v1.16b}, [x0], #16
|
st1 {v1.16b}, [x19], #16
|
||||||
mov v24.16b, v26.16b
|
mov v24.16b, v26.16b
|
||||||
tbnz x6, #2, 1f
|
tbnz x6, #2, 1f
|
||||||
st1 {v6.16b}, [x0], #16
|
st1 {v6.16b}, [x19], #16
|
||||||
mov v24.16b, v27.16b
|
mov v24.16b, v27.16b
|
||||||
tbnz x6, #3, 1f
|
tbnz x6, #3, 1f
|
||||||
st1 {v4.16b}, [x0], #16
|
st1 {v4.16b}, [x19], #16
|
||||||
mov v24.16b, v28.16b
|
mov v24.16b, v28.16b
|
||||||
tbnz x6, #4, 1f
|
tbnz x6, #4, 1f
|
||||||
st1 {v2.16b}, [x0], #16
|
st1 {v2.16b}, [x19], #16
|
||||||
mov v24.16b, v29.16b
|
mov v24.16b, v29.16b
|
||||||
tbnz x6, #5, 1f
|
tbnz x6, #5, 1f
|
||||||
st1 {v7.16b}, [x0], #16
|
st1 {v7.16b}, [x19], #16
|
||||||
mov v24.16b, v30.16b
|
mov v24.16b, v30.16b
|
||||||
tbnz x6, #6, 1f
|
tbnz x6, #6, 1f
|
||||||
st1 {v3.16b}, [x0], #16
|
st1 {v3.16b}, [x19], #16
|
||||||
mov v24.16b, v31.16b
|
mov v24.16b, v31.16b
|
||||||
tbnz x6, #7, 1f
|
tbnz x6, #7, 1f
|
||||||
ld1 {v24.16b}, [x1], #16
|
ld1 {v24.16b}, [x20], #16
|
||||||
st1 {v5.16b}, [x0], #16
|
st1 {v5.16b}, [x19], #16
|
||||||
1: st1 {v24.16b}, [x5] // store IV
|
1: st1 {v24.16b}, [x24] // store IV
|
||||||
|
|
||||||
cbnz x4, 99b
|
cbz x23, 2f
|
||||||
|
cond_yield_neon
|
||||||
|
b 99b
|
||||||
|
|
||||||
ldp x29, x30, [sp], #16
|
2: frame_pop
|
||||||
ret
|
ret
|
||||||
ENDPROC(aesbs_cbc_decrypt)
|
ENDPROC(aesbs_cbc_decrypt)
|
||||||
|
|
||||||
|
@ -731,87 +746,93 @@ CPU_BE( .quad 0x87, 1 )
|
||||||
*/
|
*/
|
||||||
__xts_crypt8:
|
__xts_crypt8:
|
||||||
mov x6, #1
|
mov x6, #1
|
||||||
lsl x6, x6, x4
|
lsl x6, x6, x23
|
||||||
subs w4, w4, #8
|
subs w23, w23, #8
|
||||||
csel x4, x4, xzr, pl
|
csel x23, x23, xzr, pl
|
||||||
csel x6, x6, xzr, mi
|
csel x6, x6, xzr, mi
|
||||||
|
|
||||||
ld1 {v0.16b}, [x1], #16
|
ld1 {v0.16b}, [x20], #16
|
||||||
next_tweak v26, v25, v30, v31
|
next_tweak v26, v25, v30, v31
|
||||||
eor v0.16b, v0.16b, v25.16b
|
eor v0.16b, v0.16b, v25.16b
|
||||||
tbnz x6, #1, 0f
|
tbnz x6, #1, 0f
|
||||||
|
|
||||||
ld1 {v1.16b}, [x1], #16
|
ld1 {v1.16b}, [x20], #16
|
||||||
next_tweak v27, v26, v30, v31
|
next_tweak v27, v26, v30, v31
|
||||||
eor v1.16b, v1.16b, v26.16b
|
eor v1.16b, v1.16b, v26.16b
|
||||||
tbnz x6, #2, 0f
|
tbnz x6, #2, 0f
|
||||||
|
|
||||||
ld1 {v2.16b}, [x1], #16
|
ld1 {v2.16b}, [x20], #16
|
||||||
next_tweak v28, v27, v30, v31
|
next_tweak v28, v27, v30, v31
|
||||||
eor v2.16b, v2.16b, v27.16b
|
eor v2.16b, v2.16b, v27.16b
|
||||||
tbnz x6, #3, 0f
|
tbnz x6, #3, 0f
|
||||||
|
|
||||||
ld1 {v3.16b}, [x1], #16
|
ld1 {v3.16b}, [x20], #16
|
||||||
next_tweak v29, v28, v30, v31
|
next_tweak v29, v28, v30, v31
|
||||||
eor v3.16b, v3.16b, v28.16b
|
eor v3.16b, v3.16b, v28.16b
|
||||||
tbnz x6, #4, 0f
|
tbnz x6, #4, 0f
|
||||||
|
|
||||||
ld1 {v4.16b}, [x1], #16
|
ld1 {v4.16b}, [x20], #16
|
||||||
str q29, [sp, #16]
|
str q29, [sp, #.Lframe_local_offset]
|
||||||
eor v4.16b, v4.16b, v29.16b
|
eor v4.16b, v4.16b, v29.16b
|
||||||
next_tweak v29, v29, v30, v31
|
next_tweak v29, v29, v30, v31
|
||||||
tbnz x6, #5, 0f
|
tbnz x6, #5, 0f
|
||||||
|
|
||||||
ld1 {v5.16b}, [x1], #16
|
ld1 {v5.16b}, [x20], #16
|
||||||
str q29, [sp, #32]
|
str q29, [sp, #.Lframe_local_offset + 16]
|
||||||
eor v5.16b, v5.16b, v29.16b
|
eor v5.16b, v5.16b, v29.16b
|
||||||
next_tweak v29, v29, v30, v31
|
next_tweak v29, v29, v30, v31
|
||||||
tbnz x6, #6, 0f
|
tbnz x6, #6, 0f
|
||||||
|
|
||||||
ld1 {v6.16b}, [x1], #16
|
ld1 {v6.16b}, [x20], #16
|
||||||
str q29, [sp, #48]
|
str q29, [sp, #.Lframe_local_offset + 32]
|
||||||
eor v6.16b, v6.16b, v29.16b
|
eor v6.16b, v6.16b, v29.16b
|
||||||
next_tweak v29, v29, v30, v31
|
next_tweak v29, v29, v30, v31
|
||||||
tbnz x6, #7, 0f
|
tbnz x6, #7, 0f
|
||||||
|
|
||||||
ld1 {v7.16b}, [x1], #16
|
ld1 {v7.16b}, [x20], #16
|
||||||
str q29, [sp, #64]
|
str q29, [sp, #.Lframe_local_offset + 48]
|
||||||
eor v7.16b, v7.16b, v29.16b
|
eor v7.16b, v7.16b, v29.16b
|
||||||
next_tweak v29, v29, v30, v31
|
next_tweak v29, v29, v30, v31
|
||||||
|
|
||||||
0: mov bskey, x2
|
0: mov bskey, x21
|
||||||
mov rounds, x3
|
mov rounds, x22
|
||||||
br x7
|
br x7
|
||||||
ENDPROC(__xts_crypt8)
|
ENDPROC(__xts_crypt8)
|
||||||
|
|
||||||
.macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
|
.macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
|
||||||
stp x29, x30, [sp, #-80]!
|
frame_push 6, 64
|
||||||
mov x29, sp
|
|
||||||
|
|
||||||
ldr q30, .Lxts_mul_x
|
mov x19, x0
|
||||||
ld1 {v25.16b}, [x5]
|
mov x20, x1
|
||||||
|
mov x21, x2
|
||||||
|
mov x22, x3
|
||||||
|
mov x23, x4
|
||||||
|
mov x24, x5
|
||||||
|
|
||||||
|
0: ldr q30, .Lxts_mul_x
|
||||||
|
ld1 {v25.16b}, [x24]
|
||||||
|
|
||||||
99: adr x7, \do8
|
99: adr x7, \do8
|
||||||
bl __xts_crypt8
|
bl __xts_crypt8
|
||||||
|
|
||||||
ldp q16, q17, [sp, #16]
|
ldp q16, q17, [sp, #.Lframe_local_offset]
|
||||||
ldp q18, q19, [sp, #48]
|
ldp q18, q19, [sp, #.Lframe_local_offset + 32]
|
||||||
|
|
||||||
eor \o0\().16b, \o0\().16b, v25.16b
|
eor \o0\().16b, \o0\().16b, v25.16b
|
||||||
eor \o1\().16b, \o1\().16b, v26.16b
|
eor \o1\().16b, \o1\().16b, v26.16b
|
||||||
eor \o2\().16b, \o2\().16b, v27.16b
|
eor \o2\().16b, \o2\().16b, v27.16b
|
||||||
eor \o3\().16b, \o3\().16b, v28.16b
|
eor \o3\().16b, \o3\().16b, v28.16b
|
||||||
|
|
||||||
st1 {\o0\().16b}, [x0], #16
|
st1 {\o0\().16b}, [x19], #16
|
||||||
mov v25.16b, v26.16b
|
mov v25.16b, v26.16b
|
||||||
tbnz x6, #1, 1f
|
tbnz x6, #1, 1f
|
||||||
st1 {\o1\().16b}, [x0], #16
|
st1 {\o1\().16b}, [x19], #16
|
||||||
mov v25.16b, v27.16b
|
mov v25.16b, v27.16b
|
||||||
tbnz x6, #2, 1f
|
tbnz x6, #2, 1f
|
||||||
st1 {\o2\().16b}, [x0], #16
|
st1 {\o2\().16b}, [x19], #16
|
||||||
mov v25.16b, v28.16b
|
mov v25.16b, v28.16b
|
||||||
tbnz x6, #3, 1f
|
tbnz x6, #3, 1f
|
||||||
st1 {\o3\().16b}, [x0], #16
|
st1 {\o3\().16b}, [x19], #16
|
||||||
mov v25.16b, v29.16b
|
mov v25.16b, v29.16b
|
||||||
tbnz x6, #4, 1f
|
tbnz x6, #4, 1f
|
||||||
|
|
||||||
|
@ -820,18 +841,22 @@ ENDPROC(__xts_crypt8)
|
||||||
eor \o6\().16b, \o6\().16b, v18.16b
|
eor \o6\().16b, \o6\().16b, v18.16b
|
||||||
eor \o7\().16b, \o7\().16b, v19.16b
|
eor \o7\().16b, \o7\().16b, v19.16b
|
||||||
|
|
||||||
st1 {\o4\().16b}, [x0], #16
|
st1 {\o4\().16b}, [x19], #16
|
||||||
tbnz x6, #5, 1f
|
tbnz x6, #5, 1f
|
||||||
st1 {\o5\().16b}, [x0], #16
|
st1 {\o5\().16b}, [x19], #16
|
||||||
tbnz x6, #6, 1f
|
tbnz x6, #6, 1f
|
||||||
st1 {\o6\().16b}, [x0], #16
|
st1 {\o6\().16b}, [x19], #16
|
||||||
tbnz x6, #7, 1f
|
tbnz x6, #7, 1f
|
||||||
st1 {\o7\().16b}, [x0], #16
|
st1 {\o7\().16b}, [x19], #16
|
||||||
|
|
||||||
cbnz x4, 99b
|
cbz x23, 1f
|
||||||
|
st1 {v25.16b}, [x24]
|
||||||
|
|
||||||
1: st1 {v25.16b}, [x5]
|
cond_yield_neon 0b
|
||||||
ldp x29, x30, [sp], #80
|
b 99b
|
||||||
|
|
||||||
|
1: st1 {v25.16b}, [x24]
|
||||||
|
frame_pop
|
||||||
ret
|
ret
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
@ -856,24 +881,31 @@ ENDPROC(aesbs_xts_decrypt)
|
||||||
* int rounds, int blocks, u8 iv[], u8 final[])
|
* int rounds, int blocks, u8 iv[], u8 final[])
|
||||||
*/
|
*/
|
||||||
ENTRY(aesbs_ctr_encrypt)
|
ENTRY(aesbs_ctr_encrypt)
|
||||||
stp x29, x30, [sp, #-16]!
|
frame_push 8
|
||||||
mov x29, sp
|
|
||||||
|
|
||||||
cmp x6, #0
|
mov x19, x0
|
||||||
cset x10, ne
|
mov x20, x1
|
||||||
add x4, x4, x10 // do one extra block if final
|
mov x21, x2
|
||||||
|
mov x22, x3
|
||||||
|
mov x23, x4
|
||||||
|
mov x24, x5
|
||||||
|
mov x25, x6
|
||||||
|
|
||||||
ldp x7, x8, [x5]
|
cmp x25, #0
|
||||||
ld1 {v0.16b}, [x5]
|
cset x26, ne
|
||||||
|
add x23, x23, x26 // do one extra block if final
|
||||||
|
|
||||||
|
98: ldp x7, x8, [x24]
|
||||||
|
ld1 {v0.16b}, [x24]
|
||||||
CPU_LE( rev x7, x7 )
|
CPU_LE( rev x7, x7 )
|
||||||
CPU_LE( rev x8, x8 )
|
CPU_LE( rev x8, x8 )
|
||||||
adds x8, x8, #1
|
adds x8, x8, #1
|
||||||
adc x7, x7, xzr
|
adc x7, x7, xzr
|
||||||
|
|
||||||
99: mov x9, #1
|
99: mov x9, #1
|
||||||
lsl x9, x9, x4
|
lsl x9, x9, x23
|
||||||
subs w4, w4, #8
|
subs w23, w23, #8
|
||||||
csel x4, x4, xzr, pl
|
csel x23, x23, xzr, pl
|
||||||
csel x9, x9, xzr, le
|
csel x9, x9, xzr, le
|
||||||
|
|
||||||
tbnz x9, #1, 0f
|
tbnz x9, #1, 0f
|
||||||
|
@ -891,82 +923,85 @@ CPU_LE( rev x8, x8 )
|
||||||
tbnz x9, #7, 0f
|
tbnz x9, #7, 0f
|
||||||
next_ctr v7
|
next_ctr v7
|
||||||
|
|
||||||
0: mov bskey, x2
|
0: mov bskey, x21
|
||||||
mov rounds, x3
|
mov rounds, x22
|
||||||
bl aesbs_encrypt8
|
bl aesbs_encrypt8
|
||||||
|
|
||||||
lsr x9, x9, x10 // disregard the extra block
|
lsr x9, x9, x26 // disregard the extra block
|
||||||
tbnz x9, #0, 0f
|
tbnz x9, #0, 0f
|
||||||
|
|
||||||
ld1 {v8.16b}, [x1], #16
|
ld1 {v8.16b}, [x20], #16
|
||||||
eor v0.16b, v0.16b, v8.16b
|
eor v0.16b, v0.16b, v8.16b
|
||||||
st1 {v0.16b}, [x0], #16
|
st1 {v0.16b}, [x19], #16
|
||||||
tbnz x9, #1, 1f
|
tbnz x9, #1, 1f
|
||||||
|
|
||||||
ld1 {v9.16b}, [x1], #16
|
ld1 {v9.16b}, [x20], #16
|
||||||
eor v1.16b, v1.16b, v9.16b
|
eor v1.16b, v1.16b, v9.16b
|
||||||
st1 {v1.16b}, [x0], #16
|
st1 {v1.16b}, [x19], #16
|
||||||
tbnz x9, #2, 2f
|
tbnz x9, #2, 2f
|
||||||
|
|
||||||
ld1 {v10.16b}, [x1], #16
|
ld1 {v10.16b}, [x20], #16
|
||||||
eor v4.16b, v4.16b, v10.16b
|
eor v4.16b, v4.16b, v10.16b
|
||||||
st1 {v4.16b}, [x0], #16
|
st1 {v4.16b}, [x19], #16
|
||||||
tbnz x9, #3, 3f
|
tbnz x9, #3, 3f
|
||||||
|
|
||||||
ld1 {v11.16b}, [x1], #16
|
ld1 {v11.16b}, [x20], #16
|
||||||
eor v6.16b, v6.16b, v11.16b
|
eor v6.16b, v6.16b, v11.16b
|
||||||
st1 {v6.16b}, [x0], #16
|
st1 {v6.16b}, [x19], #16
|
||||||
tbnz x9, #4, 4f
|
tbnz x9, #4, 4f
|
||||||
|
|
||||||
ld1 {v12.16b}, [x1], #16
|
ld1 {v12.16b}, [x20], #16
|
||||||
eor v3.16b, v3.16b, v12.16b
|
eor v3.16b, v3.16b, v12.16b
|
||||||
st1 {v3.16b}, [x0], #16
|
st1 {v3.16b}, [x19], #16
|
||||||
tbnz x9, #5, 5f
|
tbnz x9, #5, 5f
|
||||||
|
|
||||||
ld1 {v13.16b}, [x1], #16
|
ld1 {v13.16b}, [x20], #16
|
||||||
eor v7.16b, v7.16b, v13.16b
|
eor v7.16b, v7.16b, v13.16b
|
||||||
st1 {v7.16b}, [x0], #16
|
st1 {v7.16b}, [x19], #16
|
||||||
tbnz x9, #6, 6f
|
tbnz x9, #6, 6f
|
||||||
|
|
||||||
ld1 {v14.16b}, [x1], #16
|
ld1 {v14.16b}, [x20], #16
|
||||||
eor v2.16b, v2.16b, v14.16b
|
eor v2.16b, v2.16b, v14.16b
|
||||||
st1 {v2.16b}, [x0], #16
|
st1 {v2.16b}, [x19], #16
|
||||||
tbnz x9, #7, 7f
|
tbnz x9, #7, 7f
|
||||||
|
|
||||||
ld1 {v15.16b}, [x1], #16
|
ld1 {v15.16b}, [x20], #16
|
||||||
eor v5.16b, v5.16b, v15.16b
|
eor v5.16b, v5.16b, v15.16b
|
||||||
st1 {v5.16b}, [x0], #16
|
st1 {v5.16b}, [x19], #16
|
||||||
|
|
||||||
8: next_ctr v0
|
8: next_ctr v0
|
||||||
cbnz x4, 99b
|
st1 {v0.16b}, [x24]
|
||||||
|
cbz x23, 0f
|
||||||
|
|
||||||
0: st1 {v0.16b}, [x5]
|
cond_yield_neon 98b
|
||||||
ldp x29, x30, [sp], #16
|
b 99b
|
||||||
|
|
||||||
|
0: frame_pop
|
||||||
ret
|
ret
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we are handling the tail of the input (x6 != NULL), return the
|
* If we are handling the tail of the input (x6 != NULL), return the
|
||||||
* final keystream block back to the caller.
|
* final keystream block back to the caller.
|
||||||
*/
|
*/
|
||||||
1: cbz x6, 8b
|
1: cbz x25, 8b
|
||||||
st1 {v1.16b}, [x6]
|
st1 {v1.16b}, [x25]
|
||||||
b 8b
|
b 8b
|
||||||
2: cbz x6, 8b
|
2: cbz x25, 8b
|
||||||
st1 {v4.16b}, [x6]
|
st1 {v4.16b}, [x25]
|
||||||
b 8b
|
b 8b
|
||||||
3: cbz x6, 8b
|
3: cbz x25, 8b
|
||||||
st1 {v6.16b}, [x6]
|
st1 {v6.16b}, [x25]
|
||||||
b 8b
|
b 8b
|
||||||
4: cbz x6, 8b
|
4: cbz x25, 8b
|
||||||
st1 {v3.16b}, [x6]
|
st1 {v3.16b}, [x25]
|
||||||
b 8b
|
b 8b
|
||||||
5: cbz x6, 8b
|
5: cbz x25, 8b
|
||||||
st1 {v7.16b}, [x6]
|
st1 {v7.16b}, [x25]
|
||||||
b 8b
|
b 8b
|
||||||
6: cbz x6, 8b
|
6: cbz x25, 8b
|
||||||
st1 {v2.16b}, [x6]
|
st1 {v2.16b}, [x25]
|
||||||
b 8b
|
b 8b
|
||||||
7: cbz x6, 8b
|
7: cbz x25, 8b
|
||||||
st1 {v5.16b}, [x6]
|
st1 {v5.16b}, [x25]
|
||||||
b 8b
|
b 8b
|
||||||
ENDPROC(aesbs_ctr_encrypt)
|
ENDPROC(aesbs_ctr_encrypt)
|
||||||
|
|
Loading…
Add table
Reference in a new issue