mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-06-27 00:51:35 +00:00
crypto: x86/aes-ni-xts - use direct calls and 4-way stride
The XTS asm helper arrangement is a bit odd: the 8-way stride helper
consists of back-to-back calls to the 4-way core transforms, which
are called indirectly, based on a boolean that indicates whether we
are performing encryption or decryption.
Given how costly indirect calls are on x86, let's switch to direct
calls, and given how the 8-way stride doesn't really add anything
substantial, use a 4-way stride instead, and make the asm core
routine deal with any multiple of 4 blocks. Since 512 byte sectors
or 4 KB blocks are the typical quantities XTS operates on, increase
the stride exported to the glue helper to 512 bytes as well.
As a result, the number of indirect calls is reduced from 3 per 64 bytes
of in/output to 1 per 512 bytes of in/output, which produces a 65% speedup
when operating on 1 KB blocks (measured on an Intel(R) Core(TM) i7-8650U CPU).
Fixes: 9697fa39ef ("x86/retpoline/crypto: Convert crypto assembler indirect jumps")
Tested-by: Eric Biggers <ebiggers@google.com> # x86_64
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
parent
fecff3b931
commit
86ad60a65f
2 changed files with 93 additions and 65 deletions
|
@ -2842,25 +2842,18 @@ SYM_FUNC_END(aesni_ctr_enc)
|
||||||
pxor CTR, IV;
|
pxor CTR, IV;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *dst,
|
* void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
|
||||||
* const u8 *src, bool enc, le128 *iv)
|
* const u8 *src, unsigned int len, le128 *iv)
|
||||||
*/
|
*/
|
||||||
SYM_FUNC_START(aesni_xts_crypt8)
|
SYM_FUNC_START(aesni_xts_encrypt)
|
||||||
FRAME_BEGIN
|
FRAME_BEGIN
|
||||||
testb %cl, %cl
|
|
||||||
movl $0, %ecx
|
|
||||||
movl $240, %r10d
|
|
||||||
leaq _aesni_enc4, %r11
|
|
||||||
leaq _aesni_dec4, %rax
|
|
||||||
cmovel %r10d, %ecx
|
|
||||||
cmoveq %rax, %r11
|
|
||||||
|
|
||||||
movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
|
movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
|
||||||
movups (IVP), IV
|
movups (IVP), IV
|
||||||
|
|
||||||
mov 480(KEYP), KLEN
|
mov 480(KEYP), KLEN
|
||||||
addq %rcx, KEYP
|
|
||||||
|
|
||||||
|
.Lxts_enc_loop4:
|
||||||
movdqa IV, STATE1
|
movdqa IV, STATE1
|
||||||
movdqu 0x00(INP), INC
|
movdqu 0x00(INP), INC
|
||||||
pxor INC, STATE1
|
pxor INC, STATE1
|
||||||
|
@ -2884,71 +2877,103 @@ SYM_FUNC_START(aesni_xts_crypt8)
|
||||||
pxor INC, STATE4
|
pxor INC, STATE4
|
||||||
movdqu IV, 0x30(OUTP)
|
movdqu IV, 0x30(OUTP)
|
||||||
|
|
||||||
CALL_NOSPEC r11
|
call _aesni_enc4
|
||||||
|
|
||||||
movdqu 0x00(OUTP), INC
|
movdqu 0x00(OUTP), INC
|
||||||
pxor INC, STATE1
|
pxor INC, STATE1
|
||||||
movdqu STATE1, 0x00(OUTP)
|
movdqu STATE1, 0x00(OUTP)
|
||||||
|
|
||||||
_aesni_gf128mul_x_ble()
|
|
||||||
movdqa IV, STATE1
|
|
||||||
movdqu 0x40(INP), INC
|
|
||||||
pxor INC, STATE1
|
|
||||||
movdqu IV, 0x40(OUTP)
|
|
||||||
|
|
||||||
movdqu 0x10(OUTP), INC
|
movdqu 0x10(OUTP), INC
|
||||||
pxor INC, STATE2
|
pxor INC, STATE2
|
||||||
movdqu STATE2, 0x10(OUTP)
|
movdqu STATE2, 0x10(OUTP)
|
||||||
|
|
||||||
_aesni_gf128mul_x_ble()
|
|
||||||
movdqa IV, STATE2
|
|
||||||
movdqu 0x50(INP), INC
|
|
||||||
pxor INC, STATE2
|
|
||||||
movdqu IV, 0x50(OUTP)
|
|
||||||
|
|
||||||
movdqu 0x20(OUTP), INC
|
movdqu 0x20(OUTP), INC
|
||||||
pxor INC, STATE3
|
pxor INC, STATE3
|
||||||
movdqu STATE3, 0x20(OUTP)
|
movdqu STATE3, 0x20(OUTP)
|
||||||
|
|
||||||
_aesni_gf128mul_x_ble()
|
|
||||||
movdqa IV, STATE3
|
|
||||||
movdqu 0x60(INP), INC
|
|
||||||
pxor INC, STATE3
|
|
||||||
movdqu IV, 0x60(OUTP)
|
|
||||||
|
|
||||||
movdqu 0x30(OUTP), INC
|
movdqu 0x30(OUTP), INC
|
||||||
pxor INC, STATE4
|
pxor INC, STATE4
|
||||||
movdqu STATE4, 0x30(OUTP)
|
movdqu STATE4, 0x30(OUTP)
|
||||||
|
|
||||||
_aesni_gf128mul_x_ble()
|
_aesni_gf128mul_x_ble()
|
||||||
movdqa IV, STATE4
|
|
||||||
movdqu 0x70(INP), INC
|
|
||||||
pxor INC, STATE4
|
|
||||||
movdqu IV, 0x70(OUTP)
|
|
||||||
|
|
||||||
_aesni_gf128mul_x_ble()
|
add $64, INP
|
||||||
|
add $64, OUTP
|
||||||
|
sub $64, LEN
|
||||||
|
ja .Lxts_enc_loop4
|
||||||
|
|
||||||
movups IV, (IVP)
|
movups IV, (IVP)
|
||||||
|
|
||||||
CALL_NOSPEC r11
|
|
||||||
|
|
||||||
movdqu 0x40(OUTP), INC
|
|
||||||
pxor INC, STATE1
|
|
||||||
movdqu STATE1, 0x40(OUTP)
|
|
||||||
|
|
||||||
movdqu 0x50(OUTP), INC
|
|
||||||
pxor INC, STATE2
|
|
||||||
movdqu STATE2, 0x50(OUTP)
|
|
||||||
|
|
||||||
movdqu 0x60(OUTP), INC
|
|
||||||
pxor INC, STATE3
|
|
||||||
movdqu STATE3, 0x60(OUTP)
|
|
||||||
|
|
||||||
movdqu 0x70(OUTP), INC
|
|
||||||
pxor INC, STATE4
|
|
||||||
movdqu STATE4, 0x70(OUTP)
|
|
||||||
|
|
||||||
FRAME_END
|
FRAME_END
|
||||||
ret
|
ret
|
||||||
SYM_FUNC_END(aesni_xts_crypt8)
|
SYM_FUNC_END(aesni_xts_encrypt)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
|
||||||
|
* const u8 *src, unsigned int len, le128 *iv)
|
||||||
|
*/
|
||||||
|
SYM_FUNC_START(aesni_xts_decrypt)
|
||||||
|
FRAME_BEGIN
|
||||||
|
|
||||||
|
movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
|
||||||
|
movups (IVP), IV
|
||||||
|
|
||||||
|
mov 480(KEYP), KLEN
|
||||||
|
add $240, KEYP
|
||||||
|
|
||||||
|
.Lxts_dec_loop4:
|
||||||
|
movdqa IV, STATE1
|
||||||
|
movdqu 0x00(INP), INC
|
||||||
|
pxor INC, STATE1
|
||||||
|
movdqu IV, 0x00(OUTP)
|
||||||
|
|
||||||
|
_aesni_gf128mul_x_ble()
|
||||||
|
movdqa IV, STATE2
|
||||||
|
movdqu 0x10(INP), INC
|
||||||
|
pxor INC, STATE2
|
||||||
|
movdqu IV, 0x10(OUTP)
|
||||||
|
|
||||||
|
_aesni_gf128mul_x_ble()
|
||||||
|
movdqa IV, STATE3
|
||||||
|
movdqu 0x20(INP), INC
|
||||||
|
pxor INC, STATE3
|
||||||
|
movdqu IV, 0x20(OUTP)
|
||||||
|
|
||||||
|
_aesni_gf128mul_x_ble()
|
||||||
|
movdqa IV, STATE4
|
||||||
|
movdqu 0x30(INP), INC
|
||||||
|
pxor INC, STATE4
|
||||||
|
movdqu IV, 0x30(OUTP)
|
||||||
|
|
||||||
|
call _aesni_dec4
|
||||||
|
|
||||||
|
movdqu 0x00(OUTP), INC
|
||||||
|
pxor INC, STATE1
|
||||||
|
movdqu STATE1, 0x00(OUTP)
|
||||||
|
|
||||||
|
movdqu 0x10(OUTP), INC
|
||||||
|
pxor INC, STATE2
|
||||||
|
movdqu STATE2, 0x10(OUTP)
|
||||||
|
|
||||||
|
movdqu 0x20(OUTP), INC
|
||||||
|
pxor INC, STATE3
|
||||||
|
movdqu STATE3, 0x20(OUTP)
|
||||||
|
|
||||||
|
movdqu 0x30(OUTP), INC
|
||||||
|
pxor INC, STATE4
|
||||||
|
movdqu STATE4, 0x30(OUTP)
|
||||||
|
|
||||||
|
_aesni_gf128mul_x_ble()
|
||||||
|
|
||||||
|
add $64, INP
|
||||||
|
add $64, OUTP
|
||||||
|
sub $64, LEN
|
||||||
|
ja .Lxts_dec_loop4
|
||||||
|
|
||||||
|
movups IV, (IVP)
|
||||||
|
|
||||||
|
FRAME_END
|
||||||
|
ret
|
||||||
|
SYM_FUNC_END(aesni_xts_decrypt)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -101,6 +101,12 @@ asmlinkage void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
|
||||||
#define AVX_GEN2_OPTSIZE 640
|
#define AVX_GEN2_OPTSIZE 640
|
||||||
#define AVX_GEN4_OPTSIZE 4096
|
#define AVX_GEN4_OPTSIZE 4096
|
||||||
|
|
||||||
|
asmlinkage void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *out,
|
||||||
|
const u8 *in, unsigned int len, u8 *iv);
|
||||||
|
|
||||||
|
asmlinkage void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *out,
|
||||||
|
const u8 *in, unsigned int len, u8 *iv);
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
|
|
||||||
static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out,
|
static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out,
|
||||||
|
@ -108,9 +114,6 @@ static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out,
|
||||||
asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
|
asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
|
||||||
const u8 *in, unsigned int len, u8 *iv);
|
const u8 *in, unsigned int len, u8 *iv);
|
||||||
|
|
||||||
asmlinkage void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *out,
|
|
||||||
const u8 *in, bool enc, le128 *iv);
|
|
||||||
|
|
||||||
/* asmlinkage void aesni_gcm_enc()
|
/* asmlinkage void aesni_gcm_enc()
|
||||||
* void *ctx, AES Key schedule. Starts on a 16 byte boundary.
|
* void *ctx, AES Key schedule. Starts on a 16 byte boundary.
|
||||||
* struct gcm_context_data. May be uninitialized.
|
* struct gcm_context_data. May be uninitialized.
|
||||||
|
@ -663,14 +666,14 @@ static void aesni_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
|
||||||
glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_dec);
|
glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_dec);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void aesni_xts_enc8(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
|
static void aesni_xts_enc32(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
|
||||||
{
|
{
|
||||||
aesni_xts_crypt8(ctx, dst, src, true, iv);
|
aesni_xts_encrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void aesni_xts_dec8(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
|
static void aesni_xts_dec32(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
|
||||||
{
|
{
|
||||||
aesni_xts_crypt8(ctx, dst, src, false, iv);
|
aesni_xts_decrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct common_glue_ctx aesni_enc_xts = {
|
static const struct common_glue_ctx aesni_enc_xts = {
|
||||||
|
@ -678,8 +681,8 @@ static const struct common_glue_ctx aesni_enc_xts = {
|
||||||
.fpu_blocks_limit = 1,
|
.fpu_blocks_limit = 1,
|
||||||
|
|
||||||
.funcs = { {
|
.funcs = { {
|
||||||
.num_blocks = 8,
|
.num_blocks = 32,
|
||||||
.fn_u = { .xts = aesni_xts_enc8 }
|
.fn_u = { .xts = aesni_xts_enc32 }
|
||||||
}, {
|
}, {
|
||||||
.num_blocks = 1,
|
.num_blocks = 1,
|
||||||
.fn_u = { .xts = aesni_xts_enc }
|
.fn_u = { .xts = aesni_xts_enc }
|
||||||
|
@ -691,8 +694,8 @@ static const struct common_glue_ctx aesni_dec_xts = {
|
||||||
.fpu_blocks_limit = 1,
|
.fpu_blocks_limit = 1,
|
||||||
|
|
||||||
.funcs = { {
|
.funcs = { {
|
||||||
.num_blocks = 8,
|
.num_blocks = 32,
|
||||||
.fn_u = { .xts = aesni_xts_dec8 }
|
.fn_u = { .xts = aesni_xts_dec32 }
|
||||||
}, {
|
}, {
|
||||||
.num_blocks = 1,
|
.num_blocks = 1,
|
||||||
.fn_u = { .xts = aesni_xts_dec }
|
.fn_u = { .xts = aesni_xts_dec }
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue