Add an ARM NEON implementation of NHPoly1305, an ε-almost-∆-universal hash function used in the Adiantum encryption mode. For now, only the NH portion is actually NEON-accelerated; the Poly1305 part is less performance-critical so is just implemented in C.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
(cherry picked from commit 16aae3595a9d41c97d983889b341c455779c2ecf
 https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master)
Bug: 112008522
Test: As series, see Ic61c13b53facfd2173065be715a7ee5f3af8760b
Change-Id: Ibe2a3fd8ed8522f08e136e48b11492d9f01a3160
Signed-off-by: Eric Biggers <ebiggers@google.com>

tirimbino
parent
7db2b9c516
commit
59e501683f
@ -0,0 +1,116 @@ |
||||
/* SPDX-License-Identifier: GPL-2.0 */ |
||||
/* |
||||
* NH - ε-almost-universal hash function, NEON accelerated version |
||||
* |
||||
* Copyright 2018 Google LLC |
||||
* |
||||
* Author: Eric Biggers <ebiggers@google.com>
|
||||
*/ |
||||
|
||||
#include <linux/linkage.h> |
||||
|
||||
// Everything below is code and uses Advanced SIMD (NEON) instructions.
	.text
	.fpu		neon

	// Function arguments, in the order of the nh_neon() prototype
	KEY		.req	r0
	MESSAGE		.req	r1
	MESSAGE_LEN	.req	r2
	HASH		.req	r3

	// One 128-bit accumulator per NH pass.  Each is also named by its
	// two 64-bit halves (_A = low d-register, _B = high d-register) so
	// that the halves can be added together at the end.
	PASS0_SUMS	.req	q0
	PASS0_SUM_A	.req	d0
	PASS0_SUM_B	.req	d1

	PASS1_SUMS	.req	q1
	PASS1_SUM_A	.req	d2
	PASS1_SUM_B	.req	d3

	PASS2_SUMS	.req	q2
	PASS2_SUM_A	.req	d4
	PASS2_SUM_B	.req	d5

	PASS3_SUMS	.req	q3
	PASS3_SUM_A	.req	d6
	PASS3_SUM_B	.req	d7

	// Four 16-byte key strides kept live at any one time
	K0		.req	q4
	K1		.req	q5
	K2		.req	q6
	K3		.req	q7

	// Scratch registers, with names for their low (_L) and high (_H)
	// 64-bit halves
	T0		.req	q8
	T0_L		.req	d16
	T0_H		.req	d17

	T1		.req	q9
	T1_L		.req	d18
	T1_H		.req	d19

	T2		.req	q10
	T2_L		.req	d20
	T2_H		.req	d21

	T3		.req	q11
	T3_L		.req	d22
	T3_H		.req	d23
||||
|
||||
.macro _nh_stride	ka, kb, kc, kd
	// Process one 16-byte message stride for all four passes.
	//
	// \ka, \kb and \kc name key registers that are already loaded and are
	// used by passes 0, 1 and 2.  \kd names the register that receives the
	// freshly loaded key stride and is used by pass 3.

	// Fetch 16 message bytes; MESSAGE is post-incremented
	vld1.8		{T3}, [MESSAGE]!

	// Fetch 16 key bytes into \kd; KEY is post-incremented
	vld1.32		{\kd}, [KEY]!

	// Lane-wise 32-bit sums of message words and key words, one result
	// register per pass.  T3 is written last because it is also the
	// register that holds the message words.
	vadd.u32	T0, T3, \ka
	vadd.u32	T1, T3, \kb
	vadd.u32	T2, T3, \kc
	vadd.u32	T3, T3, \kd

	// Widening 32x32 => 64 multiply of each sum's low half by its high
	// half, accumulated into the per-pass running sums
	vmlal.u32	PASS0_SUMS, T0_L, T0_H
	vmlal.u32	PASS1_SUMS, T1_L, T1_H
	vmlal.u32	PASS2_SUMS, T2_L, T2_H
	vmlal.u32	PASS3_SUMS, T3_L, T3_H
.endm
||||
|
||||
/*
 * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
 *		u8 hash[NH_HASH_BYTES])
 *
 * Runs four NH passes over 'message' side by side and stores the four
 * 64-bit pass sums (32 bytes in total) to 'hash'.
 *
 * The caller guarantees that message_len % 16 == 0.
 *
 * NOTE(review): q4-q7 are used for the key strides and are not saved or
 * restored here.  Presumably that is fine because callers bracket the call
 * with kernel_neon_begin()/kernel_neon_end() -- confirm before calling this
 * from anywhere else.
 */
ENTRY(nh_neon)

	// Preload the first three key strides, zeroing the accumulators in
	// between the loads
	vld1.32		{K0,K1}, [KEY]!
	  vmov.u64	PASS0_SUMS, #0
	  vmov.u64	PASS1_SUMS, #0
	vld1.32		{K2}, [KEY]!
	  vmov.u64	PASS2_SUMS, #0
	  vmov.u64	PASS3_SUMS, #0

	// Bias the length by one 64-byte group: a negative result means that
	// no full group is left
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	blt		.Ltail
.Lgroups:
	// Four strides per iteration.  The key register names rotate by one
	// position per stride, so after four strides they line up again.
	_nh_stride	K0, K1, K2, K3
	_nh_stride	K1, K2, K3, K0
	_nh_stride	K2, K3, K0, K1
	_nh_stride	K3, K0, K1, K2
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	bge		.Lgroups

.Ltail:
	// The low six bits still give the number of leftover bytes, which is
	// 0, 16, 32 or 48.  Process up to three more strides.
	ands		MESSAGE_LEN, MESSAGE_LEN, #63
	beq		.Lfinish
	_nh_stride	K0, K1, K2, K3

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Lfinish
	_nh_stride	K1, K2, K3, K0

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Lfinish
	_nh_stride	K2, K3, K0, K1

.Lfinish:
	// Fold the two 64-bit halves of every accumulator into one sum per
	// pass, pack the four sums into T0 and T1, and write them to 'hash'
	vadd.u64	T0_L, PASS0_SUM_A, PASS0_SUM_B
	vadd.u64	T0_H, PASS1_SUM_A, PASS1_SUM_B
	vadd.u64	T1_L, PASS2_SUM_A, PASS2_SUM_B
	vadd.u64	T1_H, PASS3_SUM_A, PASS3_SUM_B
	vst1.8		{T0-T1}, [HASH]
	bx		lr
ENDPROC(nh_neon)
@ -0,0 +1,77 @@ |
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* NHPoly1305 - ε-almost-∆-universal hash function for Adiantum |
||||
* (NEON accelerated version) |
||||
* |
||||
* Copyright 2018 Google LLC |
||||
*/ |
||||
|
||||
#include <asm/neon.h> |
||||
#include <asm/simd.h> |
||||
#include <crypto/internal/hash.h> |
||||
#include <crypto/nhpoly1305.h> |
||||
#include <linux/module.h> |
||||
|
||||
asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len, |
||||
u8 hash[NH_HASH_BYTES]); |
||||
|
||||
/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ |
||||
static void _nh_neon(const u32 *key, const u8 *message, size_t message_len, |
||||
__le64 hash[NH_NUM_PASSES]) |
||||
{ |
||||
nh_neon(key, message, message_len, (u8 *)hash); |
||||
} |
||||
|
||||
static int nhpoly1305_neon_update(struct shash_desc *desc, |
||||
const u8 *src, unsigned int srclen) |
||||
{ |
||||
if (srclen < 64 || !may_use_simd()) |
||||
return crypto_nhpoly1305_update(desc, src, srclen); |
||||
|
||||
do { |
||||
unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); |
||||
|
||||
kernel_neon_begin(); |
||||
crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); |
||||
kernel_neon_end(); |
||||
src += n; |
||||
srclen -= n; |
||||
} while (srclen); |
||||
return 0; |
||||
} |
||||
|
||||
static struct shash_alg nhpoly1305_alg = { |
||||
.base.cra_name = "nhpoly1305", |
||||
.base.cra_driver_name = "nhpoly1305-neon", |
||||
.base.cra_priority = 200, |
||||
.base.cra_ctxsize = sizeof(struct nhpoly1305_key), |
||||
.base.cra_module = THIS_MODULE, |
||||
.digestsize = POLY1305_DIGEST_SIZE, |
||||
.init = crypto_nhpoly1305_init, |
||||
.update = nhpoly1305_neon_update, |
||||
.final = crypto_nhpoly1305_final, |
||||
.setkey = crypto_nhpoly1305_setkey, |
||||
.descsize = sizeof(struct nhpoly1305_state), |
||||
}; |
||||
|
||||
/*
 * Module entry point: register the algorithm, but only when the CPU reports
 * NEON support.  Returns -ENODEV without NEON, otherwise the result of
 * crypto_register_shash().
 */
static int __init nhpoly1305_mod_init(void)
{
	const bool have_neon = elf_hwcap & HWCAP_NEON;

	if (have_neon)
		return crypto_register_shash(&nhpoly1305_alg);

	return -ENODEV;
}
||||
|
||||
/* Module exit point: unregister the algorithm registered by the init hook. */
static void __exit nhpoly1305_mod_exit(void)
{
	crypto_unregister_shash(&nhpoly1305_alg);
}
||||
|
||||
module_init(nhpoly1305_mod_init); |
||||
module_exit(nhpoly1305_mod_exit); |
||||
|
||||
MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (NEON-accelerated)"); |
||||
MODULE_LICENSE("GPL v2"); |
||||
MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>"); |
||||
MODULE_ALIAS_CRYPTO("nhpoly1305"); |
||||
MODULE_ALIAS_CRYPTO("nhpoly1305-neon"); |
Loading…
Reference in new issue