This is a port to ARMv8 (Crypto Extensions) of the Intel implementation of the GHASH Secure Hash (used in the Galois/Counter chaining mode). It relies on the optional PMULL/PMULL2 instruction (polynomial multiply long, what Intel call carry-less multiply). Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Acked-by: Herbert Xu <herbert@gondor.apana.org.au>tirimbino
parent
6ba6c74dfc
commit
fdd2389457
@ -0,0 +1,95 @@ |
||||
/* |
||||
* Accelerated GHASH implementation with ARMv8 PMULL instructions. |
||||
* |
||||
* Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
|
||||
* |
||||
* Based on arch/x86/crypto/ghash-pmullni-intel_asm.S |
||||
* |
||||
* Copyright (c) 2009 Intel Corp. |
||||
* Author: Huang Ying <ying.huang@intel.com>
|
||||
* Vinodh Gopal |
||||
* Erdinc Ozturk |
||||
* Deniz Karakoyunlu |
||||
* |
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published |
||||
* by the Free Software Foundation. |
||||
*/ |
||||
|
||||
#include <linux/linkage.h> |
||||
#include <asm/assembler.h> |
||||
|
||||
DATA .req v0 |
||||
SHASH .req v1 |
||||
IN1 .req v2 |
||||
T1 .req v2 |
||||
T2 .req v3 |
||||
T3 .req v4 |
||||
VZR .req v5 |
||||
|
||||
.text |
||||
.arch armv8-a+crypto |
||||
|
||||
/* |
||||
* void pmull_ghash_update(int blocks, u64 dg[], const char *src, |
||||
* struct ghash_key const *k, const char *head) |
||||
*/ |
||||
ENTRY(pmull_ghash_update) |
||||
ld1 {DATA.16b}, [x1] |
||||
ld1 {SHASH.16b}, [x3] |
||||
eor VZR.16b, VZR.16b, VZR.16b |
||||
|
||||
/* do the head block first, if supplied */ |
||||
cbz x4, 0f |
||||
ld1 {IN1.2d}, [x4] |
||||
b 1f |
||||
|
||||
0: ld1 {IN1.2d}, [x2], #16 |
||||
sub w0, w0, #1 |
||||
1: ext IN1.16b, IN1.16b, IN1.16b, #8 |
||||
CPU_LE( rev64 IN1.16b, IN1.16b ) |
||||
eor DATA.16b, DATA.16b, IN1.16b |
||||
|
||||
/* multiply DATA by SHASH in GF(2^128) */ |
||||
ext T2.16b, DATA.16b, DATA.16b, #8 |
||||
ext T3.16b, SHASH.16b, SHASH.16b, #8 |
||||
eor T2.16b, T2.16b, DATA.16b |
||||
eor T3.16b, T3.16b, SHASH.16b |
||||
|
||||
pmull2 T1.1q, SHASH.2d, DATA.2d // a1 * b1 |
||||
pmull DATA.1q, SHASH.1d, DATA.1d // a0 * b0 |
||||
pmull T2.1q, T2.1d, T3.1d // (a1 + a0)(b1 + b0) |
||||
eor T2.16b, T2.16b, T1.16b // (a0 * b1) + (a1 * b0) |
||||
eor T2.16b, T2.16b, DATA.16b |
||||
|
||||
ext T3.16b, VZR.16b, T2.16b, #8 |
||||
ext T2.16b, T2.16b, VZR.16b, #8 |
||||
eor DATA.16b, DATA.16b, T3.16b |
||||
eor T1.16b, T1.16b, T2.16b // <T1:DATA> is result of |
||||
// carry-less multiplication |
||||
|
||||
/* first phase of the reduction */ |
||||
shl T3.2d, DATA.2d, #1 |
||||
eor T3.16b, T3.16b, DATA.16b |
||||
shl T3.2d, T3.2d, #5 |
||||
eor T3.16b, T3.16b, DATA.16b |
||||
shl T3.2d, T3.2d, #57 |
||||
ext T2.16b, VZR.16b, T3.16b, #8 |
||||
ext T3.16b, T3.16b, VZR.16b, #8 |
||||
eor DATA.16b, DATA.16b, T2.16b |
||||
eor T1.16b, T1.16b, T3.16b |
||||
|
||||
/* second phase of the reduction */ |
||||
ushr T2.2d, DATA.2d, #5 |
||||
eor T2.16b, T2.16b, DATA.16b |
||||
ushr T2.2d, T2.2d, #1 |
||||
eor T2.16b, T2.16b, DATA.16b |
||||
ushr T2.2d, T2.2d, #1 |
||||
eor T1.16b, T1.16b, T2.16b |
||||
eor DATA.16b, DATA.16b, T1.16b |
||||
|
||||
cbnz w0, 0b |
||||
|
||||
st1 {DATA.16b}, [x1] |
||||
ret |
||||
ENDPROC(pmull_ghash_update) |
@ -0,0 +1,155 @@ |
||||
/*
|
||||
* Accelerated GHASH implementation with ARMv8 PMULL instructions. |
||||
* |
||||
* Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org> |
||||
* |
||||
* This program is free software; you can redistribute it and/or modify it |
||||
* under the terms of the GNU General Public License version 2 as published |
||||
* by the Free Software Foundation. |
||||
*/ |
||||
|
||||
#include <asm/neon.h> |
||||
#include <asm/unaligned.h> |
||||
#include <crypto/internal/hash.h> |
||||
#include <linux/cpufeature.h> |
||||
#include <linux/crypto.h> |
||||
#include <linux/module.h> |
||||
|
||||
MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions"); |
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); |
||||
MODULE_LICENSE("GPL v2"); |
||||
|
||||
#define GHASH_BLOCK_SIZE 16 |
||||
#define GHASH_DIGEST_SIZE 16 |
||||
|
||||
struct ghash_key { |
||||
u64 a; |
||||
u64 b; |
||||
}; |
||||
|
||||
struct ghash_desc_ctx { |
||||
u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)]; |
||||
u8 buf[GHASH_BLOCK_SIZE]; |
||||
u32 count; |
||||
}; |
||||
|
||||
asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src, |
||||
struct ghash_key const *k, const char *head); |
||||
|
||||
static int ghash_init(struct shash_desc *desc) |
||||
{ |
||||
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); |
||||
|
||||
*ctx = (struct ghash_desc_ctx){}; |
||||
return 0; |
||||
} |
||||
|
||||
static int ghash_update(struct shash_desc *desc, const u8 *src, |
||||
unsigned int len) |
||||
{ |
||||
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); |
||||
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; |
||||
|
||||
ctx->count += len; |
||||
|
||||
if ((partial + len) >= GHASH_BLOCK_SIZE) { |
||||
struct ghash_key *key = crypto_shash_ctx(desc->tfm); |
||||
int blocks; |
||||
|
||||
if (partial) { |
||||
int p = GHASH_BLOCK_SIZE - partial; |
||||
|
||||
memcpy(ctx->buf + partial, src, p); |
||||
src += p; |
||||
len -= p; |
||||
} |
||||
|
||||
blocks = len / GHASH_BLOCK_SIZE; |
||||
len %= GHASH_BLOCK_SIZE; |
||||
|
||||
kernel_neon_begin_partial(6); |
||||
pmull_ghash_update(blocks, ctx->digest, src, key, |
||||
partial ? ctx->buf : NULL); |
||||
kernel_neon_end(); |
||||
src += blocks * GHASH_BLOCK_SIZE; |
||||
} |
||||
if (len) |
||||
memcpy(ctx->buf + partial, src, len); |
||||
return 0; |
||||
} |
||||
|
||||
static int ghash_final(struct shash_desc *desc, u8 *dst) |
||||
{ |
||||
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); |
||||
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; |
||||
|
||||
if (partial) { |
||||
struct ghash_key *key = crypto_shash_ctx(desc->tfm); |
||||
|
||||
memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial); |
||||
|
||||
kernel_neon_begin_partial(6); |
||||
pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL); |
||||
kernel_neon_end(); |
||||
} |
||||
put_unaligned_be64(ctx->digest[1], dst); |
||||
put_unaligned_be64(ctx->digest[0], dst + 8); |
||||
|
||||
*ctx = (struct ghash_desc_ctx){}; |
||||
return 0; |
||||
} |
||||
|
||||
static int ghash_setkey(struct crypto_shash *tfm, |
||||
const u8 *inkey, unsigned int keylen) |
||||
{ |
||||
struct ghash_key *key = crypto_shash_ctx(tfm); |
||||
u64 a, b; |
||||
|
||||
if (keylen != GHASH_BLOCK_SIZE) { |
||||
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); |
||||
return -EINVAL; |
||||
} |
||||
|
||||
/* perform multiplication by 'x' in GF(2^128) */ |
||||
b = get_unaligned_be64(inkey); |
||||
a = get_unaligned_be64(inkey + 8); |
||||
|
||||
key->a = (a << 1) | (b >> 63); |
||||
key->b = (b << 1) | (a >> 63); |
||||
|
||||
if (b >> 63) |
||||
key->b ^= 0xc200000000000000UL; |
||||
|
||||
return 0; |
||||
} |
||||
|
||||
static struct shash_alg ghash_alg = { |
||||
.digestsize = GHASH_DIGEST_SIZE, |
||||
.init = ghash_init, |
||||
.update = ghash_update, |
||||
.final = ghash_final, |
||||
.setkey = ghash_setkey, |
||||
.descsize = sizeof(struct ghash_desc_ctx), |
||||
.base = { |
||||
.cra_name = "ghash", |
||||
.cra_driver_name = "ghash-ce", |
||||
.cra_priority = 200, |
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH, |
||||
.cra_blocksize = GHASH_BLOCK_SIZE, |
||||
.cra_ctxsize = sizeof(struct ghash_key), |
||||
.cra_module = THIS_MODULE, |
||||
}, |
||||
}; |
||||
|
||||
static int __init ghash_ce_mod_init(void) |
||||
{ |
||||
return crypto_register_shash(&ghash_alg); |
||||
} |
||||
|
||||
static void __exit ghash_ce_mod_exit(void) |
||||
{ |
||||
crypto_unregister_shash(&ghash_alg); |
||||
} |
||||
|
||||
module_cpu_feature_match(PMULL, ghash_ce_mod_init); |
||||
module_exit(ghash_ce_mod_exit); |
Loading…
Reference in new issue