| /* SPDX-License-Identifier: GPL-2.0 */ |
| /* |
| * NH - ε-almost-universal hash function, ARM64 NEON accelerated version |
| * |
| * Copyright 2018 Google LLC |
| * |
| * Author: Eric Biggers <ebiggers@google.com> |
| */ |
| |
| #include <linux/linkage.h> |
| |
| /* |
| * Register aliases used throughout this file ('.req' gives a register a name). |
| * |
| * x0-x3 hold the four arguments of nh_neon(), in AAPCS64 argument order. |
| */ |
| KEY .req x0 |
| MESSAGE .req x1 |
| MESSAGE_LEN .req x2 |
| HASH .req x3 |
| |
| // One pair of 64-bit running sums per NH pass |
| PASS0_SUMS .req v0 |
| PASS1_SUMS .req v1 |
| PASS2_SUMS .req v2 |
| PASS3_SUMS .req v3 |
| |
| // Sliding window of four 16-byte key strides. K0 and K1 must stay in |
| // consecutive registers: nh_neon loads them with a two-register ld1 list. |
| K0 .req v4 |
| K1 .req v5 |
| K2 .req v6 |
| K3 .req v7 |
| |
| // Scratch registers. These are placed in v16-v23 rather than v8-v15: AAPCS64 |
| // makes the low 64 bits of v8-v15 callee-saved, and nothing in this file |
| // saves or restores vector registers. T0 and T1 must stay in consecutive |
| // registers: nh_neon stores them with a two-register st1 list. |
| T0 .req v16 |
| T1 .req v17 |
| T2 .req v18 |
| T3 .req v19 |
| T4 .req v20 |
| T5 .req v21 |
| T6 .req v22 |
| T7 .req v23 |
| |
| /* |
| * _nh_stride ka, kb, kc, kd |
| * |
| * Consume one 16-byte message stride and update all four pass accumulators. |
| * |
| * ka, kb, kc: vector registers that already hold the key strides paired with |
| * this message stride for passes 0, 1 and 2. |
| * kd: vector register that is loaded with the next 16 bytes at KEY and |
| * then used as the key stride for pass 3. |
| * |
| * MESSAGE and KEY are each post-incremented by 16. T0-T7 are overwritten. |
| */ |
| .macro _nh_stride ka, kb, kc, kd |
| |
| // Fetch 16 message bytes into T3 |
| ld1 {T3.16b}, [MESSAGE], #16 |
| |
| // Fetch four fresh 32-bit key words into \kd |
| ld1 {\kd\().4s}, [KEY], #16 |
| |
| // Per pass: 32-bit lane-wise (message + key). T3 is written last because |
| // it is also the message source for the three adds before it. |
| add T0.4s, T3.4s, \ka\().4s |
| add T1.4s, T3.4s, \kb\().4s |
| add T2.4s, T3.4s, \kc\().4s |
| add T3.4s, T3.4s, \kd\().4s |
| |
| // Per pass: copy the upper two 32-bit lanes down into a second register, |
| // then widen-multiply lane 0 by lane 2 and lane 1 by lane 3, adding the |
| // two 64-bit products into the pass accumulator. |
| ins T4.d[0], T0.d[1] |
| ins T5.d[0], T1.d[1] |
| ins T6.d[0], T2.d[1] |
| ins T7.d[0], T3.d[1] |
| umlal PASS0_SUMS.2d, T0.2s, T4.2s |
| umlal PASS1_SUMS.2d, T1.2s, T5.2s |
| umlal PASS2_SUMS.2d, T2.2s, T6.2s |
| umlal PASS3_SUMS.2d, T3.2s, T7.2s |
| .endm |
| |
| /* |
| * void nh_neon(const u32 *key, const u8 *message, size_t message_len, |
| * u8 hash[NH_HASH_BYTES]) |
| * |
| * Feeds 'message' through _nh_stride 16 bytes at a time while keeping one |
| * accumulator register per pass, then writes four 64-bit sums (32 bytes) to |
| * 'hash'. |
| * |
| * message_len is guaranteed to be a multiple of 16. |
| * |
| * 48 bytes of 'key' are read up front and 16 more for every message stride. |
| * KEY, MESSAGE and MESSAGE_LEN are modified; K0-K3, T0-T7 and the four |
| * PASSn_SUMS registers are overwritten. |
| */ |
| ENTRY(nh_neon) |
| |
| // Preload the first three key strides and clear the four accumulators |
| ld1 {K0.4s,K1.4s}, [KEY], #32 |
| movi PASS0_SUMS.2d, #0 |
| movi PASS1_SUMS.2d, #0 |
| ld1 {K2.4s}, [KEY], #16 |
| movi PASS2_SUMS.2d, #0 |
| movi PASS3_SUMS.2d, #0 |
| |
| // Skip the unrolled loop when fewer than 64 bytes are available |
| subs MESSAGE_LEN, MESSAGE_LEN, #64 |
| b.lt .Lnh_tail |
| |
| // Four strides (64 bytes) per iteration. Each stride loads a new key |
| // stride into its last argument, so the argument list rotates by one |
| // register per stride and is back in its starting order after four. |
| .Lnh_quad: |
| _nh_stride K0, K1, K2, K3 |
| _nh_stride K1, K2, K3, K0 |
| _nh_stride K2, K3, K0, K1 |
| _nh_stride K3, K0, K1, K2 |
| subs MESSAGE_LEN, MESSAGE_LEN, #64 |
| b.ge .Lnh_quad |
| |
| .Lnh_tail: |
| // MESSAGE_LEN has gone negative, but subtracting multiples of 64 left |
| // its low six bits untouched: they give the bytes still to process |
| // (0, 16, 32 or 48). |
| ands MESSAGE_LEN, MESSAGE_LEN, #63 |
| b.eq .Lnh_finish |
| _nh_stride K0, K1, K2, K3 |
| |
| subs MESSAGE_LEN, MESSAGE_LEN, #16 |
| b.eq .Lnh_finish |
| _nh_stride K1, K2, K3, K0 |
| |
| subs MESSAGE_LEN, MESSAGE_LEN, #16 |
| b.eq .Lnh_finish |
| _nh_stride K2, K3, K0, K1 |
| |
| .Lnh_finish: |
| // Fold each accumulator's two 64-bit lanes into one sum, giving |
| // T0 = {pass0, pass1} and T1 = {pass2, pass3}, then write all 32 bytes |
| // to 'hash'. |
| addp T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d |
| addp T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d |
| st1 {T0.16b,T1.16b}, [HASH] |
| ret |
| ENDPROC(nh_neon) |