https://bugs.gentoo.org/498632
Make sure we do not use unaligned loads/stores, as some arches really hate that.
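
For reference only (not part of the patch itself), the sketch below illustrates the technique the hunks use: view each buffer through a union of pointer types, OR the three addresses together so a single test catches a misalignment in any of them, and fall back to byte-wise XOR when the wide path is not safe. The names xor_block and BLOCK_LEN are illustrative stand-ins, and the sketch only shows the 64-bit case rather than the patch's full __uint128_t/32-bit ladder.

/* Illustrative sketch only -- not part of the patch below. */
#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

#define BLOCK_LEN 16	/* stand-in for AES_BLOCK_SIZE */

static void
xor_block(uint8_t *dest, const uint8_t *src, const uint8_t *key)
{
	/* View each pointer both as bytes and as 64-bit words. */
	union { uint8_t *u8; uint64_t *u64; uintptr_t u; } d;
	union { const uint8_t *cu8; const uint64_t *u64; uintptr_t u; } s, k;
	uintptr_t align;
	size_t i;

	d.u8 = dest;
	s.cu8 = src;
	k.cu8 = key;

	/* A single OR catches a misaligned address in any of the three. */
	align = d.u | s.u | k.u;

	if ((align & 0x7) == 0) {
		/* All three are 8-byte aligned: two wide XORs per block. */
		d.u64[0] = s.u64[0] ^ k.u64[0];
		d.u64[1] = s.u64[1] ^ k.u64[1];
	} else {
		/* Unknown alignment: byte-at-a-time fallback. */
		for (i = 0; i < BLOCK_LEN; i++)
			dest[i] = src[i] ^ key[i];
	}
}

int
main(void)
{
	uint8_t src[BLOCK_LEN], key[BLOCK_LEN], out[BLOCK_LEN];
	size_t i;

	for (i = 0; i < BLOCK_LEN; i++) {
		src[i] = (uint8_t)i;
		key[i] = 0xaa;
	}
	xor_block(out, src, key);
	for (i = 0; i < BLOCK_LEN; i++)
		printf("%02x", out[i]);
	printf("\n");
	return 0;
}
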
--- a/cipher-ctr-mt.c
+++ b/cipher-ctr-mt.c
@@ -58,8 +58,16 @@
 /* Collect thread stats and print at cancellation when in debug mode */
 /* #define CIPHER_THREAD_STATS */
-/* Use single-byte XOR instead of 8-byte XOR */
-/* #define CIPHER_BYTE_XOR */
+/* Can the system do unaligned loads natively? */
+#if defined(__aarch64__) || \
+    defined(__i386__) || \
+    defined(__powerpc__) || \
+    defined(__x86_64__)
+# define CIPHER_UNALIGNED_OK
+#endif
+#if defined(__SIZEOF_INT128__)
+# define CIPHER_INT128_OK
+#endif
 /*-------------------- END TUNABLES --------------------*/
@@ -285,8 +293,20 @@ thread_loop(void *x)
 static int
 ssh_aes_ctr(EVP_CIPHER_CTX *ctx, u_char *dest, const u_char *src,
-    u_int len)
+    size_t len)
 {
+	typedef union {
+#ifdef CIPHER_INT128_OK
+		__uint128_t *u128;
+#endif
+		uint64_t *u64;
+		uint32_t *u32;
+		uint8_t *u8;
+		const uint8_t *cu8;
+		uintptr_t u;
+	} ptrs_t;
+	ptrs_t destp, srcp, bufp;
+	uintptr_t align;
 	struct ssh_aes_ctr_ctx *c;
 	struct kq *q, *oldq;
 	int ridx;
@@ -301,35 +321,41 @@ ssh_aes_ctr(EVP_CIPHER_CTX *ctx, u_char *dest, const u_char *src,
 	ridx = c->ridx;
 	/* src already padded to block multiple */
+	srcp.cu8 = src;
+	destp.u8 = dest;
 	while (len > 0) {
 		buf = q->keys[ridx];
+		bufp.u8 = buf;
-#ifdef CIPHER_BYTE_XOR
-		dest[0] = src[0] ^ buf[0];
-		dest[1] = src[1] ^ buf[1];
-		dest[2] = src[2] ^ buf[2];
-		dest[3] = src[3] ^ buf[3];
-		dest[4] = src[4] ^ buf[4];
-		dest[5] = src[5] ^ buf[5];
-		dest[6] = src[6] ^ buf[6];
-		dest[7] = src[7] ^ buf[7];
-		dest[8] = src[8] ^ buf[8];
-		dest[9] = src[9] ^ buf[9];
-		dest[10] = src[10] ^ buf[10];
-		dest[11] = src[11] ^ buf[11];
-		dest[12] = src[12] ^ buf[12];
-		dest[13] = src[13] ^ buf[13];
-		dest[14] = src[14] ^ buf[14];
-		dest[15] = src[15] ^ buf[15];
-#else
-		*(uint64_t *)dest = *(uint64_t *)src ^ *(uint64_t *)buf;
-		*(uint64_t *)(dest + 8) = *(uint64_t *)(src + 8) ^
-		    *(uint64_t *)(buf + 8);
-#endif
+		/* figure out the alignment on the fly */
+#ifdef CIPHER_UNALIGNED_OK
+		align = 0;
+#else
+		align = destp.u | srcp.u | bufp.u;
+#endif
+
+#ifdef CIPHER_INT128_OK
+		if ((align & 0xf) == 0) {
+			destp.u128[0] = srcp.u128[0] ^ bufp.u128[0];
+		} else
+#endif
+		if ((align & 0x7) == 0) {
+			destp.u64[0] = srcp.u64[0] ^ bufp.u64[0];
+			destp.u64[1] = srcp.u64[1] ^ bufp.u64[1];
+		} else if ((align & 0x3) == 0) {
+			destp.u32[0] = srcp.u32[0] ^ bufp.u32[0];
+			destp.u32[1] = srcp.u32[1] ^ bufp.u32[1];
+			destp.u32[2] = srcp.u32[2] ^ bufp.u32[2];
+			destp.u32[3] = srcp.u32[3] ^ bufp.u32[3];
+		} else {
+			size_t i;
+			for (i = 0; i < AES_BLOCK_SIZE; ++i)
+				dest[i] = src[i] ^ buf[i];
+		}
-		dest += 16;
-		src += 16;
-		len -= 16;
+		destp.u += AES_BLOCK_SIZE;
+		srcp.u += AES_BLOCK_SIZE;
+		len -= AES_BLOCK_SIZE;
 		ssh_ctr_inc(ctx->iv, AES_BLOCK_SIZE);
 		/* Increment read index, switch queues on rollover */