From 5368fe541c023130843cea361d779addb936b95c Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 16 Jan 2013 02:20:31 +0100 Subject: [PATCH] sha3: rename KeccakF->sha3_process_block76. This brings the naming more in line with other hashes. Pulled most statics and constants into it. Also noticed that two byte arrays are 1 element too big. Signed-off-by: Denys Vlasenko --- libbb/hash_md5_sha.c | 160 ++++++++++++++++++++++--------------------- 1 file changed, 81 insertions(+), 79 deletions(-) diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index d143fc651..15588dcfe 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c @@ -190,10 +190,9 @@ static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx) int i; uint32_t temp; -# if BB_BIG_ENDIAN - for (i = 0; i < 16; i++) - words[i] = SWAP_LE32(words[i]); -# endif + if (BB_BIG_ENDIAN) + for (i = 0; i < 16; i++) + words[i] = SWAP_LE32(words[i]); # if MD5_SMALL == 3 pc = C_array; @@ -467,12 +466,13 @@ void FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf) common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN); /* The MD5 result is in little endian byte order */ -#if BB_BIG_ENDIAN - ctx->hash[0] = SWAP_LE32(ctx->hash[0]); - ctx->hash[1] = SWAP_LE32(ctx->hash[1]); - ctx->hash[2] = SWAP_LE32(ctx->hash[2]); - ctx->hash[3] = SWAP_LE32(ctx->hash[3]); -#endif + if (BB_BIG_ENDIAN) { + ctx->hash[0] = SWAP_LE32(ctx->hash[0]); + ctx->hash[1] = SWAP_LE32(ctx->hash[1]); + ctx->hash[2] = SWAP_LE32(ctx->hash[2]); + ctx->hash[3] = SWAP_LE32(ctx->hash[3]); + } + memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4); } @@ -927,59 +927,61 @@ void FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf) #endif enum { - KECCAK_IBLK_BYTES = 576 / 8, - KECCAK_NROUNDS = 24, + SHA3_IBLK_BYTES = 72, /* 576 bits / 8 */ }; -/* Elements should be 64-bit, but top half is always zero or 0x80000000. - * We encode 63rd bits in a separate word below. - * Same is true for 31th bits, which lets us use 16-bit table instead of 64-bit. - * The speed penalty is lost in the noise. +/* + * In the crypto literature this function is usually called Keccak-f(). */ -static const uint16_t KECCAK_IOTA_CONST[KECCAK_NROUNDS] = { - 0x0001U, - 0x8082U, - 0x808aU, - 0x8000U, - 0x808bU, - 0x0001U, - 0x8081U, - 0x8009U, - 0x008aU, - 0x0088U, - 0x8009U, - 0x000aU, - 0x808bU, - 0x008bU, - 0x8089U, - 0x8003U, - 0x8002U, - 0x0080U, - 0x800aU, - 0x000aU, - 0x8081U, - 0x8080U, - 0x0001U, - 0x8008U, -}; -/* bit from CONST[0] is msb: 0011 0011 0000 0111 1101 1101 */ -#define KECCAK_IOTA_CONST_bit63 ((uint32_t)(0x3307dd00)) -/* bit from CONST[0] is msb: 0001 0110 0011 1000 0001 1011 */ -#define KECCAK_IOTA_CONST_bit31 ((uint32_t)(0x16381b00)) - -static const uint8_t KECCAK_ROT_CONST[25] = { - 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, 27, 41, 56, 8, 25, 43, 62, - 18, 39, 61, 20, 44 -}; - -static const uint8_t KECCAK_PI_LANE[25] = { - 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, 15, 23, 19, 13, 12, 2, 20, - 14, 22, 9, 6, 1 -}; - -static void KeccakF(uint64_t *state) +static void sha3_process_block76(uint64_t *state) { - /*static const uint8_t MOD5[10] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4 };*/ + enum { NROUNDS = 24 }; + + /* Elements should be 64-bit, but top half is always zero or 0x80000000. + * We encode 63rd bits in a separate word below. + * Same is true for 31th bits, which lets us use 16-bit table instead of 64-bit. + * The speed penalty is lost in the noise. + */ + static const uint16_t IOTA_CONST[NROUNDS] = { + 0x0001, + 0x8082, + 0x808a, + 0x8000, + 0x808b, + 0x0001, + 0x8081, + 0x8009, + 0x008a, + 0x0088, + 0x8009, + 0x000a, + 0x808b, + 0x008b, + 0x8089, + 0x8003, + 0x8002, + 0x0080, + 0x800a, + 0x000a, + 0x8081, + 0x8080, + 0x0001, + 0x8008, + }; + /* bit for CONST[0] is in msb: 0011 0011 0000 0111 1101 1101 */ + const uint32_t IOTA_CONST_bit63 = (uint32_t)(0x3307dd00); + /* bit for CONST[0] is in msb: 0001 0110 0011 1000 0001 1011 */ + const uint32_t IOTA_CONST_bit31 = (uint32_t)(0x16381b00); + + static const uint8_t ROT_CONST[24] = { + 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44, + }; + static const uint8_t PI_LANE[24] = { + 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1, + }; + /*static const uint8_t MOD5[10] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, };*/ unsigned x, y; unsigned round; @@ -990,7 +992,7 @@ static void KeccakF(uint64_t *state) } } - for (round = 0; round < KECCAK_NROUNDS; ++round) { + for (round = 0; round < NROUNDS; ++round) { /* Theta */ { uint64_t BC[10]; @@ -1017,24 +1019,24 @@ static void KeccakF(uint64_t *state) if (SHA3_SMALL) { uint64_t t1 = state[1]; for (x = 0; x < 24; ++x) { - uint64_t t0 = state[KECCAK_PI_LANE[x]]; - state[KECCAK_PI_LANE[x]] = rotl64(t1, KECCAK_ROT_CONST[x]); + uint64_t t0 = state[PI_LANE[x]]; + state[PI_LANE[x]] = rotl64(t1, ROT_CONST[x]); t1 = t0; } } else { /* Especially large benefit for 32-bit arch (75% faster): * 64-bit rotations by non-constant usually are SLOW on those. * We resort to unrolling here. - * This optimizes out KECCAK_PI_LANE[] and KECCAK_ROT_CONST[], + * This optimizes out PI_LANE[] and ROT_CONST[], * but generates 300-500 more bytes of code. */ uint64_t t0; uint64_t t1 = state[1]; #define RhoPi_twice(x) \ - t0 = state[KECCAK_PI_LANE[x ]]; \ - state[KECCAK_PI_LANE[x ]] = rotl64(t1, KECCAK_ROT_CONST[x ]); \ - t1 = state[KECCAK_PI_LANE[x+1]]; \ - state[KECCAK_PI_LANE[x+1]] = rotl64(t0, KECCAK_ROT_CONST[x+1]); + t0 = state[PI_LANE[x ]]; \ + state[PI_LANE[x ]] = rotl64(t1, ROT_CONST[x ]); \ + t1 = state[PI_LANE[x+1]]; \ + state[PI_LANE[x+1]] = rotl64(t0, ROT_CONST[x+1]); RhoPi_twice(0); RhoPi_twice(2); RhoPi_twice(4); RhoPi_twice(6); RhoPi_twice(8); RhoPi_twice(10); @@ -1060,9 +1062,9 @@ static void KeccakF(uint64_t *state) } /* Iota */ - state[0] ^= KECCAK_IOTA_CONST[round] - | (uint32_t)((KECCAK_IOTA_CONST_bit31 << round) & 0x80000000) - | (uint64_t)((KECCAK_IOTA_CONST_bit63 << round) & 0x80000000) << 32; + state[0] ^= IOTA_CONST[round] + | (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000) + | (uint64_t)((IOTA_CONST_bit63 << round) & 0x80000000) << 32; } if (BB_BIG_ENDIAN) { @@ -1088,19 +1090,19 @@ void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buf, size_t bytes) buffer[bytes_queued] ^= *data++; bytes--; bytes_queued++; - if (bytes_queued == KECCAK_IBLK_BYTES) { - KeccakF(ctx->state); + if (bytes_queued == SHA3_IBLK_BYTES) { + sha3_process_block76(ctx->state); bytes_queued = 0; } } /* Absorb complete blocks */ - while (bytes >= KECCAK_IBLK_BYTES) { + while (bytes >= SHA3_IBLK_BYTES) { /* XOR data onto beginning of state[]. * We try to be efficient - operate on word at a time, not byte. * Yet safe wrt unaligned access: can't just use "*(long*)data"... */ - unsigned count = KECCAK_IBLK_BYTES / sizeof(long); + unsigned count = SHA3_IBLK_BYTES / sizeof(long); long *buffer = (long*)ctx->state; do { long v; @@ -1109,9 +1111,9 @@ void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buf, size_t bytes) data += sizeof(long); } while (--count); - KeccakF(ctx->state); + sha3_process_block76(ctx->state); - bytes -= KECCAK_IBLK_BYTES; + bytes -= SHA3_IBLK_BYTES; } /* Queue remaining data bytes */ @@ -1129,10 +1131,10 @@ void FAST_FUNC sha3_end(sha3_ctx_t *ctx, uint8_t *hashval) { /* Padding */ uint8_t *buffer = (uint8_t*)ctx->state; - buffer[ctx->bytes_queued] ^= 1; - buffer[KECCAK_IBLK_BYTES - 1] ^= 0x80; + buffer[ctx->bytes_queued] ^= 1; + buffer[SHA3_IBLK_BYTES - 1] ^= 0x80; - KeccakF(ctx->state); + sha3_process_block76(ctx->state); /* Output */ memcpy(hashval, ctx->state, 64);