shrink sha hashing a bit more (remove the wbuflen field from ctx);

remove the requirement for an aligned result buffer (resbuf) in the *_end functions

function                                             old     new   delta
sha512_hash                                          262     297     +35
sha1_end                                             136     143      +7
passwd_main                                         1019    1023      +4
sha256_end                                           135     137      +2
count_lines                                           72      74      +2
sha256_hash                                          259     260      +1
popstring                                            164     158      -6
sha512_begin                                          88      81      -7
sha256_begin                                          44      37      -7
parse_expr                                           832     824      -8
bbunpack                                             446     438      -8
sha256_process_block64                               529     520      -9
md5_end                                              166     151     -15
evaltreenr                                           817     802     -15
evaltree                                             817     802     -15
sha512_end                                           204     182     -22
sha512_process_block128                             1444    1405     -39
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 6/11 up/down: 51/-151)         Total: -100 bytes
This commit is contained in:
Denis Vlasenko 2009-03-12 15:40:27 +00:00
parent 6a5d9faa29
commit cd2cd312b7
3 changed files with 113 additions and 136 deletions

View File

@ -1329,18 +1329,16 @@ void sha1_begin(sha1_ctx_t *ctx) FAST_FUNC;
void sha1_hash(const void *data, size_t length, sha1_ctx_t *ctx) FAST_FUNC; void sha1_hash(const void *data, size_t length, sha1_ctx_t *ctx) FAST_FUNC;
void sha1_end(void *resbuf, sha1_ctx_t *ctx) FAST_FUNC; void sha1_end(void *resbuf, sha1_ctx_t *ctx) FAST_FUNC;
typedef struct sha256_ctx_t { typedef struct sha256_ctx_t {
unsigned wbuflen;
uint32_t H[8];
uint64_t total64; uint64_t total64;
uint32_t hash[8];
char wbuffer[64*2]; /* NB: always correctly aligned for uint64_t */ char wbuffer[64*2]; /* NB: always correctly aligned for uint64_t */
} sha256_ctx_t; } sha256_ctx_t;
void sha256_begin(sha256_ctx_t *ctx) FAST_FUNC; void sha256_begin(sha256_ctx_t *ctx) FAST_FUNC;
void sha256_hash(const void *buffer, size_t len, sha256_ctx_t *ctx) FAST_FUNC; void sha256_hash(const void *buffer, size_t len, sha256_ctx_t *ctx) FAST_FUNC;
void sha256_end(void *resbuf, sha256_ctx_t *ctx) FAST_FUNC; void sha256_end(void *resbuf, sha256_ctx_t *ctx) FAST_FUNC;
typedef struct sha512_ctx_t { typedef struct sha512_ctx_t {
unsigned wbuflen;
uint64_t H[8];
uint64_t total64[2]; uint64_t total64[2];
uint64_t hash[8];
char wbuffer[128*2]; /* NB: always correctly aligned for uint64_t */ char wbuffer[128*2]; /* NB: always correctly aligned for uint64_t */
} sha512_ctx_t; } sha512_ctx_t;
void sha512_begin(sha512_ctx_t *ctx) FAST_FUNC; void sha512_begin(sha512_ctx_t *ctx) FAST_FUNC;

View File

@ -416,15 +416,14 @@ void FAST_FUNC md5_end(void *resbuf, md5_ctx_t *ctx)
md5_hash_block(ctx->buffer, ctx); md5_hash_block(ctx->buffer, ctx);
md5_hash_block(buf, ctx); md5_hash_block(buf, ctx);
/* Put result from CTX in first 16 bytes following RESBUF. The result is /* The MD5 result is in little endian byte order.
* always in little endian byte order, so that a byte-wise output yields * We (ab)use the fact that A-D are consecutive in memory.
* to the wanted ASCII representation of the message digest.
*
* IMPORTANT: On some systems it is required that RESBUF is correctly
* aligned for a 32 bits value.
*/ */
((uint32_t *) resbuf)[0] = SWAP_LE32(ctx->A); #if BB_BIG_ENDIAN
((uint32_t *) resbuf)[1] = SWAP_LE32(ctx->B); ctx->A = SWAP_LE32(ctx->A);
((uint32_t *) resbuf)[2] = SWAP_LE32(ctx->C); ctx->B = SWAP_LE32(ctx->B);
((uint32_t *) resbuf)[3] = SWAP_LE32(ctx->D); ctx->C = SWAP_LE32(ctx->C);
ctx->D = SWAP_LE32(ctx->D);
#endif
memcpy(resbuf, &ctx->A, sizeof(ctx->A) * 4);
} }

View File

@ -200,20 +200,14 @@ static const uint32_t K512_lo[80] = {
static void sha256_process_block64(const void *buffer, size_t len, sha256_ctx_t *ctx) static void sha256_process_block64(const void *buffer, size_t len, sha256_ctx_t *ctx)
{ {
const uint32_t *words = buffer; const uint32_t *words = buffer;
uint32_t a = ctx->H[0]; uint32_t a = ctx->hash[0];
uint32_t b = ctx->H[1]; uint32_t b = ctx->hash[1];
uint32_t c = ctx->H[2]; uint32_t c = ctx->hash[2];
uint32_t d = ctx->H[3]; uint32_t d = ctx->hash[3];
uint32_t e = ctx->H[4]; uint32_t e = ctx->hash[4];
uint32_t f = ctx->H[5]; uint32_t f = ctx->hash[5];
uint32_t g = ctx->H[6]; uint32_t g = ctx->hash[6];
uint32_t h = ctx->H[7]; uint32_t h = ctx->hash[7];
/* First increment the byte count. FIPS 180-2 specifies the possible
length of the file up to 2^64 _bits_.
We compute the number of _bytes_ and convert to bits later. */
len &= ~(size_t)(sizeof(uint32_t) * 16 - 1);
ctx->total64 += len;
/* Process all bytes in the buffer with 64 bytes in each round of /* Process all bytes in the buffer with 64 bytes in each round of
the loop. */ the loop. */
@ -260,14 +254,14 @@ static void sha256_process_block64(const void *buffer, size_t len, sha256_ctx_t
#undef R1 #undef R1
/* Add the starting values of the context according to FIPS 180-2:6.2.2 /* Add the starting values of the context according to FIPS 180-2:6.2.2
step 4. */ step 4. */
ctx->H[0] = a += ctx->H[0]; ctx->hash[0] = a += ctx->hash[0];
ctx->H[1] = b += ctx->H[1]; ctx->hash[1] = b += ctx->hash[1];
ctx->H[2] = c += ctx->H[2]; ctx->hash[2] = c += ctx->hash[2];
ctx->H[3] = d += ctx->H[3]; ctx->hash[3] = d += ctx->hash[3];
ctx->H[4] = e += ctx->H[4]; ctx->hash[4] = e += ctx->hash[4];
ctx->H[5] = f += ctx->H[5]; ctx->hash[5] = f += ctx->hash[5];
ctx->H[6] = g += ctx->H[6]; ctx->hash[6] = g += ctx->hash[6];
ctx->H[7] = h += ctx->H[7]; ctx->hash[7] = h += ctx->hash[7];
/* Prepare for the next round. */ /* Prepare for the next round. */
len--; len--;
@ -278,22 +272,14 @@ static void sha256_process_block64(const void *buffer, size_t len, sha256_ctx_t
static void sha512_process_block128(const void *buffer, size_t len, sha512_ctx_t *ctx) static void sha512_process_block128(const void *buffer, size_t len, sha512_ctx_t *ctx)
{ {
const uint64_t *words = buffer; const uint64_t *words = buffer;
uint64_t a = ctx->H[0]; uint64_t a = ctx->hash[0];
uint64_t b = ctx->H[1]; uint64_t b = ctx->hash[1];
uint64_t c = ctx->H[2]; uint64_t c = ctx->hash[2];
uint64_t d = ctx->H[3]; uint64_t d = ctx->hash[3];
uint64_t e = ctx->H[4]; uint64_t e = ctx->hash[4];
uint64_t f = ctx->H[5]; uint64_t f = ctx->hash[5];
uint64_t g = ctx->H[6]; uint64_t g = ctx->hash[6];
uint64_t h = ctx->H[7]; uint64_t h = ctx->hash[7];
/* First increment the byte count. FIPS 180-2 specifies the possible
length of the file up to 2^128 _bits_.
We compute the number of _bytes_ and convert to bits later. */
len &= ~(size_t)(sizeof(uint64_t) * 16 - 1);
ctx->total64[0] += len;
if (ctx->total64[0] < len)
ctx->total64[1]++;
len /= (sizeof(uint64_t) * 16); len /= (sizeof(uint64_t) * 16);
while (len) { while (len) {
@ -338,14 +324,14 @@ static void sha512_process_block128(const void *buffer, size_t len, sha512_ctx_t
#undef R1 #undef R1
/* Add the starting values of the context according to FIPS 180-2:6.3.2 /* Add the starting values of the context according to FIPS 180-2:6.3.2
step 4. */ step 4. */
ctx->H[0] = a += ctx->H[0]; ctx->hash[0] = a += ctx->hash[0];
ctx->H[1] = b += ctx->H[1]; ctx->hash[1] = b += ctx->hash[1];
ctx->H[2] = c += ctx->H[2]; ctx->hash[2] = c += ctx->hash[2];
ctx->H[3] = d += ctx->H[3]; ctx->hash[3] = d += ctx->hash[3];
ctx->H[4] = e += ctx->H[4]; ctx->hash[4] = e += ctx->hash[4];
ctx->H[5] = f += ctx->H[5]; ctx->hash[5] = f += ctx->hash[5];
ctx->H[6] = g += ctx->H[6]; ctx->hash[6] = g += ctx->hash[6];
ctx->H[7] = h += ctx->H[7]; ctx->hash[7] = h += ctx->hash[7];
len--; len--;
} }
@ -386,9 +372,8 @@ static const uint32_t init512_lo[] = {
(FIPS 180-2:5.3.2) */ (FIPS 180-2:5.3.2) */
void FAST_FUNC sha256_begin(sha256_ctx_t *ctx) void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
{ {
memcpy(ctx->H, init256, sizeof(init256)); memcpy(ctx->hash, init256, sizeof(init256));
ctx->total64 = 0; ctx->total64 = 0;
ctx->wbuflen = 0;
} }
/* Initialize structure containing state of computation. /* Initialize structure containing state of computation.
(FIPS 180-2:5.3.3) */ (FIPS 180-2:5.3.3) */
@ -396,9 +381,8 @@ void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
{ {
int i; int i;
for (i = 0; i < 8; i++) for (i = 0; i < 8; i++)
ctx->H[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i]; ctx->hash[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
ctx->total64[0] = ctx->total64[1] = 0; ctx->total64[0] = ctx->total64[1] = 0;
ctx->wbuflen = 0;
} }
@ -406,28 +390,35 @@ void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
/* hash_compile function as required. */ /* hash_compile function as required. */
void FAST_FUNC sha1_hash(const void *buffer, size_t len, sha1_ctx_t *ctx) void FAST_FUNC sha1_hash(const void *buffer, size_t len, sha1_ctx_t *ctx)
{ {
unsigned wbuflen = ctx->total64 & SHA1_MASK; unsigned in_buf = ctx->total64 & SHA1_MASK;
unsigned add = SHA1_BLOCK_SIZE - wbuflen; unsigned add = SHA1_BLOCK_SIZE - in_buf;
ctx->total64 += len; ctx->total64 += len;
while (len >= add) { /* transfer whole blocks while possible */ while (len >= add) { /* transfer whole blocks while possible */
memcpy(((unsigned char *) ctx->wbuffer) + wbuflen, buffer, add); memcpy(((unsigned char *) ctx->wbuffer) + in_buf, buffer, add);
buffer = (const char *)buffer + add; buffer = (const char *)buffer + add;
len -= add; len -= add;
add = SHA1_BLOCK_SIZE; add = SHA1_BLOCK_SIZE;
wbuflen = 0; in_buf = 0;
sha1_process_block64(ctx); sha1_process_block64(ctx);
} }
memcpy(((unsigned char *) ctx->wbuffer) + wbuflen, buffer, len); memcpy(((unsigned char *) ctx->wbuffer) + in_buf, buffer, len);
} }
void FAST_FUNC sha256_hash(const void *buffer, size_t len, sha256_ctx_t *ctx) void FAST_FUNC sha256_hash(const void *buffer, size_t len, sha256_ctx_t *ctx)
{ {
unsigned in_buf = ctx->total64 & 63;
/* First increment the byte count. FIPS 180-2 specifies the possible
length of the file up to 2^64 _bits_.
We compute the number of _bytes_ and convert to bits later. */
ctx->total64 += len;
/* When we already have some bits in our internal buffer concatenate /* When we already have some bits in our internal buffer concatenate
both inputs first. */ both inputs first. */
if (ctx->wbuflen != 0) { if (in_buf != 0) {
unsigned add; unsigned add;
/* NB: 1/2 of wbuffer is used only in sha256_end /* NB: 1/2 of wbuffer is used only in sha256_end
@ -435,18 +426,17 @@ void FAST_FUNC sha256_hash(const void *buffer, size_t len, sha256_ctx_t *ctx)
* With buffer twice as small, it may happen that * With buffer twice as small, it may happen that
* we have it almost full and can't add length field. */ * we have it almost full and can't add length field. */
add = sizeof(ctx->wbuffer)/2 - ctx->wbuflen; add = sizeof(ctx->wbuffer)/2 - in_buf;
if (add > len) if (add > len)
add = len; add = len;
memcpy(&ctx->wbuffer[ctx->wbuflen], buffer, add); memcpy(&ctx->wbuffer[in_buf], buffer, add);
ctx->wbuflen += add; in_buf += add;
/* If we still didn't collect full wbuffer, bail out */ /* If we still didn't collect full wbuffer, bail out */
if (ctx->wbuflen < sizeof(ctx->wbuffer)/2) if (in_buf < sizeof(ctx->wbuffer)/2)
return; return;
sha256_process_block64(ctx->wbuffer, 64, ctx); sha256_process_block64(ctx->wbuffer, 64, ctx);
ctx->wbuflen = 0;
buffer = (const char *)buffer + add; buffer = (const char *)buffer + add;
len -= add; len -= add;
} }
@ -454,7 +444,7 @@ void FAST_FUNC sha256_hash(const void *buffer, size_t len, sha256_ctx_t *ctx)
/* Process available complete blocks. */ /* Process available complete blocks. */
if (len >= 64) { if (len >= 64) {
if (UNALIGNED_P(buffer, uint32_t)) { if (UNALIGNED_P(buffer, uint32_t)) {
while (len > 64) { while (len >= 64) {
sha256_process_block64(memcpy(ctx->wbuffer, buffer, 64), 64, ctx); sha256_process_block64(memcpy(ctx->wbuffer, buffer, 64), 64, ctx);
buffer = (const char *)buffer + 64; buffer = (const char *)buffer + 64;
len -= 64; len -= 64;
@ -469,33 +459,40 @@ void FAST_FUNC sha256_hash(const void *buffer, size_t len, sha256_ctx_t *ctx)
/* Move remaining bytes into internal buffer. */ /* Move remaining bytes into internal buffer. */
if (len > 0) { if (len > 0) {
memcpy(ctx->wbuffer, buffer, len); memcpy(ctx->wbuffer, buffer, len);
ctx->wbuflen = len;
} }
} }
void FAST_FUNC sha512_hash(const void *buffer, size_t len, sha512_ctx_t *ctx) void FAST_FUNC sha512_hash(const void *buffer, size_t len, sha512_ctx_t *ctx)
{ {
if (ctx->wbuflen != 0) { unsigned in_buf = ctx->total64[0] & 127;
/* First increment the byte count. FIPS 180-2 specifies the possible
length of the file up to 2^128 _bits_.
We compute the number of _bytes_ and convert to bits later. */
ctx->total64[0] += len;
if (ctx->total64[0] < len)
ctx->total64[1]++;
if (in_buf != 0) {
unsigned add; unsigned add;
add = sizeof(ctx->wbuffer)/2 - ctx->wbuflen; add = sizeof(ctx->wbuffer)/2 - in_buf;
if (add > len) if (add > len)
add = len; add = len;
memcpy(&ctx->wbuffer[ctx->wbuflen], buffer, add); memcpy(&ctx->wbuffer[in_buf], buffer, add);
ctx->wbuflen += add; in_buf += add;
if (ctx->wbuflen < sizeof(ctx->wbuffer)/2) if (in_buf < sizeof(ctx->wbuffer)/2)
return; return;
sha512_process_block128(ctx->wbuffer, 128, ctx); sha512_process_block128(ctx->wbuffer, 128, ctx);
ctx->wbuflen = 0;
buffer = (const char *)buffer + add; buffer = (const char *)buffer + add;
len -= add; len -= add;
} }
if (len >= 128) { if (len >= 128) {
if (UNALIGNED_P(buffer, uint64_t)) { if (UNALIGNED_P(buffer, uint64_t)) {
while (len > 128) { while (len >= 128) {
sha512_process_block128(memcpy(ctx->wbuffer, buffer, 128), 128, ctx); sha512_process_block128(memcpy(ctx->wbuffer, buffer, 128), 128, ctx);
buffer = (const char *)buffer + 128; buffer = (const char *)buffer + 128;
len -= 128; len -= 128;
@ -509,20 +506,19 @@ void FAST_FUNC sha512_hash(const void *buffer, size_t len, sha512_ctx_t *ctx)
if (len > 0) { if (len > 0) {
memcpy(ctx->wbuffer, buffer, len); memcpy(ctx->wbuffer, buffer, len);
ctx->wbuflen = len;
} }
} }
void FAST_FUNC sha1_end(void *resbuf, sha1_ctx_t *ctx) void FAST_FUNC sha1_end(void *resbuf, sha1_ctx_t *ctx)
{ {
unsigned i, wbuflen, pad; unsigned i, pad, in_buf;
/* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */ /* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
wbuflen = ctx->total64 & SHA1_MASK; in_buf = ctx->total64 & SHA1_MASK;
((uint8_t *)ctx->wbuffer)[wbuflen++] = 0x80; ((uint8_t *)ctx->wbuffer)[in_buf++] = 0x80;
pad = SHA1_BLOCK_SIZE - wbuflen; pad = SHA1_BLOCK_SIZE - in_buf;
memset(((uint8_t *)ctx->wbuffer) + wbuflen, 0, pad); memset(((uint8_t *)ctx->wbuffer) + in_buf, 0, pad);
/* We need 1+8 or more empty positions, one for the padding byte /* We need 1+8 or more empty positions, one for the padding byte
* (above) and eight for the length count. * (above) and eight for the length count.
@ -543,77 +539,61 @@ void FAST_FUNC sha1_end(void *resbuf, sha1_ctx_t *ctx)
sha1_process_block64(ctx); sha1_process_block64(ctx);
/* Extract the hash value as bytes in case resbuf is #if BB_LITTLE_ENDIAN
* misaligned for 32-bit words */ for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i)
for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i) { ctx->hash[i] = htonl(ctx->hash[i]);
uint32_t t = ctx->hash[i]; #endif
t = ntohl(t); /* paranoia. this can be a macro */ memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
move_to_unaligned32(resbuf, t); /* ditto */
resbuf = (char*)resbuf + 4;
}
} }
/* Process the remaining bytes in the internal buffer and the usual
prolog according to the standard and write the result to RESBUF.
IMPORTANT: On some systems it is required that RESBUF is correctly
aligned for a 32 bits value. */
void FAST_FUNC sha256_end(void *resbuf, sha256_ctx_t *ctx) void FAST_FUNC sha256_end(void *resbuf, sha256_ctx_t *ctx)
{ {
/* Take yet unprocessed bytes into account. */ unsigned i, pad, in_buf;
unsigned bytes = ctx->wbuflen;
unsigned pad;
/* Now count remaining bytes. */
ctx->total64 += bytes;
/* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... /* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0...
(FIPS 180-2:5.1.1) */ (FIPS 180-2:5.1.1) */
pad = (bytes >= 56 ? 64 + 56 - bytes : 56 - bytes); in_buf = ctx->total64 & 63;
memset(&ctx->wbuffer[bytes], 0, pad); pad = (in_buf >= 56 ? 64 + 56 - in_buf : 56 - in_buf);
ctx->wbuffer[bytes] = 0x80; memset(&ctx->wbuffer[in_buf], 0, pad);
ctx->wbuffer[in_buf] = 0x80;
/* Put the 64-bit file length in *bits* at the end of the buffer. */ /* Put the 64-bit file length in *bits* at the end of the buffer. */
{ {
uint64_t t = ctx->total64 << 3; uint64_t t = ctx->total64 << 3;
t = hton64(t); t = hton64(t);
/* wbuffer is suitably aligned for this */ /* wbuffer is suitably aligned for this */
*(uint64_t *) &ctx->wbuffer[bytes + pad] = t; *(uint64_t *) &ctx->wbuffer[in_buf + pad] = t;
} }
/* Process last bytes. */ /* Process last bytes. */
sha256_process_block64(ctx->wbuffer, bytes + pad + 8, ctx); sha256_process_block64(ctx->wbuffer, in_buf + pad + 8, ctx);
for (unsigned i = 0; i < 8; ++i) #if BB_LITTLE_ENDIAN
((uint32_t *) resbuf)[i] = ntohl(ctx->H[i]); for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i)
ctx->hash[i] = htonl(ctx->hash[i]);
#endif
memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
} }
/* Process the remaining bytes in the internal buffer and the usual
prolog according to the standard and write the result to RESBUF.
IMPORTANT: On some systems it is required that RESBUF is correctly
aligned for a 64 bits value. */
void FAST_FUNC sha512_end(void *resbuf, sha512_ctx_t *ctx) void FAST_FUNC sha512_end(void *resbuf, sha512_ctx_t *ctx)
{ {
unsigned bytes = ctx->wbuflen; unsigned i, pad, in_buf;
unsigned pad;
ctx->total64[0] += bytes;
if (ctx->total64[0] < bytes)
ctx->total64[1]++;
/* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0... /* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0...
(FIPS 180-2:5.1.2) */ (FIPS 180-2:5.1.2) */
pad = bytes >= 112 ? 128 + 112 - bytes : 112 - bytes; in_buf = ctx->total64[0] & 127;
memset(&ctx->wbuffer[bytes], 0, pad); pad = in_buf >= 112 ? 128 + 112 - in_buf : 112 - in_buf;
ctx->wbuffer[bytes] = 0x80; memset(&ctx->wbuffer[in_buf], 0, pad);
ctx->wbuffer[in_buf] = 0x80;
*(uint64_t *) &ctx->wbuffer[bytes + pad + 8] = hton64(ctx->total64[0] << 3); *(uint64_t *) &ctx->wbuffer[in_buf + pad + 8] = hton64(ctx->total64[0] << 3);
*(uint64_t *) &ctx->wbuffer[bytes + pad] = hton64((ctx->total64[1] << 3) | (ctx->total64[0] >> 61)); *(uint64_t *) &ctx->wbuffer[in_buf + pad] = hton64((ctx->total64[1] << 3) | (ctx->total64[0] >> 61));
sha512_process_block128(ctx->wbuffer, bytes + pad + 16, ctx); sha512_process_block128(ctx->wbuffer, in_buf + pad + 16, ctx);
for (unsigned i = 0; i < 8; ++i) #if BB_LITTLE_ENDIAN
((uint64_t *) resbuf)[i] = hton64(ctx->H[i]); for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i)
ctx->hash[i] = hton64(ctx->hash[i]);
#endif
memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
} }