tls: make RIGHTSHIFTX() in AES-GCM faster

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2018-12-08 13:34:43 +01:00
parent f522dd9420
commit 9f00a0fdb1

View File

@ -35,17 +35,66 @@ static ALWAYS_INLINE void FlattenSzInBits(byte* buf, word32 sz)
static void RIGHTSHIFTX(byte* x)
{
int i;
int carryOut = 0;
int carryIn = 0;
int borrow = x[15] & 0x01;
#define l ((unsigned long*)x)
#if 0
// Generic byte-at-a-time algorithm
int i;
byte carryIn = (x[15] & 0x01) ? 0xE1 : 0;
for (i = 0; i < AES_BLOCK_SIZE; i++) {
carryOut = x[i] & 0x01;
x[i] = (x[i] >> 1) | (carryIn ? 0x80 : 0);
byte carryOut = (x[i] << 7); // zero, or 0x80
x[i] = (x[i] >> 1) ^ carryIn;
carryIn = carryOut;
}
if (borrow) x[0] ^= 0xE1;
#elif BB_BIG_ENDIAN
// Big-endian can shift-right in larger than byte chunks
// (we use the fact that 'x' is long-aligned)
unsigned long carryIn = (x[15] & 0x01)
? ((unsigned long)0xE1 << (LONG_BIT-8))
: 0;
# if ULONG_MAX <= 0xffffffff
int i;
for (i = 0; i < AES_BLOCK_SIZE/sizeof(long); i++) {
unsigned long carryOut = l[i] << (LONG_BIT-1); // zero, or 0x800..00
l[i] = (l[i] >> 1) ^ carryIn;
carryIn = carryOut;
}
# else
// 64-bit code: need to process only 2 words
unsigned long carryOut = l[0] << (LONG_BIT-1); // zero, or 0x800..00
l[0] = (l[0] >> 1) ^ carryIn;
l[1] = (l[1] >> 1) ^ carryOut;
# endif
#else /* LITTLE_ENDIAN */
// In order to use word-sized ops, little-endian needs to byteswap.
// On x86, code size increase is ~10 bytes compared to byte-by-byte.
unsigned long carryIn = (x[15] & 0x01)
? ((unsigned long)0xE1 << (LONG_BIT-8))
: 0;
# if ULONG_MAX <= 0xffffffff
int i;
for (i = 0; i < AES_BLOCK_SIZE/sizeof(long); i++) {
unsigned long ti = SWAP_BE32(l[i]);
unsigned long carryOut = ti << (LONG_BIT-1); // zero, or 0x800..00
ti = (ti >> 1) ^ carryIn;
l[i] = SWAP_BE32(ti);
carryIn = carryOut;
}
# else
// 64-bit code: need to process only 2 words
unsigned long tt = SWAP_BE64(l[0]);
unsigned long carryOut = tt << (LONG_BIT-1); // zero, or 0x800..00
tt = (tt >> 1) ^ carryIn; l[0] = SWAP_BE64(tt);
tt = SWAP_BE64(l[1]);
tt = (tt >> 1) ^ carryOut; l[1] = SWAP_BE64(tt);
# endif
#endif /* LITTLE_ENDIAN */
#undef l
}
static void GMULT(byte* X, byte* Y)