sha3: code shrink
function                                             old     new   delta
KeccakF                                             1064    1053     -11

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
		@@ -988,24 +988,29 @@ static void KeccakF(uint64_t *state)
 | 
				
			|||||||
	for (round = 0; round < cKeccakNumberOfRounds; ++round) {
 | 
						for (round = 0; round < cKeccakNumberOfRounds; ++round) {
 | 
				
			||||||
		/* Theta */
 | 
							/* Theta */
 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
			uint64_t BC[5];
 | 
								uint64_t BC[10];
 | 
				
			||||||
			for (x = 0; x < 5; ++x) {
 | 
								for (x = 0; x < 5; ++x) {
 | 
				
			||||||
				BC[x] = state[x] ^ state[5 + x] ^ state[10 + x] ^
 | 
									BC[x + 5] = BC[x] = state[x]
 | 
				
			||||||
					state[15 + x] ^ state[20 + x];
 | 
										^ state[x + 5] ^ state[x + 10]
 | 
				
			||||||
 | 
										^ state[x + 15]	^ state[x + 20];
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
								/* Using 2x5 vector above eliminates the need to use
 | 
				
			||||||
 | 
								 * [Mod5[x+N]] index trick below to calculate (x+N) % 5,
 | 
				
			||||||
 | 
								 * and the code is a bit _smaller_.
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
			for (x = 0; x < 5; ++x) {
 | 
								for (x = 0; x < 5; ++x) {
 | 
				
			||||||
				uint64_t temp = BC[KeccakF_Mod5[x + 4]] ^
 | 
									uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1);
 | 
				
			||||||
					rotl64(BC[KeccakF_Mod5[x + 1]], 1);
 | 
					 | 
				
			||||||
				if (SHA3_SMALL && !ARCH_IS_64BIT) {
 | 
									if (SHA3_SMALL && !ARCH_IS_64BIT) {
 | 
				
			||||||
	                    		for (y = 0; y <= 20; y += 5)
 | 
						                    		for (y = 0; y <= 20; y += 5)
 | 
				
			||||||
						state[y + x] ^= temp;
 | 
											state[x + y] ^= temp;
 | 
				
			||||||
				} else {
 | 
									} else {
 | 
				
			||||||
					/* on 64-bit arch, this is actually smaller too */
 | 
										/* On 64-bit, this is also smaller,
 | 
				
			||||||
					state[0 + x] ^= temp;
 | 
										 * not only faster, than loop */
 | 
				
			||||||
					state[5 + x] ^= temp;
 | 
										state[x] ^= temp;
 | 
				
			||||||
					state[10 + x] ^= temp;
 | 
										state[x + 5] ^= temp;
 | 
				
			||||||
					state[15 + x] ^= temp;
 | 
										state[x + 10] ^= temp;
 | 
				
			||||||
					state[20 + x] ^= temp;
 | 
										state[x + 15] ^= temp;
 | 
				
			||||||
 | 
										state[x + 20] ^= temp;
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
@@ -1019,7 +1024,7 @@ static void KeccakF(uint64_t *state)
 | 
				
			|||||||
				t1 = t0;
 | 
									t1 = t0;
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
			/* Especially large benefit for 32-bit arch:
 | 
								/* Especially large benefit for 32-bit arch (75% faster):
 | 
				
			||||||
			 * 64-bit rotations by non-constant usually are SLOW on those.
 | 
								 * 64-bit rotations by non-constant usually are SLOW on those.
 | 
				
			||||||
			 * We resort to unrolling here.
 | 
								 * We resort to unrolling here.
 | 
				
			||||||
			 * This optimizes out KeccakF_PiLane[] and KeccakF_RotationConstants[],
 | 
								 * This optimizes out KeccakF_PiLane[] and KeccakF_RotationConstants[],
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user