libbb/sha1: x86_64 version: bswap in 64-bit chunks
function                                             old     new   delta
sha1_process_block64                                3562    3570      +8

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
		| @@ -59,7 +59,7 @@ config SHA1_SMALL | ||||
| 	Trade binary size versus speed for the sha1 algorithm. | ||||
| 	                throughput MB/s   size of sha1_process_block64 | ||||
| 	value           486  x86-64       486   x86-64 | ||||
| 	0               367  367          3657  3562 | ||||
| 	0               367  367          3657  3570 | ||||
| 	1               224  229           654   732 | ||||
| 	2,3             200  195           358   380 | ||||
|  | ||||
|   | ||||
| @@ -867,27 +867,29 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) | ||||
| 	.endif                                                      \n\ | ||||
| 	.endm                                                       \n\ | ||||
|                                                                     \n\ | ||||
| 	movl	4*8(%rdi), %r8d                                     \n\ | ||||
| 	bswap	%r8d                                                \n\ | ||||
| 	movl	4*9(%rdi), %r9d                                     \n\ | ||||
| 	bswap	%r9d                                                \n\ | ||||
| 	movl	4*10(%rdi), %r10d                                   \n\ | ||||
| 	bswap	%r10d                                               \n\ | ||||
| 	movl	4*11(%rdi), %r11d                                   \n\ | ||||
| 	bswap	%r11d                                               \n\ | ||||
| 	movl	4*12(%rdi), %r12d                                   \n\ | ||||
| 	bswap	%r12d                                               \n\ | ||||
| 	movl	4*13(%rdi), %r13d                                   \n\ | ||||
| 	bswap	%r13d                                               \n\ | ||||
| 	movl	4*14(%rdi), %r14d                                   \n\ | ||||
| 	bswap	%r14d                                               \n\ | ||||
| 	movl	4*15(%rdi), %r15d                                   \n\ | ||||
| 	bswap	%r15d                                               \n\ | ||||
| 	movl	$7, %eax                                            \n\ | ||||
| 	movq	4*8(%rdi), %r8                                      \n\ | ||||
| 	bswap	%r8                                                 \n\ | ||||
| 	movl	%r8d, %r9d                                          \n\ | ||||
| 	shrq	$32, %r8                                            \n\ | ||||
| 	movq	4*10(%rdi), %r10                                    \n\ | ||||
| 	bswap	%r10                                                \n\ | ||||
| 	movl	%r10d, %r11d                                        \n\ | ||||
| 	shrq	$32, %r10                                           \n\ | ||||
| 	movq	4*12(%rdi), %r12                                    \n\ | ||||
| 	bswap	%r12                                                \n\ | ||||
| 	movl	%r12d, %r13d                                        \n\ | ||||
| 	shrq	$32, %r12                                           \n\ | ||||
| 	movq	4*14(%rdi), %r14                                    \n\ | ||||
| 	bswap	%r14                                                \n\ | ||||
| 	movl	%r14d, %r15d                                        \n\ | ||||
| 	shrq	$32, %r14                                           \n\ | ||||
|                                                                     \n\ | ||||
| 	movl	$3, %eax                                            \n\ | ||||
| 1:                                                                  \n\ | ||||
| 	movl	(%rdi,%rax,4), %esi                                 \n\ | ||||
| 	bswap	%esi                                                \n\ | ||||
| 	movl	%esi, -32(%rsp,%rax,4)                              \n\ | ||||
| 	movq	(%rdi,%rax,8), %rsi                                 \n\ | ||||
| 	bswap	%rsi                                                \n\ | ||||
| 	rolq	$32, %rsi                                           \n\ | ||||
| 	movq	%rsi, -32(%rsp,%rax,8)                              \n\ | ||||
| 	decl	%eax                                                \n\ | ||||
| 	jns	1b                                                  \n\ | ||||
| 	movl	80(%rdi), %eax	# a = ctx->hash[0]                  \n\ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user