libbb/sha1: x86_64 version: bswap in 64-bit chunks
function                                             old     new   delta
sha1_process_block64                                3562    3570      +8

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
		@@ -59,7 +59,7 @@ config SHA1_SMALL
 | 
			
		||||
	Trade binary size versus speed for the sha1 algorithm.
 | 
			
		||||
	                throughput MB/s   size of sha1_process_block64
 | 
			
		||||
	value           486  x86-64       486   x86-64
 | 
			
		||||
	0               367  367          3657  3562
 | 
			
		||||
	0               367  367          3657  3570
 | 
			
		||||
	1               224  229           654   732
 | 
			
		||||
	2,3             200  195           358   380
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -867,27 +867,29 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM)
 | 
			
		||||
	.endif                                                      \n\
 | 
			
		||||
	.endm                                                       \n\
 | 
			
		||||
                                                                    \n\
 | 
			
		||||
	movl	4*8(%rdi), %r8d                                     \n\
 | 
			
		||||
	bswap	%r8d                                                \n\
 | 
			
		||||
	movl	4*9(%rdi), %r9d                                     \n\
 | 
			
		||||
	bswap	%r9d                                                \n\
 | 
			
		||||
	movl	4*10(%rdi), %r10d                                   \n\
 | 
			
		||||
	bswap	%r10d                                               \n\
 | 
			
		||||
	movl	4*11(%rdi), %r11d                                   \n\
 | 
			
		||||
	bswap	%r11d                                               \n\
 | 
			
		||||
	movl	4*12(%rdi), %r12d                                   \n\
 | 
			
		||||
	bswap	%r12d                                               \n\
 | 
			
		||||
	movl	4*13(%rdi), %r13d                                   \n\
 | 
			
		||||
	bswap	%r13d                                               \n\
 | 
			
		||||
	movl	4*14(%rdi), %r14d                                   \n\
 | 
			
		||||
	bswap	%r14d                                               \n\
 | 
			
		||||
	movl	4*15(%rdi), %r15d                                   \n\
 | 
			
		||||
	bswap	%r15d                                               \n\
 | 
			
		||||
	movl	$7, %eax                                            \n\
 | 
			
		||||
	movq	4*8(%rdi), %r8                                      \n\
 | 
			
		||||
	bswap	%r8                                                 \n\
 | 
			
		||||
	movl	%r8d, %r9d                                          \n\
 | 
			
		||||
	shrq	$32, %r8                                            \n\
 | 
			
		||||
	movq	4*10(%rdi), %r10                                    \n\
 | 
			
		||||
	bswap	%r10                                                \n\
 | 
			
		||||
	movl	%r10d, %r11d                                        \n\
 | 
			
		||||
	shrq	$32, %r10                                           \n\
 | 
			
		||||
	movq	4*12(%rdi), %r12                                    \n\
 | 
			
		||||
	bswap	%r12                                                \n\
 | 
			
		||||
	movl	%r12d, %r13d                                        \n\
 | 
			
		||||
	shrq	$32, %r12                                           \n\
 | 
			
		||||
	movq	4*14(%rdi), %r14                                    \n\
 | 
			
		||||
	bswap	%r14                                                \n\
 | 
			
		||||
	movl	%r14d, %r15d                                        \n\
 | 
			
		||||
	shrq	$32, %r14                                           \n\
 | 
			
		||||
                                                                    \n\
 | 
			
		||||
	movl	$3, %eax                                            \n\
 | 
			
		||||
1:                                                                  \n\
 | 
			
		||||
	movl	(%rdi,%rax,4), %esi                                 \n\
 | 
			
		||||
	bswap	%esi                                                \n\
 | 
			
		||||
	movl	%esi, -32(%rsp,%rax,4)                              \n\
 | 
			
		||||
	movq	(%rdi,%rax,8), %rsi                                 \n\
 | 
			
		||||
	bswap	%rsi                                                \n\
 | 
			
		||||
	rolq	$32, %rsi                                           \n\
 | 
			
		||||
	movq	%rsi, -32(%rsp,%rax,8)                              \n\
 | 
			
		||||
	decl	%eax                                                \n\
 | 
			
		||||
	jns	1b                                                  \n\
 | 
			
		||||
	movl	80(%rdi), %eax	# a = ctx->hash[0]                  \n\
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user