decompress_bunzip2: keep bd->writeCRC in CPU reg in the hot loop
-5 bytes on 64-bit, +7 bytes on 32-bit. Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
This commit is contained in:
parent
f29a1c5656
commit
bf3bec51fc
@ -492,15 +492,20 @@ static int get_next_block(bunzip_data *bd)
|
|||||||
int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
|
int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
|
||||||
{
|
{
|
||||||
const uint32_t *dbuf;
|
const uint32_t *dbuf;
|
||||||
int pos, current, previous, gotcount;
|
int pos, current, previous, out_count;
|
||||||
|
uint32_t CRC;
|
||||||
|
|
||||||
/* If last read was short due to end of file, return last block now */
|
/* If we already have error/end indicator, return it */
|
||||||
if (bd->writeCount < 0) return bd->writeCount;
|
if (bd->writeCount < 0)
|
||||||
|
return bd->writeCount;
|
||||||
|
|
||||||
gotcount = 0;
|
out_count = 0;
|
||||||
dbuf = bd->dbuf;
|
dbuf = bd->dbuf;
|
||||||
|
|
||||||
|
/* Register-cached state (hopefully): */
|
||||||
pos = bd->writePos;
|
pos = bd->writePos;
|
||||||
current = bd->writeCurrent;
|
current = bd->writeCurrent;
|
||||||
|
CRC = bd->writeCRC; /* small loss on x86-32 (not enough regs), win on x86-64 */
|
||||||
|
|
||||||
/* We will always have pending decoded data to write into the output
|
/* We will always have pending decoded data to write into the output
|
||||||
buffer unless this is the very first call (in which case we haven't
|
buffer unless this is the very first call (in which case we haven't
|
||||||
@ -514,8 +519,8 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
|
|||||||
/* Loop outputting bytes */
|
/* Loop outputting bytes */
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
|
||||||
/* If the output buffer is full, snapshot state and return */
|
/* If the output buffer is full, save cached state and return */
|
||||||
if (gotcount >= len) {
|
if (out_count >= len) {
|
||||||
/* Unlikely branch.
|
/* Unlikely branch.
|
||||||
* Use of "goto" instead of keeping code here
|
* Use of "goto" instead of keeping code here
|
||||||
* helps compiler to realize this. */
|
* helps compiler to realize this. */
|
||||||
@ -523,17 +528,16 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Write next byte into output buffer, updating CRC */
|
/* Write next byte into output buffer, updating CRC */
|
||||||
outbuf[gotcount++] = current;
|
outbuf[out_count++] = current;
|
||||||
bd->writeCRC = (bd->writeCRC << 8)
|
CRC = (CRC << 8) ^ bd->crc32Table[(CRC >> 24) ^ current];
|
||||||
^ bd->crc32Table[(bd->writeCRC >> 24) ^ current];
|
|
||||||
|
|
||||||
/* Loop now if we're outputting multiple copies of this byte */
|
/* Loop now if we're outputting multiple copies of this byte */
|
||||||
if (bd->writeCopies) {
|
if (bd->writeCopies) {
|
||||||
/* Unlikely branch */
|
/* Unlikely branch */
|
||||||
/*--bd->writeCopies;*/
|
/*--bd->writeCopies;*/
|
||||||
/*continue;*/
|
/*continue;*/
|
||||||
/* Same, but (ab)using other existing --writeCopies operation.
|
/* Same, but (ab)using other existing --writeCopies operation
|
||||||
* Luckily, this also compiles into just one branch insn: */
|
* (and this if() compiles into just test+branch pair): */
|
||||||
goto dec_writeCopies;
|
goto dec_writeCopies;
|
||||||
}
|
}
|
||||||
decode_next_byte:
|
decode_next_byte:
|
||||||
@ -549,7 +553,7 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
|
|||||||
/* After 3 consecutive copies of the same byte, the 4th
|
/* After 3 consecutive copies of the same byte, the 4th
|
||||||
* is a repeat count. We count down from 4 instead
|
* is a repeat count. We count down from 4 instead
|
||||||
* of counting up because testing for non-zero is faster */
|
* of counting up because testing for non-zero is faster */
|
||||||
if (--bd->writeRunCountdown) {
|
if (--bd->writeRunCountdown != 0) {
|
||||||
if (current != previous)
|
if (current != previous)
|
||||||
bd->writeRunCountdown = 4;
|
bd->writeRunCountdown = 4;
|
||||||
} else {
|
} else {
|
||||||
@ -568,11 +572,11 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
|
|||||||
} /* for(;;) */
|
} /* for(;;) */
|
||||||
|
|
||||||
/* Decompression of this input block completed successfully */
|
/* Decompression of this input block completed successfully */
|
||||||
bd->writeCRC = ~bd->writeCRC;
|
bd->writeCRC = CRC = ~CRC;
|
||||||
bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC;
|
bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ CRC;
|
||||||
|
|
||||||
/* If this block had a CRC error, force file level CRC error. */
|
/* If this block had a CRC error, force file level CRC error */
|
||||||
if (bd->writeCRC != bd->headerCRC) {
|
if (CRC != bd->headerCRC) {
|
||||||
bd->totalCRC = bd->headerCRC + 1;
|
bd->totalCRC = bd->headerCRC + 1;
|
||||||
return RETVAL_LAST_BLOCK;
|
return RETVAL_LAST_BLOCK;
|
||||||
}
|
}
|
||||||
@ -581,23 +585,26 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
|
|||||||
/* Refill the intermediate buffer by Huffman-decoding next block of input */
|
/* Refill the intermediate buffer by Huffman-decoding next block of input */
|
||||||
{
|
{
|
||||||
int r = get_next_block(bd);
|
int r = get_next_block(bd);
|
||||||
if (r) {
|
if (r) { /* error/end */
|
||||||
bd->writeCount = r;
|
bd->writeCount = r;
|
||||||
return (r != RETVAL_LAST_BLOCK) ? r : gotcount;
|
return (r != RETVAL_LAST_BLOCK) ? r : out_count;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bd->writeCRC = ~0;
|
CRC = ~0;
|
||||||
pos = bd->writePos;
|
pos = bd->writePos;
|
||||||
current = bd->writeCurrent;
|
current = bd->writeCurrent;
|
||||||
goto decode_next_byte;
|
goto decode_next_byte;
|
||||||
|
|
||||||
outbuf_full:
|
outbuf_full:
|
||||||
/* Output buffer is full, snapshot state and return */
|
/* Output buffer is full, save cached state and return */
|
||||||
bd->writePos = pos;
|
bd->writePos = pos;
|
||||||
bd->writeCurrent = current;
|
bd->writeCurrent = current;
|
||||||
|
bd->writeCRC = CRC;
|
||||||
|
|
||||||
bd->writeCopies++;
|
bd->writeCopies++;
|
||||||
return gotcount;
|
|
||||||
|
return out_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain
|
/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain
|
||||||
|
Loading…
Reference in New Issue
Block a user