bzip2: expose tuning knob for faster/smaller code
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
2b7515722b
commit
4cae044b43
@ -19,6 +19,23 @@
|
|||||||
//config: Unless you have a specific application which requires bzip2, you
|
//config: Unless you have a specific application which requires bzip2, you
|
||||||
//config: should probably say N here.
|
//config: should probably say N here.
|
||||||
//config:
|
//config:
|
||||||
|
//config:config BZIP2_SMALL
|
||||||
|
//config: int "Trade bytes for speed (0:fast, 9:small)"
|
||||||
|
//config: default 8 # all "fast or small" options default to small
|
||||||
|
//config: range 0 9
|
||||||
|
//config: depends on BZIP2
|
||||||
|
//config: help
|
||||||
|
//config: Trade off code size against speed.
|
||||||
|
//config: Approximate values with gcc-6.3.0 "bzip2 -9" compressing
|
||||||
|
//config: linux-4.15.tar were:
|
||||||
|
//config: value time (sec) code size (386)
|
||||||
|
//config: 9 (smallest) 70.11 7687
|
||||||
|
//config: 8 67.93 8091
|
||||||
|
//config: 7 67.88 8405
|
||||||
|
//config: 6 67.78 8624
|
||||||
|
//config: 5 67.05 9427
|
||||||
|
//config: 4-0 (fastest) 64.14 12083
|
||||||
|
//config:
|
||||||
//config:config FEATURE_BZIP2_DECOMPRESS
|
//config:config FEATURE_BZIP2_DECOMPRESS
|
||||||
//config: bool "Enable decompression"
|
//config: bool "Enable decompression"
|
||||||
//config: default y
|
//config: default y
|
||||||
@ -48,7 +65,11 @@
|
|||||||
#include "libbb.h"
|
#include "libbb.h"
|
||||||
#include "bb_archive.h"
|
#include "bb_archive.h"
|
||||||
|
|
||||||
#define CONFIG_BZIP2_FAST 1
|
#if CONFIG_BZIP2_SMALL >= 4
|
||||||
|
#define BZIP2_SPEED (9 - CONFIG_BZIP2_SMALL)
|
||||||
|
#else
|
||||||
|
#define BZIP2_SPEED 5
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Speed test:
|
/* Speed test:
|
||||||
* Compiled with gcc 4.2.1, run on Athlon 64 1800 MHz (512K L2 cache).
|
* Compiled with gcc 4.2.1, run on Athlon 64 1800 MHz (512K L2 cache).
|
||||||
@ -56,7 +77,7 @@
|
|||||||
* (time to compress gcc-4.2.1.tar is 126.4% compared to bbox).
|
* (time to compress gcc-4.2.1.tar is 126.4% compared to bbox).
|
||||||
* At SPEED 5 difference is 32.7%.
|
* At SPEED 5 difference is 32.7%.
|
||||||
*
|
*
|
||||||
* Test run of all CONFIG_BZIP2_FAST values on a 11Mb text file:
|
* Test run of all BZIP2_SPEED values on an 11Mb text file:
|
||||||
* Size Time (3 runs)
|
* Size Time (3 runs)
|
||||||
* 0: 10828 4.145 4.146 4.148
|
* 0: 10828 4.145 4.146 4.148
|
||||||
* 1: 11097 3.845 3.860 3.861
|
* 1: 11097 3.845 3.860 3.861
|
||||||
|
@ -392,7 +392,7 @@ int mainGtU(EState* state,
|
|||||||
* but speeds up compression 10% overall
|
* but speeds up compression 10% overall
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if CONFIG_BZIP2_FAST >= 1
|
#if BZIP2_SPEED >= 1
|
||||||
|
|
||||||
#define TIMES_8(code) \
|
#define TIMES_8(code) \
|
||||||
code; code; code; code; \
|
code; code; code; code; \
|
||||||
@ -506,7 +506,7 @@ void mainSimpleSort(EState* state,
|
|||||||
i++;
|
i++;
|
||||||
|
|
||||||
/* 1.5% overall speedup, +290 bytes */
|
/* 1.5% overall speedup, +290 bytes */
|
||||||
#if CONFIG_BZIP2_FAST >= 3
|
#if BZIP2_SPEED >= 3
|
||||||
/*-- copy 2 --*/
|
/*-- copy 2 --*/
|
||||||
if (i > hi) break;
|
if (i > hi) break;
|
||||||
v = ptr[i];
|
v = ptr[i];
|
||||||
@ -755,7 +755,7 @@ void mainSort(EState* state)
|
|||||||
j = block[0] << 8;
|
j = block[0] << 8;
|
||||||
i = nblock - 1;
|
i = nblock - 1;
|
||||||
/* 3%, +300 bytes */
|
/* 3%, +300 bytes */
|
||||||
#if CONFIG_BZIP2_FAST >= 2
|
#if BZIP2_SPEED >= 2
|
||||||
for (; i >= 3; i -= 4) {
|
for (; i >= 3; i -= 4) {
|
||||||
quadrant[i] = 0;
|
quadrant[i] = 0;
|
||||||
j = (j >> 8) | (((unsigned)block[i]) << 8);
|
j = (j >> 8) | (((unsigned)block[i]) << 8);
|
||||||
@ -794,7 +794,7 @@ void mainSort(EState* state)
|
|||||||
unsigned s;
|
unsigned s;
|
||||||
s = block[0] << 8;
|
s = block[0] << 8;
|
||||||
i = nblock - 1;
|
i = nblock - 1;
|
||||||
#if CONFIG_BZIP2_FAST >= 2
|
#if BZIP2_SPEED >= 2
|
||||||
for (; i >= 3; i -= 4) {
|
for (; i >= 3; i -= 4) {
|
||||||
s = (s >> 8) | (block[i] << 8);
|
s = (s >> 8) | (block[i] << 8);
|
||||||
j = ftab[s] - 1;
|
j = ftab[s] - 1;
|
||||||
|
@ -188,7 +188,7 @@ typedef struct EState {
|
|||||||
/* stack-saving measures: these can be local, but they are too big */
|
/* stack-saving measures: these can be local, but they are too big */
|
||||||
int32_t sendMTFValues__code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
|
int32_t sendMTFValues__code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
|
||||||
int32_t sendMTFValues__rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
|
int32_t sendMTFValues__rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
|
||||||
#if CONFIG_BZIP2_FAST >= 5
|
#if BZIP2_SPEED >= 5
|
||||||
/* second dimension: only 3 needed; 4 makes index calculations faster */
|
/* second dimension: only 3 needed; 4 makes index calculations faster */
|
||||||
uint32_t sendMTFValues__len_pack[BZ_MAX_ALPHA_SIZE][4];
|
uint32_t sendMTFValues__len_pack[BZ_MAX_ALPHA_SIZE][4];
|
||||||
#endif
|
#endif
|
||||||
|
@ -32,6 +32,12 @@ in the file LICENSE.
|
|||||||
|
|
||||||
/* #include "bzlib_private.h" */
|
/* #include "bzlib_private.h" */
|
||||||
|
|
||||||
|
#if BZIP2_SPEED >= 5
|
||||||
|
# define ALWAYS_INLINE_5 ALWAYS_INLINE
|
||||||
|
#else
|
||||||
|
# define ALWAYS_INLINE_5 /*nothing*/
|
||||||
|
#endif
|
||||||
|
|
||||||
/*---------------------------------------------------*/
|
/*---------------------------------------------------*/
|
||||||
/*--- Bit stream I/O ---*/
|
/*--- Bit stream I/O ---*/
|
||||||
/*---------------------------------------------------*/
|
/*---------------------------------------------------*/
|
||||||
@ -60,9 +66,7 @@ void bsFinishWrite(EState* s)
|
|||||||
/*---------------------------------------------------*/
|
/*---------------------------------------------------*/
|
||||||
static
|
static
|
||||||
/* Helps only on level 5, on other levels hurts. ? */
|
/* Helps only on level 5, on other levels hurts. ? */
|
||||||
#if CONFIG_BZIP2_FAST >= 5
|
ALWAYS_INLINE_5
|
||||||
ALWAYS_INLINE
|
|
||||||
#endif
|
|
||||||
void bsW(EState* s, int32_t n, uint32_t v)
|
void bsW(EState* s, int32_t n, uint32_t v)
|
||||||
{
|
{
|
||||||
while (s->bsLive >= 8) {
|
while (s->bsLive >= 8) {
|
||||||
@ -75,9 +79,7 @@ void bsW(EState* s, int32_t n, uint32_t v)
|
|||||||
}
|
}
|
||||||
/* Same with n == 16: */
|
/* Same with n == 16: */
|
||||||
static
|
static
|
||||||
#if CONFIG_BZIP2_FAST >= 5
|
ALWAYS_INLINE_5
|
||||||
ALWAYS_INLINE
|
|
||||||
#endif
|
|
||||||
void bsW16(EState* s, uint32_t v)
|
void bsW16(EState* s, uint32_t v)
|
||||||
{
|
{
|
||||||
while (s->bsLive >= 8) {
|
while (s->bsLive >= 8) {
|
||||||
@ -103,9 +105,7 @@ void bsW1_1(EState* s)
|
|||||||
s->bsLive += 1;
|
s->bsLive += 1;
|
||||||
}
|
}
|
||||||
static
|
static
|
||||||
#if CONFIG_BZIP2_FAST >= 5
|
ALWAYS_INLINE_5
|
||||||
ALWAYS_INLINE
|
|
||||||
#endif
|
|
||||||
void bsW1_0(EState* s)
|
void bsW1_0(EState* s)
|
||||||
{
|
{
|
||||||
/* need space for only 1 bit, no need for loop freeing > 8 bits */
|
/* need space for only 1 bit, no need for loop freeing > 8 bits */
|
||||||
@ -394,7 +394,7 @@ void sendMTFValues(EState* s)
|
|||||||
s->rfreq[t][v] = 0;
|
s->rfreq[t][v] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if CONFIG_BZIP2_FAST >= 5
|
#if BZIP2_SPEED >= 5
|
||||||
/*
|
/*
|
||||||
* Set up an auxiliary length table which is used to fast-track
|
* Set up an auxiliary length table which is used to fast-track
|
||||||
* the common case (nGroups == 6).
|
* the common case (nGroups == 6).
|
||||||
@ -427,7 +427,7 @@ void sendMTFValues(EState* s)
|
|||||||
*/
|
*/
|
||||||
for (t = 0; t < nGroups; t++)
|
for (t = 0; t < nGroups; t++)
|
||||||
cost[t] = 0;
|
cost[t] = 0;
|
||||||
#if CONFIG_BZIP2_FAST >= 5
|
#if BZIP2_SPEED >= 5
|
||||||
if (nGroups == 6 && 50 == ge-gs+1) {
|
if (nGroups == 6 && 50 == ge-gs+1) {
|
||||||
/*--- fast track the common case ---*/
|
/*--- fast track the common case ---*/
|
||||||
register uint32_t cost01, cost23, cost45;
|
register uint32_t cost01, cost23, cost45;
|
||||||
@ -483,7 +483,7 @@ void sendMTFValues(EState* s)
|
|||||||
* Increment the symbol frequencies for the selected table.
|
* Increment the symbol frequencies for the selected table.
|
||||||
*/
|
*/
|
||||||
/* 1% faster compress. +800 bytes */
|
/* 1% faster compress. +800 bytes */
|
||||||
#if CONFIG_BZIP2_FAST >= 4
|
#if BZIP2_SPEED >= 4
|
||||||
if (nGroups == 6 && 50 == ge-gs+1) {
|
if (nGroups == 6 && 50 == ge-gs+1) {
|
||||||
/*--- fast track the common case ---*/
|
/*--- fast track the common case ---*/
|
||||||
#define BZ_ITUR(nn) s->rfreq[bt][mtfv[gs + (nn)]]++
|
#define BZ_ITUR(nn) s->rfreq[bt][mtfv[gs + (nn)]]++
|
||||||
|
@ -48,7 +48,7 @@ in the file LICENSE.
|
|||||||
|
|
||||||
|
|
||||||
/* 90 bytes, 0.3% of overall compress speed */
|
/* 90 bytes, 0.3% of overall compress speed */
|
||||||
#if CONFIG_BZIP2_FAST >= 1
|
#if BZIP2_SPEED >= 1
|
||||||
|
|
||||||
/* macro works better than inline (gcc 4.2.1) */
|
/* macro works better than inline (gcc 4.2.1) */
|
||||||
#define DOWNHEAP1(heap, weight, Heap) \
|
#define DOWNHEAP1(heap, weight, Heap) \
|
||||||
|
Loading…
x
Reference in New Issue
Block a user