bzip2: pass sorting params through EState* pointer

function                                             old     new   delta
mainGtU                                              499     515     +16
sendMTFValues                                       2085    2094      +9
mainSort                                            1116    1119      +3
generateMTFValues                                    357     356      -1
fallbackSort                                        1719    1705     -14
mainQSort3                                          1163    1141     -22
BZ2_blockSort                                        118      85     -33
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/4 up/down: 28/-70)            Total: -42 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2018-02-03 20:19:51 +01:00
parent 10f516500e
commit c9ae8d770b
3 changed files with 65 additions and 67 deletions

View File

@ -227,17 +227,19 @@ void fallbackQSort3(uint32_t* fmap,
#define UNALIGNED_BH(zz) ((zz) & 0x01f) #define UNALIGNED_BH(zz) ((zz) & 0x01f)
static static
void fallbackSort(uint32_t* fmap, void fallbackSort(EState* state)
uint32_t* eclass,
uint32_t* bhtab,
int32_t nblock)
{ {
int32_t ftab[257]; int32_t ftab[257];
int32_t ftabCopy[256]; int32_t ftabCopy[256];
int32_t H, i, j, k, l, r, cc, cc1; int32_t H, i, j, k, l, r, cc, cc1;
int32_t nNotDone; int32_t nNotDone;
int32_t nBhtab; int32_t nBhtab;
uint8_t* eclass8 = (uint8_t*)eclass; /* params */
uint32_t *const fmap = state->arr1;
uint32_t *const eclass = state->arr2;
#define eclass8 ((uint8_t*)eclass)
uint32_t *const bhtab = state->ftab;
const int32_t nblock = state->nblock;
/* /*
* Initial 1-char radix sort to generate * Initial 1-char radix sort to generate
@ -349,6 +351,7 @@ void fallbackSort(uint32_t* fmap,
eclass8[fmap[i]] = (uint8_t)j; eclass8[fmap[i]] = (uint8_t)j;
} }
AssertH(j < 256, 1005); AssertH(j < 256, 1005);
#undef eclass8
} }
#undef SET_BH #undef SET_BH
@ -367,18 +370,18 @@ void fallbackSort(uint32_t* fmap,
/*---------------------------------------------*/ /*---------------------------------------------*/
static static
NOINLINE NOINLINE
int mainGtU( int mainGtU(EState* state,
uint32_t i1, uint32_t i1,
uint32_t i2, uint32_t i2)
uint8_t* block,
uint16_t* quadrant,
uint32_t nblock,
int32_t* budget)
{ {
int32_t k; int32_t k;
uint8_t c1, c2; uint8_t c1, c2;
uint16_t s1, s2; uint16_t s1, s2;
uint8_t *const block = state->block;
uint16_t *const quadrant = state->quadrant;
const int32_t nblock = state->nblock;
/* Loop unrolling here is actually very useful /* Loop unrolling here is actually very useful
* (generated code is much simpler), * (generated code is much simpler),
* code size increase is only 270 bytes (i386) * code size increase is only 270 bytes (i386)
@ -435,7 +438,7 @@ int mainGtU(
if (i1 >= nblock) i1 -= nblock; if (i1 >= nblock) i1 -= nblock;
if (i2 >= nblock) i2 -= nblock; if (i2 >= nblock) i2 -= nblock;
(*budget)--; state->budget--;
k -= 8; k -= 8;
} while (k >= 0); } while (k >= 0);
@ -459,15 +462,13 @@ const uint32_t incs[14] = {
}; };
static static
void mainSimpleSort(uint32_t* ptr, void mainSimpleSort(EState* state,
uint8_t* block,
uint16_t* quadrant,
int32_t nblock,
int32_t lo, int32_t lo,
int32_t hi, int32_t hi,
int32_t d, int32_t d)
int32_t* budget)
{ {
uint32_t *const ptr = state->ptr;
/* At which increment to start? */ /* At which increment to start? */
int hp = 0; int hp = 0;
{ {
@ -492,7 +493,7 @@ void mainSimpleSort(uint32_t* ptr,
if (i > hi) break; if (i > hi) break;
v = ptr[i]; v = ptr[i];
j = i; j = i;
while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { while (mainGtU(state, ptr[j-h]+d, v+d)) {
ptr[j] = ptr[j-h]; ptr[j] = ptr[j-h];
j = j - h; j = j - h;
if (j <= (lo + h - 1)) break; if (j <= (lo + h - 1)) break;
@ -506,7 +507,7 @@ void mainSimpleSort(uint32_t* ptr,
if (i > hi) break; if (i > hi) break;
v = ptr[i]; v = ptr[i];
j = i; j = i;
while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { while (mainGtU(state, ptr[j-h]+d, v+d)) {
ptr[j] = ptr[j-h]; ptr[j] = ptr[j-h];
j = j - h; j = j - h;
if (j <= (lo + h - 1)) break; if (j <= (lo + h - 1)) break;
@ -517,7 +518,7 @@ void mainSimpleSort(uint32_t* ptr,
if (i > hi) break; if (i > hi) break;
v = ptr[i]; v = ptr[i];
j = i; j = i;
while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { while (mainGtU(state, ptr[j-h]+d, v+d)) {
ptr[j] = ptr[j-h]; ptr[j] = ptr[j-h];
j = j - h; j = j - h;
if (j <= (lo + h - 1)) break; if (j <= (lo + h - 1)) break;
@ -525,7 +526,7 @@ void mainSimpleSort(uint32_t* ptr,
ptr[j] = v; ptr[j] = v;
i++; i++;
#endif #endif
if (*budget < 0) return; if (state->budget < 0) return;
} }
} }
} }
@ -590,14 +591,10 @@ uint8_t mmed3(uint8_t a, uint8_t b, uint8_t c)
#define MAIN_QSORT_STACK_SIZE 100 #define MAIN_QSORT_STACK_SIZE 100
static NOINLINE static NOINLINE
void mainQSort3(uint32_t* ptr, void mainQSort3(EState* state,
uint8_t* block,
uint16_t* quadrant,
int32_t nblock,
int32_t loSt, int32_t loSt,
int32_t hiSt, int32_t hiSt
/*int32_t dSt,*/ /*int32_t dSt*/)
int32_t* budget)
{ {
enum { dSt = BZ_N_RADIX }; enum { dSt = BZ_N_RADIX };
int32_t unLo, unHi, ltLo, gtHi, n, m, med; int32_t unLo, unHi, ltLo, gtHi, n, m, med;
@ -611,6 +608,9 @@ void mainQSort3(uint32_t* ptr,
int32_t nextHi[3]; int32_t nextHi[3];
int32_t nextD [3]; int32_t nextD [3];
uint32_t *const ptr = state->ptr;
uint8_t *const block = state->block;
sp = 0; sp = 0;
mpush(loSt, hiSt, dSt); mpush(loSt, hiSt, dSt);
@ -621,8 +621,8 @@ void mainQSort3(uint32_t* ptr,
if (hi - lo < MAIN_QSORT_SMALL_THRESH if (hi - lo < MAIN_QSORT_SMALL_THRESH
|| d > MAIN_QSORT_DEPTH_THRESH || d > MAIN_QSORT_DEPTH_THRESH
) { ) {
mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget); mainSimpleSort(state, lo, hi, d);
if (*budget < 0) if (state->budget < 0)
return; return;
continue; continue;
} }
@ -726,13 +726,7 @@ void mainQSort3(uint32_t* ptr,
#define CLEARMASK (~(SETMASK)) #define CLEARMASK (~(SETMASK))
static NOINLINE static NOINLINE
void mainSort(EState* state, void mainSort(EState* state)
uint32_t* ptr,
uint8_t* block,
uint16_t* quadrant,
uint32_t* ftab,
int32_t nblock,
int32_t* budget)
{ {
int32_t i, j; int32_t i, j;
Bool bigDone[256]; Bool bigDone[256];
@ -745,6 +739,12 @@ void mainSort(EState* state,
#define copyStart (state->mainSort__copyStart) #define copyStart (state->mainSort__copyStart)
#define copyEnd (state->mainSort__copyEnd) #define copyEnd (state->mainSort__copyEnd)
uint32_t *const ptr = state->ptr;
uint8_t *const block = state->block;
uint32_t *const ftab = state->ftab;
const int32_t nblock = state->nblock;
uint16_t *const quadrant = state->quadrant;
/*-- set up the 2-byte frequency table --*/ /*-- set up the 2-byte frequency table --*/
/* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */ /* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */
memset(ftab, 0, 65537 * sizeof(ftab[0])); memset(ftab, 0, 65537 * sizeof(ftab[0]));
@ -883,11 +883,8 @@ void mainSort(EState* state,
int32_t lo = ftab[sb] /*& CLEARMASK (redundant)*/; int32_t lo = ftab[sb] /*& CLEARMASK (redundant)*/;
int32_t hi = (ftab[sb+1] & CLEARMASK) - 1; int32_t hi = (ftab[sb+1] & CLEARMASK) - 1;
if (hi > lo) { if (hi > lo) {
mainQSort3( mainQSort3(state, lo, hi /*,BZ_N_RADIX*/);
ptr, block, quadrant, nblock, if (state->budget < 0) return;
lo, hi, /*BZ_N_RADIX,*/ budget
);
if (*budget < 0) return;
} }
} }
ftab[sb] |= SETMASK; ftab[sb] |= SETMASK;
@ -1025,31 +1022,25 @@ void mainSort(EState* state,
* arr1[0 .. nblock-1] holds sorted order * arr1[0 .. nblock-1] holds sorted order
*/ */
static NOINLINE static NOINLINE
void BZ2_blockSort(EState* s) void BZ2_blockSort(EState* state)
{ {
/* In original bzip2 1.0.4, it's a parameter, but 30 /* In original bzip2 1.0.4, it's a parameter, but 30
* (which was the default) should work ok. */ * (which was the default) should work ok. */
enum { wfact = 30 }; enum { wfact = 30 };
unsigned i;
uint32_t* ptr = s->ptr; if (state->nblock < 10000) {
uint8_t* block = s->block; fallbackSort(state);
uint32_t* ftab = s->ftab;
int32_t nblock = s->nblock;
uint16_t* quadrant;
int32_t budget;
int32_t i;
if (nblock < 10000) {
fallbackSort(s->arr1, s->arr2, ftab, nblock);
} else { } else {
/* Calculate the location for quadrant, remembering to get /* Calculate the location for quadrant, remembering to get
* the alignment right. Assumes that &(block[0]) is at least * the alignment right. Assumes that &(block[0]) is at least
* 2-byte aligned -- this should be ok since block is really * 2-byte aligned -- this should be ok since block is really
* the first section of arr2. * the first section of arr2.
*/ */
i = nblock + BZ_N_OVERSHOOT; i = state->nblock + BZ_N_OVERSHOOT;
if (i & 1) i++; if (i & 1)
quadrant = (uint16_t*)(&(block[i])); i++;
state->quadrant = (uint16_t*) &(state->block[i]);
/* (wfact-1) / 3 puts the default-factor-30 /* (wfact-1) / 3 puts the default-factor-30
* transition point at very roughly the same place as * transition point at very roughly the same place as
@ -1058,24 +1049,25 @@ void BZ2_blockSort(EState* s)
* resulting compressed stream is now the same regardless * resulting compressed stream is now the same regardless
* of whether or not we use the main sort or fallback sort. * of whether or not we use the main sort or fallback sort.
*/ */
budget = nblock * ((wfact-1) / 3); state->budget = state->nblock * ((wfact-1) / 3);
mainSort(s, ptr, block, quadrant, ftab, nblock, &budget); mainSort(state);
if (budget < 0) { if (state->budget < 0) {
fallbackSort(s->arr1, s->arr2, ftab, nblock); fallbackSort(state);
} }
} }
#if BZ_LIGHT_DEBUG #if BZ_LIGHT_DEBUG
s->origPtr = -1; state->origPtr = -1;
#endif #endif
for (i = 0; i < s->nblock; i++) for (i = 0; i < state->nblock; i++) {
if (ptr[i] == 0) { if (state->ptr[i] == 0) {
s->origPtr = i; state->origPtr = i;
break; break;
} }
}
AssertH(s->origPtr != -1, 1003); AssertH(state->origPtr != -1, 1003);
} }

View File

@ -87,7 +87,7 @@ int isempty_RL(EState* s)
static static
void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k) void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k)
{ {
int32_t n; unsigned n;
EState* s; EState* s;
s = xzalloc(sizeof(EState)); s = xzalloc(sizeof(EState));

View File

@ -121,6 +121,7 @@ typedef struct EState {
/* mode this stream is in, and whether inputting */ /* mode this stream is in, and whether inputting */
/* or outputting data */ /* or outputting data */
int32_t mode; int32_t mode;
//TODO: could both 'mode' and 'state' be smallint?
int32_t state; int32_t state;
/* remembers avail_in when flush/finish requested */ /* remembers avail_in when flush/finish requested */
@ -134,6 +135,9 @@ typedef struct EState {
uint32_t *arr2; uint32_t *arr2;
uint32_t *ftab; uint32_t *ftab;
uint16_t* quadrant;
int32_t budget;
/* aliases for arr1 and arr2 */ /* aliases for arr1 and arr2 */
uint32_t *ptr; uint32_t *ptr;
uint8_t *block; uint8_t *block;
@ -142,6 +146,7 @@ typedef struct EState {
/* guess what */ /* guess what */
uint32_t *crc32table; uint32_t *crc32table;
//move down
/* run-length-encoding of the input */ /* run-length-encoding of the input */
uint32_t state_in_ch; uint32_t state_in_ch;
@ -165,6 +170,7 @@ typedef struct EState {
/* misc administratium */ /* misc administratium */
int32_t blockNo; int32_t blockNo;
int32_t blockSize100k; int32_t blockSize100k;
//TODO: could 'blockSize100k' be smallint?
/* stuff for coding the MTF values */ /* stuff for coding the MTF values */
int32_t nMTF; int32_t nMTF;