bzip2: pass sorting params through EState* pointer
function old new delta mainGtU 499 515 +16 sendMTFValues 2085 2094 +9 mainSort 1116 1119 +3 generateMTFValues 357 356 -1 fallbackSort 1719 1705 -14 mainQSort3 1163 1141 -22 BZ2_blockSort 118 85 -33 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 3/4 up/down: 28/-70) Total: -42 bytes Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
10f516500e
commit
c9ae8d770b
@ -227,17 +227,19 @@ void fallbackQSort3(uint32_t* fmap,
|
|||||||
#define UNALIGNED_BH(zz) ((zz) & 0x01f)
|
#define UNALIGNED_BH(zz) ((zz) & 0x01f)
|
||||||
|
|
||||||
static
|
static
|
||||||
void fallbackSort(uint32_t* fmap,
|
void fallbackSort(EState* state)
|
||||||
uint32_t* eclass,
|
|
||||||
uint32_t* bhtab,
|
|
||||||
int32_t nblock)
|
|
||||||
{
|
{
|
||||||
int32_t ftab[257];
|
int32_t ftab[257];
|
||||||
int32_t ftabCopy[256];
|
int32_t ftabCopy[256];
|
||||||
int32_t H, i, j, k, l, r, cc, cc1;
|
int32_t H, i, j, k, l, r, cc, cc1;
|
||||||
int32_t nNotDone;
|
int32_t nNotDone;
|
||||||
int32_t nBhtab;
|
int32_t nBhtab;
|
||||||
uint8_t* eclass8 = (uint8_t*)eclass;
|
/* params */
|
||||||
|
uint32_t *const fmap = state->arr1;
|
||||||
|
uint32_t *const eclass = state->arr2;
|
||||||
|
#define eclass8 ((uint8_t*)eclass)
|
||||||
|
uint32_t *const bhtab = state->ftab;
|
||||||
|
const int32_t nblock = state->nblock;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initial 1-char radix sort to generate
|
* Initial 1-char radix sort to generate
|
||||||
@ -349,6 +351,7 @@ void fallbackSort(uint32_t* fmap,
|
|||||||
eclass8[fmap[i]] = (uint8_t)j;
|
eclass8[fmap[i]] = (uint8_t)j;
|
||||||
}
|
}
|
||||||
AssertH(j < 256, 1005);
|
AssertH(j < 256, 1005);
|
||||||
|
#undef eclass8
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef SET_BH
|
#undef SET_BH
|
||||||
@ -367,18 +370,18 @@ void fallbackSort(uint32_t* fmap,
|
|||||||
/*---------------------------------------------*/
|
/*---------------------------------------------*/
|
||||||
static
|
static
|
||||||
NOINLINE
|
NOINLINE
|
||||||
int mainGtU(
|
int mainGtU(EState* state,
|
||||||
uint32_t i1,
|
uint32_t i1,
|
||||||
uint32_t i2,
|
uint32_t i2)
|
||||||
uint8_t* block,
|
|
||||||
uint16_t* quadrant,
|
|
||||||
uint32_t nblock,
|
|
||||||
int32_t* budget)
|
|
||||||
{
|
{
|
||||||
int32_t k;
|
int32_t k;
|
||||||
uint8_t c1, c2;
|
uint8_t c1, c2;
|
||||||
uint16_t s1, s2;
|
uint16_t s1, s2;
|
||||||
|
|
||||||
|
uint8_t *const block = state->block;
|
||||||
|
uint16_t *const quadrant = state->quadrant;
|
||||||
|
const int32_t nblock = state->nblock;
|
||||||
|
|
||||||
/* Loop unrolling here is actually very useful
|
/* Loop unrolling here is actually very useful
|
||||||
* (generated code is much simpler),
|
* (generated code is much simpler),
|
||||||
* code size increase is only 270 bytes (i386)
|
* code size increase is only 270 bytes (i386)
|
||||||
@ -435,7 +438,7 @@ int mainGtU(
|
|||||||
if (i1 >= nblock) i1 -= nblock;
|
if (i1 >= nblock) i1 -= nblock;
|
||||||
if (i2 >= nblock) i2 -= nblock;
|
if (i2 >= nblock) i2 -= nblock;
|
||||||
|
|
||||||
(*budget)--;
|
state->budget--;
|
||||||
k -= 8;
|
k -= 8;
|
||||||
} while (k >= 0);
|
} while (k >= 0);
|
||||||
|
|
||||||
@ -459,15 +462,13 @@ const uint32_t incs[14] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static
|
static
|
||||||
void mainSimpleSort(uint32_t* ptr,
|
void mainSimpleSort(EState* state,
|
||||||
uint8_t* block,
|
|
||||||
uint16_t* quadrant,
|
|
||||||
int32_t nblock,
|
|
||||||
int32_t lo,
|
int32_t lo,
|
||||||
int32_t hi,
|
int32_t hi,
|
||||||
int32_t d,
|
int32_t d)
|
||||||
int32_t* budget)
|
|
||||||
{
|
{
|
||||||
|
uint32_t *const ptr = state->ptr;
|
||||||
|
|
||||||
/* At which increment to start? */
|
/* At which increment to start? */
|
||||||
int hp = 0;
|
int hp = 0;
|
||||||
{
|
{
|
||||||
@ -492,7 +493,7 @@ void mainSimpleSort(uint32_t* ptr,
|
|||||||
if (i > hi) break;
|
if (i > hi) break;
|
||||||
v = ptr[i];
|
v = ptr[i];
|
||||||
j = i;
|
j = i;
|
||||||
while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
|
while (mainGtU(state, ptr[j-h]+d, v+d)) {
|
||||||
ptr[j] = ptr[j-h];
|
ptr[j] = ptr[j-h];
|
||||||
j = j - h;
|
j = j - h;
|
||||||
if (j <= (lo + h - 1)) break;
|
if (j <= (lo + h - 1)) break;
|
||||||
@ -506,7 +507,7 @@ void mainSimpleSort(uint32_t* ptr,
|
|||||||
if (i > hi) break;
|
if (i > hi) break;
|
||||||
v = ptr[i];
|
v = ptr[i];
|
||||||
j = i;
|
j = i;
|
||||||
while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
|
while (mainGtU(state, ptr[j-h]+d, v+d)) {
|
||||||
ptr[j] = ptr[j-h];
|
ptr[j] = ptr[j-h];
|
||||||
j = j - h;
|
j = j - h;
|
||||||
if (j <= (lo + h - 1)) break;
|
if (j <= (lo + h - 1)) break;
|
||||||
@ -517,7 +518,7 @@ void mainSimpleSort(uint32_t* ptr,
|
|||||||
if (i > hi) break;
|
if (i > hi) break;
|
||||||
v = ptr[i];
|
v = ptr[i];
|
||||||
j = i;
|
j = i;
|
||||||
while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
|
while (mainGtU(state, ptr[j-h]+d, v+d)) {
|
||||||
ptr[j] = ptr[j-h];
|
ptr[j] = ptr[j-h];
|
||||||
j = j - h;
|
j = j - h;
|
||||||
if (j <= (lo + h - 1)) break;
|
if (j <= (lo + h - 1)) break;
|
||||||
@ -525,7 +526,7 @@ void mainSimpleSort(uint32_t* ptr,
|
|||||||
ptr[j] = v;
|
ptr[j] = v;
|
||||||
i++;
|
i++;
|
||||||
#endif
|
#endif
|
||||||
if (*budget < 0) return;
|
if (state->budget < 0) return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -590,14 +591,10 @@ uint8_t mmed3(uint8_t a, uint8_t b, uint8_t c)
|
|||||||
#define MAIN_QSORT_STACK_SIZE 100
|
#define MAIN_QSORT_STACK_SIZE 100
|
||||||
|
|
||||||
static NOINLINE
|
static NOINLINE
|
||||||
void mainQSort3(uint32_t* ptr,
|
void mainQSort3(EState* state,
|
||||||
uint8_t* block,
|
|
||||||
uint16_t* quadrant,
|
|
||||||
int32_t nblock,
|
|
||||||
int32_t loSt,
|
int32_t loSt,
|
||||||
int32_t hiSt,
|
int32_t hiSt
|
||||||
/*int32_t dSt,*/
|
/*int32_t dSt*/)
|
||||||
int32_t* budget)
|
|
||||||
{
|
{
|
||||||
enum { dSt = BZ_N_RADIX };
|
enum { dSt = BZ_N_RADIX };
|
||||||
int32_t unLo, unHi, ltLo, gtHi, n, m, med;
|
int32_t unLo, unHi, ltLo, gtHi, n, m, med;
|
||||||
@ -611,6 +608,9 @@ void mainQSort3(uint32_t* ptr,
|
|||||||
int32_t nextHi[3];
|
int32_t nextHi[3];
|
||||||
int32_t nextD [3];
|
int32_t nextD [3];
|
||||||
|
|
||||||
|
uint32_t *const ptr = state->ptr;
|
||||||
|
uint8_t *const block = state->block;
|
||||||
|
|
||||||
sp = 0;
|
sp = 0;
|
||||||
mpush(loSt, hiSt, dSt);
|
mpush(loSt, hiSt, dSt);
|
||||||
|
|
||||||
@ -621,8 +621,8 @@ void mainQSort3(uint32_t* ptr,
|
|||||||
if (hi - lo < MAIN_QSORT_SMALL_THRESH
|
if (hi - lo < MAIN_QSORT_SMALL_THRESH
|
||||||
|| d > MAIN_QSORT_DEPTH_THRESH
|
|| d > MAIN_QSORT_DEPTH_THRESH
|
||||||
) {
|
) {
|
||||||
mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget);
|
mainSimpleSort(state, lo, hi, d);
|
||||||
if (*budget < 0)
|
if (state->budget < 0)
|
||||||
return;
|
return;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -726,13 +726,7 @@ void mainQSort3(uint32_t* ptr,
|
|||||||
#define CLEARMASK (~(SETMASK))
|
#define CLEARMASK (~(SETMASK))
|
||||||
|
|
||||||
static NOINLINE
|
static NOINLINE
|
||||||
void mainSort(EState* state,
|
void mainSort(EState* state)
|
||||||
uint32_t* ptr,
|
|
||||||
uint8_t* block,
|
|
||||||
uint16_t* quadrant,
|
|
||||||
uint32_t* ftab,
|
|
||||||
int32_t nblock,
|
|
||||||
int32_t* budget)
|
|
||||||
{
|
{
|
||||||
int32_t i, j;
|
int32_t i, j;
|
||||||
Bool bigDone[256];
|
Bool bigDone[256];
|
||||||
@ -745,6 +739,12 @@ void mainSort(EState* state,
|
|||||||
#define copyStart (state->mainSort__copyStart)
|
#define copyStart (state->mainSort__copyStart)
|
||||||
#define copyEnd (state->mainSort__copyEnd)
|
#define copyEnd (state->mainSort__copyEnd)
|
||||||
|
|
||||||
|
uint32_t *const ptr = state->ptr;
|
||||||
|
uint8_t *const block = state->block;
|
||||||
|
uint32_t *const ftab = state->ftab;
|
||||||
|
const int32_t nblock = state->nblock;
|
||||||
|
uint16_t *const quadrant = state->quadrant;
|
||||||
|
|
||||||
/*-- set up the 2-byte frequency table --*/
|
/*-- set up the 2-byte frequency table --*/
|
||||||
/* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */
|
/* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */
|
||||||
memset(ftab, 0, 65537 * sizeof(ftab[0]));
|
memset(ftab, 0, 65537 * sizeof(ftab[0]));
|
||||||
@ -883,11 +883,8 @@ void mainSort(EState* state,
|
|||||||
int32_t lo = ftab[sb] /*& CLEARMASK (redundant)*/;
|
int32_t lo = ftab[sb] /*& CLEARMASK (redundant)*/;
|
||||||
int32_t hi = (ftab[sb+1] & CLEARMASK) - 1;
|
int32_t hi = (ftab[sb+1] & CLEARMASK) - 1;
|
||||||
if (hi > lo) {
|
if (hi > lo) {
|
||||||
mainQSort3(
|
mainQSort3(state, lo, hi /*,BZ_N_RADIX*/);
|
||||||
ptr, block, quadrant, nblock,
|
if (state->budget < 0) return;
|
||||||
lo, hi, /*BZ_N_RADIX,*/ budget
|
|
||||||
);
|
|
||||||
if (*budget < 0) return;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ftab[sb] |= SETMASK;
|
ftab[sb] |= SETMASK;
|
||||||
@ -1025,31 +1022,25 @@ void mainSort(EState* state,
|
|||||||
* arr1[0 .. nblock-1] holds sorted order
|
* arr1[0 .. nblock-1] holds sorted order
|
||||||
*/
|
*/
|
||||||
static NOINLINE
|
static NOINLINE
|
||||||
void BZ2_blockSort(EState* s)
|
void BZ2_blockSort(EState* state)
|
||||||
{
|
{
|
||||||
/* In original bzip2 1.0.4, it's a parameter, but 30
|
/* In original bzip2 1.0.4, it's a parameter, but 30
|
||||||
* (which was the default) should work ok. */
|
* (which was the default) should work ok. */
|
||||||
enum { wfact = 30 };
|
enum { wfact = 30 };
|
||||||
|
unsigned i;
|
||||||
|
|
||||||
uint32_t* ptr = s->ptr;
|
if (state->nblock < 10000) {
|
||||||
uint8_t* block = s->block;
|
fallbackSort(state);
|
||||||
uint32_t* ftab = s->ftab;
|
|
||||||
int32_t nblock = s->nblock;
|
|
||||||
uint16_t* quadrant;
|
|
||||||
int32_t budget;
|
|
||||||
int32_t i;
|
|
||||||
|
|
||||||
if (nblock < 10000) {
|
|
||||||
fallbackSort(s->arr1, s->arr2, ftab, nblock);
|
|
||||||
} else {
|
} else {
|
||||||
/* Calculate the location for quadrant, remembering to get
|
/* Calculate the location for quadrant, remembering to get
|
||||||
* the alignment right. Assumes that &(block[0]) is at least
|
* the alignment right. Assumes that &(block[0]) is at least
|
||||||
* 2-byte aligned -- this should be ok since block is really
|
* 2-byte aligned -- this should be ok since block is really
|
||||||
* the first section of arr2.
|
* the first section of arr2.
|
||||||
*/
|
*/
|
||||||
i = nblock + BZ_N_OVERSHOOT;
|
i = state->nblock + BZ_N_OVERSHOOT;
|
||||||
if (i & 1) i++;
|
if (i & 1)
|
||||||
quadrant = (uint16_t*)(&(block[i]));
|
i++;
|
||||||
|
state->quadrant = (uint16_t*) &(state->block[i]);
|
||||||
|
|
||||||
/* (wfact-1) / 3 puts the default-factor-30
|
/* (wfact-1) / 3 puts the default-factor-30
|
||||||
* transition point at very roughly the same place as
|
* transition point at very roughly the same place as
|
||||||
@ -1058,24 +1049,25 @@ void BZ2_blockSort(EState* s)
|
|||||||
* resulting compressed stream is now the same regardless
|
* resulting compressed stream is now the same regardless
|
||||||
* of whether or not we use the main sort or fallback sort.
|
* of whether or not we use the main sort or fallback sort.
|
||||||
*/
|
*/
|
||||||
budget = nblock * ((wfact-1) / 3);
|
state->budget = state->nblock * ((wfact-1) / 3);
|
||||||
|
|
||||||
mainSort(s, ptr, block, quadrant, ftab, nblock, &budget);
|
mainSort(state);
|
||||||
if (budget < 0) {
|
if (state->budget < 0) {
|
||||||
fallbackSort(s->arr1, s->arr2, ftab, nblock);
|
fallbackSort(state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if BZ_LIGHT_DEBUG
|
#if BZ_LIGHT_DEBUG
|
||||||
s->origPtr = -1;
|
state->origPtr = -1;
|
||||||
#endif
|
#endif
|
||||||
for (i = 0; i < s->nblock; i++)
|
for (i = 0; i < state->nblock; i++) {
|
||||||
if (ptr[i] == 0) {
|
if (state->ptr[i] == 0) {
|
||||||
s->origPtr = i;
|
state->origPtr = i;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
AssertH(s->origPtr != -1, 1003);
|
AssertH(state->origPtr != -1, 1003);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -87,7 +87,7 @@ int isempty_RL(EState* s)
|
|||||||
static
|
static
|
||||||
void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k)
|
void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k)
|
||||||
{
|
{
|
||||||
int32_t n;
|
unsigned n;
|
||||||
EState* s;
|
EState* s;
|
||||||
|
|
||||||
s = xzalloc(sizeof(EState));
|
s = xzalloc(sizeof(EState));
|
||||||
|
@ -121,6 +121,7 @@ typedef struct EState {
|
|||||||
/* mode this stream is in, and whether inputting */
|
/* mode this stream is in, and whether inputting */
|
||||||
/* or outputting data */
|
/* or outputting data */
|
||||||
int32_t mode;
|
int32_t mode;
|
||||||
|
//both smallint?
|
||||||
int32_t state;
|
int32_t state;
|
||||||
|
|
||||||
/* remembers avail_in when flush/finish requested */
|
/* remembers avail_in when flush/finish requested */
|
||||||
@ -134,6 +135,9 @@ typedef struct EState {
|
|||||||
uint32_t *arr2;
|
uint32_t *arr2;
|
||||||
uint32_t *ftab;
|
uint32_t *ftab;
|
||||||
|
|
||||||
|
uint16_t* quadrant;
|
||||||
|
int32_t budget;
|
||||||
|
|
||||||
/* aliases for arr1 and arr2 */
|
/* aliases for arr1 and arr2 */
|
||||||
uint32_t *ptr;
|
uint32_t *ptr;
|
||||||
uint8_t *block;
|
uint8_t *block;
|
||||||
@ -142,6 +146,7 @@ typedef struct EState {
|
|||||||
|
|
||||||
/* guess what */
|
/* guess what */
|
||||||
uint32_t *crc32table;
|
uint32_t *crc32table;
|
||||||
|
//move down
|
||||||
|
|
||||||
/* run-length-encoding of the input */
|
/* run-length-encoding of the input */
|
||||||
uint32_t state_in_ch;
|
uint32_t state_in_ch;
|
||||||
@ -165,6 +170,7 @@ typedef struct EState {
|
|||||||
/* misc administratium */
|
/* misc administratium */
|
||||||
int32_t blockNo;
|
int32_t blockNo;
|
||||||
int32_t blockSize100k;
|
int32_t blockSize100k;
|
||||||
|
//smallint?
|
||||||
|
|
||||||
/* stuff for coding the MTF values */
|
/* stuff for coding the MTF values */
|
||||||
int32_t nMTF;
|
int32_t nMTF;
|
||||||
|
Loading…
Reference in New Issue
Block a user