awk: don't append bogus data after NUL in sub(); shrink

Also rename variables to more sensible names.

function                                             old     new   delta
mk_re_node                                            56      49      -7
awk_sub                                              601     591     -10

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2010-04-04 01:17:30 +02:00
parent 243ddcbc76
commit fab288cf0b

View File

@ -1134,15 +1134,13 @@ static node *new_node(uint32_t info)
return n; return n;
} }
static node *mk_re_node(const char *s, node *n, regex_t *re) static void mk_re_node(const char *s, node *n, regex_t *re)
{ {
n->info = OC_REGEXP; n->info = OC_REGEXP;
n->l.re = re; n->l.re = re;
n->r.ire = re + 1; n->r.ire = re + 1;
xregcomp(re, s, REG_EXTENDED); xregcomp(re, s, REG_EXTENDED);
xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
return n;
} }
static node *condition(void) static node *condition(void)
@ -1541,7 +1539,10 @@ static regex_t *as_regex(node *op, regex_t *preg)
return preg; return preg;
} }
/* gradually increasing buffer */ /* gradually increasing buffer.
* note that we reallocate even if n == old_size,
* and thus there is at least one extra allocated byte.
*/
static char* qrealloc(char *b, int n, int *size) static char* qrealloc(char *b, int n, int *size)
{ {
if (!b || n >= *size) { if (!b || n >= *size) {
@ -1983,83 +1984,100 @@ static char *awk_printf(node *n)
return b; return b;
} }
/* common substitution routine /* Common substitution routine.
* replace (nm) substring of (src) that match (n) with (repl), store * Replace (nm)'th substring of (src) that matches (rn) with (repl),
* result into (dest), return number of substitutions. If nm=0, replace * store result into (dest), return number of substitutions.
* all matches. If src or dst is NULL, use $0. If ex=TRUE, enable * If nm = 0, replace all matches.
* subexpression matching (\1-\9) * If src or dst is NULL, use $0.
* If subexp != 0, enable subexpression matching (\1-\9).
*/ */
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex) static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
{ {
char *ds = NULL; char *resbuf;
const char *s;
const char *sp; const char *sp;
int c, i, j, di, rl, so, eo, nbs, n, dssize; int match_no, residx, replen, resbufsize;
int regexec_flags;
regmatch_t pmatch[10]; regmatch_t pmatch[10];
regex_t sreg, *re; regex_t sreg, *regex;
re = as_regex(rn, &sreg); resbuf = NULL;
if (!src) residx = 0;
src = intvar[F0]; match_no = 0;
if (!dest) regexec_flags = 0;
dest = intvar[F0]; regex = as_regex(rn, &sreg);
sp = getvar_s(src ? src : intvar[F0]);
replen = strlen(repl);
while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
int so = pmatch[0].rm_so;
int eo = pmatch[0].rm_eo;
i = di = 0; //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
sp = getvar_s(src); resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
rl = strlen(repl); memcpy(resbuf + residx, sp, eo);
while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) { residx += eo;
so = pmatch[0].rm_so; if (++match_no >= nm) {
eo = pmatch[0].rm_eo; const char *s;
int nbs;
ds = qrealloc(ds, di + eo + rl, &dssize);
memcpy(ds + di, sp, eo);
di += eo;
if (++i >= nm) {
/* replace */ /* replace */
di -= (eo - so); residx -= (eo - so);
nbs = 0; nbs = 0;
for (s = repl; *s; s++) { for (s = repl; *s; s++) {
ds[di++] = c = *s; char c = resbuf[residx++] = *s;
if (c == '\\') { if (c == '\\') {
nbs++; nbs++;
continue; continue;
} }
if (c == '&' || (ex && c >= '0' && c <= '9')) { if (c == '&' || (subexp && c >= '0' && c <= '9')) {
di -= ((nbs + 3) >> 1); int j;
residx -= ((nbs + 3) >> 1);
j = 0; j = 0;
if (c != '&') { if (c != '&') {
j = c - '0'; j = c - '0';
nbs++; nbs++;
} }
if (nbs % 2) { if (nbs % 2) {
ds[di++] = c; resbuf[residx++] = c;
} else { } else {
n = pmatch[j].rm_eo - pmatch[j].rm_so; int n = pmatch[j].rm_eo - pmatch[j].rm_so;
ds = qrealloc(ds, di + rl + n, &dssize); resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
memcpy(ds + di, sp + pmatch[j].rm_so, n); memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
di += n; residx += n;
} }
} }
nbs = 0; nbs = 0;
} }
} }
regexec_flags = REG_NOTBOL;
sp += eo; sp += eo;
if (i == nm) if (match_no == nm)
break; break;
if (eo == so) { if (eo == so) {
ds[di] = *sp++; /* Empty match (e.g. "b*" will match anywhere).
if (!ds[di++]) * Advance by one char. */
break; //BUG (bug 1333):
//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
//... and will erroneously match "b" even though it is NOT at the word start.
//we need REG_NOTBOW but it does not exist...
/* Subtle: this is safe only because
* qrealloc allocated at least one extra byte */
resbuf[residx] = *sp;
if (*sp == '\0')
goto ret;
sp++;
residx++;
} }
} }
ds = qrealloc(ds, di + strlen(sp), &dssize); resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
strcpy(ds + di, sp); strcpy(resbuf + residx, sp);
setvar_p(dest, ds); ret:
if (re == &sreg) //bb_error_msg("end sp:'%s'%p", sp,sp);
regfree(re); setvar_p(dest ? dest : intvar[F0], resbuf);
return i; if (regex == &sreg)
regfree(regex);
return match_no;
} }
static NOINLINE int do_mktime(const char *ds) static NOINLINE int do_mktime(const char *ds)