awk: don't append bogus data after NUL in sub(); shrink
also renamed variables to more sensible names

function                                             old     new   delta
mk_re_node                                            56      49      -7
awk_sub                                              601     591     -10

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
243ddcbc76
commit
fab288cf0b
114
editors/awk.c
114
editors/awk.c
@ -1134,15 +1134,13 @@ static node *new_node(uint32_t info)
|
|||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
static node *mk_re_node(const char *s, node *n, regex_t *re)
|
static void mk_re_node(const char *s, node *n, regex_t *re)
|
||||||
{
|
{
|
||||||
n->info = OC_REGEXP;
|
n->info = OC_REGEXP;
|
||||||
n->l.re = re;
|
n->l.re = re;
|
||||||
n->r.ire = re + 1;
|
n->r.ire = re + 1;
|
||||||
xregcomp(re, s, REG_EXTENDED);
|
xregcomp(re, s, REG_EXTENDED);
|
||||||
xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
|
xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
|
||||||
|
|
||||||
return n;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static node *condition(void)
|
static node *condition(void)
|
||||||
@ -1541,7 +1539,10 @@ static regex_t *as_regex(node *op, regex_t *preg)
|
|||||||
return preg;
|
return preg;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* gradually increasing buffer */
|
/* gradually increasing buffer.
|
||||||
|
* note that we reallocate even if n == old_size,
|
||||||
|
* and thus there is at least one extra allocated byte.
|
||||||
|
*/
|
||||||
static char* qrealloc(char *b, int n, int *size)
|
static char* qrealloc(char *b, int n, int *size)
|
||||||
{
|
{
|
||||||
if (!b || n >= *size) {
|
if (!b || n >= *size) {
|
||||||
@ -1983,83 +1984,100 @@ static char *awk_printf(node *n)
|
|||||||
return b;
|
return b;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* common substitution routine
|
/* Common substitution routine.
|
||||||
* replace (nm) substring of (src) that match (n) with (repl), store
|
* Replace (nm)'th substring of (src) that matches (rn) with (repl),
|
||||||
* result into (dest), return number of substitutions. If nm=0, replace
|
* store result into (dest), return number of substitutions.
|
||||||
* all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
|
* If nm = 0, replace all matches.
|
||||||
* subexpression matching (\1-\9)
|
* If src or dst is NULL, use $0.
|
||||||
|
* If subexp != 0, enable subexpression matching (\1-\9).
|
||||||
*/
|
*/
|
||||||
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
|
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
|
||||||
{
|
{
|
||||||
char *ds = NULL;
|
char *resbuf;
|
||||||
const char *s;
|
|
||||||
const char *sp;
|
const char *sp;
|
||||||
int c, i, j, di, rl, so, eo, nbs, n, dssize;
|
int match_no, residx, replen, resbufsize;
|
||||||
|
int regexec_flags;
|
||||||
regmatch_t pmatch[10];
|
regmatch_t pmatch[10];
|
||||||
regex_t sreg, *re;
|
regex_t sreg, *regex;
|
||||||
|
|
||||||
re = as_regex(rn, &sreg);
|
resbuf = NULL;
|
||||||
if (!src)
|
residx = 0;
|
||||||
src = intvar[F0];
|
match_no = 0;
|
||||||
if (!dest)
|
regexec_flags = 0;
|
||||||
dest = intvar[F0];
|
regex = as_regex(rn, &sreg);
|
||||||
|
sp = getvar_s(src ? src : intvar[F0]);
|
||||||
|
replen = strlen(repl);
|
||||||
|
while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
|
||||||
|
int so = pmatch[0].rm_so;
|
||||||
|
int eo = pmatch[0].rm_eo;
|
||||||
|
|
||||||
i = di = 0;
|
//bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
|
||||||
sp = getvar_s(src);
|
resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
|
||||||
rl = strlen(repl);
|
memcpy(resbuf + residx, sp, eo);
|
||||||
while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
|
residx += eo;
|
||||||
so = pmatch[0].rm_so;
|
if (++match_no >= nm) {
|
||||||
eo = pmatch[0].rm_eo;
|
const char *s;
|
||||||
|
int nbs;
|
||||||
|
|
||||||
ds = qrealloc(ds, di + eo + rl, &dssize);
|
|
||||||
memcpy(ds + di, sp, eo);
|
|
||||||
di += eo;
|
|
||||||
if (++i >= nm) {
|
|
||||||
/* replace */
|
/* replace */
|
||||||
di -= (eo - so);
|
residx -= (eo - so);
|
||||||
nbs = 0;
|
nbs = 0;
|
||||||
for (s = repl; *s; s++) {
|
for (s = repl; *s; s++) {
|
||||||
ds[di++] = c = *s;
|
char c = resbuf[residx++] = *s;
|
||||||
if (c == '\\') {
|
if (c == '\\') {
|
||||||
nbs++;
|
nbs++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (c == '&' || (ex && c >= '0' && c <= '9')) {
|
if (c == '&' || (subexp && c >= '0' && c <= '9')) {
|
||||||
di -= ((nbs + 3) >> 1);
|
int j;
|
||||||
|
residx -= ((nbs + 3) >> 1);
|
||||||
j = 0;
|
j = 0;
|
||||||
if (c != '&') {
|
if (c != '&') {
|
||||||
j = c - '0';
|
j = c - '0';
|
||||||
nbs++;
|
nbs++;
|
||||||
}
|
}
|
||||||
if (nbs % 2) {
|
if (nbs % 2) {
|
||||||
ds[di++] = c;
|
resbuf[residx++] = c;
|
||||||
} else {
|
} else {
|
||||||
n = pmatch[j].rm_eo - pmatch[j].rm_so;
|
int n = pmatch[j].rm_eo - pmatch[j].rm_so;
|
||||||
ds = qrealloc(ds, di + rl + n, &dssize);
|
resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
|
||||||
memcpy(ds + di, sp + pmatch[j].rm_so, n);
|
memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
|
||||||
di += n;
|
residx += n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
nbs = 0;
|
nbs = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
regexec_flags = REG_NOTBOL;
|
||||||
sp += eo;
|
sp += eo;
|
||||||
if (i == nm)
|
if (match_no == nm)
|
||||||
break;
|
break;
|
||||||
if (eo == so) {
|
if (eo == so) {
|
||||||
ds[di] = *sp++;
|
/* Empty match (e.g. "b*" will match anywhere).
|
||||||
if (!ds[di++])
|
* Advance by one char. */
|
||||||
break;
|
//BUG (bug 1333):
|
||||||
|
//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
|
||||||
|
//... and will erroneously match "b" even though it is NOT at the word start.
|
||||||
|
//we need REG_NOTBOW but it does not exist...
|
||||||
|
/* Subtle: this is safe only because
|
||||||
|
* qrealloc allocated at least one extra byte */
|
||||||
|
resbuf[residx] = *sp;
|
||||||
|
if (*sp == '\0')
|
||||||
|
goto ret;
|
||||||
|
sp++;
|
||||||
|
residx++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ds = qrealloc(ds, di + strlen(sp), &dssize);
|
resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
|
||||||
strcpy(ds + di, sp);
|
strcpy(resbuf + residx, sp);
|
||||||
setvar_p(dest, ds);
|
ret:
|
||||||
if (re == &sreg)
|
//bb_error_msg("end sp:'%s'%p", sp,sp);
|
||||||
regfree(re);
|
setvar_p(dest ? dest : intvar[F0], resbuf);
|
||||||
return i;
|
if (regex == &sreg)
|
||||||
|
regfree(regex);
|
||||||
|
return match_no;
|
||||||
}
|
}
|
||||||
|
|
||||||
static NOINLINE int do_mktime(const char *ds)
|
static NOINLINE int do_mktime(const char *ds)
|
||||||
|
Loading…
Reference in New Issue
Block a user