awk: don't append bogus data after NUL in sub(); shrink
also renamed variables to more sensible names function old new delta mk_re_node 56 49 -7 awk_sub 601 591 -10 Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
		
							
								
								
									
										114
									
								
								editors/awk.c
									
									
									
									
									
								
							
							
						
						
									
										114
									
								
								editors/awk.c
									
									
									
									
									
								
							@@ -1134,15 +1134,13 @@ static node *new_node(uint32_t info)
 | 
			
		||||
	return n;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static node *mk_re_node(const char *s, node *n, regex_t *re)
 | 
			
		||||
static void mk_re_node(const char *s, node *n, regex_t *re)
 | 
			
		||||
{
 | 
			
		||||
	n->info = OC_REGEXP;
 | 
			
		||||
	n->l.re = re;
 | 
			
		||||
	n->r.ire = re + 1;
 | 
			
		||||
	xregcomp(re, s, REG_EXTENDED);
 | 
			
		||||
	xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
 | 
			
		||||
 | 
			
		||||
	return n;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static node *condition(void)
 | 
			
		||||
@@ -1541,7 +1539,10 @@ static regex_t *as_regex(node *op, regex_t *preg)
 | 
			
		||||
	return preg;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* gradually increasing buffer */
 | 
			
		||||
/* gradually increasing buffer.
 | 
			
		||||
 * note that we reallocate even if n == old_size,
 | 
			
		||||
 * and thus there is at least one extra allocated byte.
 | 
			
		||||
 */
 | 
			
		||||
static char* qrealloc(char *b, int n, int *size)
 | 
			
		||||
{
 | 
			
		||||
	if (!b || n >= *size) {
 | 
			
		||||
@@ -1983,83 +1984,100 @@ static char *awk_printf(node *n)
 | 
			
		||||
	return b;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* common substitution routine
 | 
			
		||||
 * replace (nm) substring of (src) that match (n) with (repl), store
 | 
			
		||||
 * result into (dest), return number of substitutions. If nm=0, replace
 | 
			
		||||
 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
 | 
			
		||||
 * subexpression matching (\1-\9)
 | 
			
		||||
/* Common substitution routine.
 | 
			
		||||
 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
 | 
			
		||||
 * store result into (dest), return number of substitutions.
 | 
			
		||||
 * If nm = 0, replace all matches.
 | 
			
		||||
 * If src or dst is NULL, use $0.
 | 
			
		||||
 * If subexp != 0, enable subexpression matching (\1-\9).
 | 
			
		||||
 */
 | 
			
		||||
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
 | 
			
		||||
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
 | 
			
		||||
{
 | 
			
		||||
	char *ds = NULL;
 | 
			
		||||
	const char *s;
 | 
			
		||||
	char *resbuf;
 | 
			
		||||
	const char *sp;
 | 
			
		||||
	int c, i, j, di, rl, so, eo, nbs, n, dssize;
 | 
			
		||||
	int match_no, residx, replen, resbufsize;
 | 
			
		||||
	int regexec_flags;
 | 
			
		||||
	regmatch_t pmatch[10];
 | 
			
		||||
	regex_t sreg, *re;
 | 
			
		||||
	regex_t sreg, *regex;
 | 
			
		||||
 | 
			
		||||
	re = as_regex(rn, &sreg);
 | 
			
		||||
	if (!src)
 | 
			
		||||
		src = intvar[F0];
 | 
			
		||||
	if (!dest)
 | 
			
		||||
		dest = intvar[F0];
 | 
			
		||||
	resbuf = NULL;
 | 
			
		||||
	residx = 0;
 | 
			
		||||
	match_no = 0;
 | 
			
		||||
	regexec_flags = 0;
 | 
			
		||||
	regex = as_regex(rn, &sreg);
 | 
			
		||||
	sp = getvar_s(src ? src : intvar[F0]);
 | 
			
		||||
	replen = strlen(repl);
 | 
			
		||||
	while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
 | 
			
		||||
		int so = pmatch[0].rm_so;
 | 
			
		||||
		int eo = pmatch[0].rm_eo;
 | 
			
		||||
 | 
			
		||||
	i = di = 0;
 | 
			
		||||
	sp = getvar_s(src);
 | 
			
		||||
	rl = strlen(repl);
 | 
			
		||||
	while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
 | 
			
		||||
		so = pmatch[0].rm_so;
 | 
			
		||||
		eo = pmatch[0].rm_eo;
 | 
			
		||||
		//bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
 | 
			
		||||
		resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
 | 
			
		||||
		memcpy(resbuf + residx, sp, eo);
 | 
			
		||||
		residx += eo;
 | 
			
		||||
		if (++match_no >= nm) {
 | 
			
		||||
			const char *s;
 | 
			
		||||
			int nbs;
 | 
			
		||||
 | 
			
		||||
		ds = qrealloc(ds, di + eo + rl, &dssize);
 | 
			
		||||
		memcpy(ds + di, sp, eo);
 | 
			
		||||
		di += eo;
 | 
			
		||||
		if (++i >= nm) {
 | 
			
		||||
			/* replace */
 | 
			
		||||
			di -= (eo - so);
 | 
			
		||||
			residx -= (eo - so);
 | 
			
		||||
			nbs = 0;
 | 
			
		||||
			for (s = repl; *s; s++) {
 | 
			
		||||
				ds[di++] = c = *s;
 | 
			
		||||
				char c = resbuf[residx++] = *s;
 | 
			
		||||
				if (c == '\\') {
 | 
			
		||||
					nbs++;
 | 
			
		||||
					continue;
 | 
			
		||||
				}
 | 
			
		||||
				if (c == '&' || (ex && c >= '0' && c <= '9')) {
 | 
			
		||||
					di -= ((nbs + 3) >> 1);
 | 
			
		||||
				if (c == '&' || (subexp && c >= '0' && c <= '9')) {
 | 
			
		||||
					int j;
 | 
			
		||||
					residx -= ((nbs + 3) >> 1);
 | 
			
		||||
					j = 0;
 | 
			
		||||
					if (c != '&') {
 | 
			
		||||
						j = c - '0';
 | 
			
		||||
						nbs++;
 | 
			
		||||
					}
 | 
			
		||||
					if (nbs % 2) {
 | 
			
		||||
						ds[di++] = c;
 | 
			
		||||
						resbuf[residx++] = c;
 | 
			
		||||
					} else {
 | 
			
		||||
						n = pmatch[j].rm_eo - pmatch[j].rm_so;
 | 
			
		||||
						ds = qrealloc(ds, di + rl + n, &dssize);
 | 
			
		||||
						memcpy(ds + di, sp + pmatch[j].rm_so, n);
 | 
			
		||||
						di += n;
 | 
			
		||||
						int n = pmatch[j].rm_eo - pmatch[j].rm_so;
 | 
			
		||||
						resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
 | 
			
		||||
						memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
 | 
			
		||||
						residx += n;
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
				nbs = 0;
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		regexec_flags = REG_NOTBOL;
 | 
			
		||||
		sp += eo;
 | 
			
		||||
		if (i == nm)
 | 
			
		||||
		if (match_no == nm)
 | 
			
		||||
			break;
 | 
			
		||||
		if (eo == so) {
 | 
			
		||||
			ds[di] = *sp++;
 | 
			
		||||
			if (!ds[di++])
 | 
			
		||||
				break;
 | 
			
		||||
			/* Empty match (e.g. "b*" will match anywhere).
 | 
			
		||||
			 * Advance by one char. */
 | 
			
		||||
//BUG (bug 1333):
 | 
			
		||||
//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
 | 
			
		||||
//... and will erroneously match "b" even though it is NOT at the word start.
 | 
			
		||||
//we need REG_NOTBOW but it does not exist...
 | 
			
		||||
			/* Subtle: this is safe only because
 | 
			
		||||
			 * qrealloc allocated at least one extra byte */
 | 
			
		||||
			resbuf[residx] = *sp;
 | 
			
		||||
			if (*sp == '\0')
 | 
			
		||||
				goto ret;
 | 
			
		||||
			sp++;
 | 
			
		||||
			residx++;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	ds = qrealloc(ds, di + strlen(sp), &dssize);
 | 
			
		||||
	strcpy(ds + di, sp);
 | 
			
		||||
	setvar_p(dest, ds);
 | 
			
		||||
	if (re == &sreg)
 | 
			
		||||
		regfree(re);
 | 
			
		||||
	return i;
 | 
			
		||||
	resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
 | 
			
		||||
	strcpy(resbuf + residx, sp);
 | 
			
		||||
 ret:
 | 
			
		||||
	//bb_error_msg("end sp:'%s'%p", sp,sp);
 | 
			
		||||
	setvar_p(dest ? dest : intvar[F0], resbuf);
 | 
			
		||||
	if (regex == &sreg)
 | 
			
		||||
		regfree(regex);
 | 
			
		||||
	return match_no;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static NOINLINE int do_mktime(const char *ds)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user