awk: fix dodgy multi-char separators splitting logic
function      old     new   delta
awk_split     521     484     -37

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
89f063b900
commit
5323af7f51
@ -1765,10 +1765,9 @@ static void fsrealloc(int size)
|
|||||||
|
|
||||||
static int awk_split(const char *s, node *spl, char **slist)
|
static int awk_split(const char *s, node *spl, char **slist)
|
||||||
{
|
{
|
||||||
int l, n;
|
int n;
|
||||||
char c[4];
|
char c[4];
|
||||||
char *s1;
|
char *s1;
|
||||||
regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
|
|
||||||
|
|
||||||
/* in worst case, each char would be a separate field */
|
/* in worst case, each char would be a separate field */
|
||||||
*slist = s1 = xzalloc(strlen(s) * 2 + 3);
|
*slist = s1 = xzalloc(strlen(s) * 2 + 3);
|
||||||
@ -1785,12 +1784,18 @@ static int awk_split(const char *s, node *spl, char **slist)
|
|||||||
return n; /* "": zero fields */
|
return n; /* "": zero fields */
|
||||||
n++; /* at least one field will be there */
|
n++; /* at least one field will be there */
|
||||||
do {
|
do {
|
||||||
|
int l;
|
||||||
|
regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
|
||||||
|
|
||||||
l = strcspn(s, c+2); /* len till next NUL or \n */
|
l = strcspn(s, c+2); /* len till next NUL or \n */
|
||||||
if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
|
if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
|
||||||
&& pmatch[0].rm_so <= l
|
&& pmatch[0].rm_so <= l
|
||||||
) {
|
) {
|
||||||
l = pmatch[0].rm_so;
|
l = pmatch[0].rm_so;
|
||||||
if (pmatch[0].rm_eo == 0) {
|
if (pmatch[0].rm_eo == 0) {
|
||||||
|
/* For example, happens when FS can match
|
||||||
|
* an empty string (awk -F ' *')
|
||||||
|
*/
|
||||||
l++;
|
l++;
|
||||||
pmatch[0].rm_eo++;
|
pmatch[0].rm_eo++;
|
||||||
}
|
}
|
||||||
@ -1800,14 +1805,16 @@ static int awk_split(const char *s, node *spl, char **slist)
|
|||||||
if (s[l])
|
if (s[l])
|
||||||
pmatch[0].rm_eo++;
|
pmatch[0].rm_eo++;
|
||||||
}
|
}
|
||||||
memcpy(s1, s, l);
|
s1 = mempcpy(s1, s, l);
|
||||||
/* make sure we remove *all* of the separator chars */
|
*s1++ = '\0';
|
||||||
do {
|
|
||||||
s1[l] = '\0';
|
|
||||||
} while (++l < pmatch[0].rm_eo);
|
|
||||||
nextword(&s1);
|
|
||||||
s += pmatch[0].rm_eo;
|
s += pmatch[0].rm_eo;
|
||||||
} while (*s);
|
} while (*s);
|
||||||
|
|
||||||
|
/* echo a-- | awk -F-- '{ print NF, length($NF), $NF }'
|
||||||
|
* should print "2 0 ":
|
||||||
|
*/
|
||||||
|
*s1 = '\0';
|
||||||
|
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
if (c[0] == '\0') { /* null split */
|
if (c[0] == '\0') { /* null split */
|
||||||
@ -2011,7 +2018,7 @@ static int ptest(node *pattern)
|
|||||||
static int awk_getline(rstream *rsm, var *v)
|
static int awk_getline(rstream *rsm, var *v)
|
||||||
{
|
{
|
||||||
char *b;
|
char *b;
|
||||||
regmatch_t pmatch[2];
|
regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
|
||||||
int size, a, p, pp = 0;
|
int size, a, p, pp = 0;
|
||||||
int fd, so, eo, r, rp;
|
int fd, so, eo, r, rp;
|
||||||
char c, *m, *s;
|
char c, *m, *s;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user