sed: fix handling of escaped delimiters in s/// search pattern, closes 14541
function                                             old     new   delta
copy_parsing_escapes                                  67      96     +29
parse_regex_delim                                    109     111      +2
get_address                                          213     215      +2
add_cmd                                             1176    1178      +2
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 4/0 up/down: 35/0)               Total: 35 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
33a9f34df5
commit
e998c7c032
@ -246,7 +246,6 @@ static void cleanup_outname(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* strcpy, replacing "\from" with 'to'. If to is NUL, replacing "\any" with 'any' */
|
/* strcpy, replacing "\from" with 'to'. If to is NUL, replacing "\any" with 'any' */
|
||||||
|
|
||||||
static unsigned parse_escapes(char *dest, const char *string, int len, char from, char to)
|
static unsigned parse_escapes(char *dest, const char *string, int len, char from, char to)
|
||||||
{
|
{
|
||||||
char *d = dest;
|
char *d = dest;
|
||||||
@ -276,7 +275,7 @@ static unsigned parse_escapes(char *dest, const char *string, int len, char from
|
|||||||
return d - dest;
|
return d - dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *copy_parsing_escapes(const char *string, int len)
|
static char *copy_parsing_escapes(const char *string, int len, char delim)
|
||||||
{
|
{
|
||||||
const char *s;
|
const char *s;
|
||||||
char *dest = xmalloc(len + 1);
|
char *dest = xmalloc(len + 1);
|
||||||
@ -287,10 +286,15 @@ static char *copy_parsing_escapes(const char *string, int len)
|
|||||||
len = parse_escapes(dest, string, len, s[1], s[0]);
|
len = parse_escapes(dest, string, len, s[1], s[0]);
|
||||||
string = dest;
|
string = dest;
|
||||||
}
|
}
|
||||||
|
if (delim) {
|
||||||
|
/* we additionally unescape any instances of escaped delimiter.
|
||||||
|
* For example, in 's+9\++X+' the pattern is "9+", not "9\+".
|
||||||
|
*/
|
||||||
|
len = parse_escapes(dest, string, len, delim, delim);
|
||||||
|
}
|
||||||
return dest;
|
return dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* index_of_next_unescaped_regexp_delim - walks left to right through a string
|
* index_of_next_unescaped_regexp_delim - walks left to right through a string
|
||||||
* beginning at a specified index and returns the index of the next regular
|
* beginning at a specified index and returns the index of the next regular
|
||||||
@ -347,12 +351,11 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
|
|||||||
|
|
||||||
/* save the match string */
|
/* save the match string */
|
||||||
idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr);
|
idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr);
|
||||||
*match = copy_parsing_escapes(cmdstr_ptr, idx);
|
*match = copy_parsing_escapes(cmdstr_ptr, idx, delimiter);
|
||||||
|
|
||||||
/* save the replacement string */
|
/* save the replacement string */
|
||||||
cmdstr_ptr += idx + 1;
|
cmdstr_ptr += idx + 1;
|
||||||
idx = index_of_next_unescaped_regexp_delim(- (int)delimiter, cmdstr_ptr);
|
idx = index_of_next_unescaped_regexp_delim(- (int)delimiter, cmdstr_ptr);
|
||||||
*replace = copy_parsing_escapes(cmdstr_ptr, idx);
|
*replace = copy_parsing_escapes(cmdstr_ptr, idx, 0);
|
||||||
|
|
||||||
return ((cmdstr_ptr - cmdstr) + idx);
|
return ((cmdstr_ptr - cmdstr) + idx);
|
||||||
}
|
}
|
||||||
@ -380,7 +383,7 @@ static int get_address(const char *my_str, int *linenum, regex_t ** regex)
|
|||||||
delimiter = *++pos;
|
delimiter = *++pos;
|
||||||
next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
|
next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
|
||||||
if (next != 0) {
|
if (next != 0) {
|
||||||
temp = copy_parsing_escapes(pos, next);
|
temp = copy_parsing_escapes(pos, next, 0);
|
||||||
G.previous_regex_ptr = *regex = xzalloc(sizeof(regex_t));
|
G.previous_regex_ptr = *regex = xzalloc(sizeof(regex_t));
|
||||||
xregcomp(*regex, temp, G.regex_type);
|
xregcomp(*regex, temp, G.regex_type);
|
||||||
free(temp);
|
free(temp);
|
||||||
@ -575,7 +578,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
|
|||||||
cmdstr++;
|
cmdstr++;
|
||||||
}
|
}
|
||||||
len = strlen(cmdstr);
|
len = strlen(cmdstr);
|
||||||
sed_cmd->string = copy_parsing_escapes(cmdstr, len);
|
sed_cmd->string = copy_parsing_escapes(cmdstr, len, 0);
|
||||||
cmdstr += len;
|
cmdstr += len;
|
||||||
/* "\anychar" -> "anychar" */
|
/* "\anychar" -> "anychar" */
|
||||||
parse_escapes(sed_cmd->string, sed_cmd->string, -1, '\0', '\0');
|
parse_escapes(sed_cmd->string, sed_cmd->string, -1, '\0', '\0');
|
||||||
|
@ -324,6 +324,16 @@ testing "sed zero chars match/replace logic must not falsely trigger here 2" \
|
|||||||
"sed 's/ *$/_/g'" \
|
"sed 's/ *$/_/g'" \
|
||||||
"qwerty_\n" "" "qwerty\n"
|
"qwerty_\n" "" "qwerty\n"
|
||||||
|
|
||||||
|
# the pattern here is interpreted as "9+", not as "9\+"
|
||||||
|
testing "sed special char as s/// delimiter, in pattern" \
|
||||||
|
"sed 's+9\++X+'" \
|
||||||
|
"X8=17\n" "" "9+8=17\n"
|
||||||
|
|
||||||
|
# but in replacement string, "\&" remains "\&", not interpreted as "&"
|
||||||
|
testing "sed special char as s/// delimiter, in replacement" \
|
||||||
|
"sed 's&9&X\&&'" \
|
||||||
|
"X&+8=17\n" "" "9+8=17\n"
|
||||||
|
|
||||||
testing "sed /\$_in_regex/ should not match newlines, only end-of-line" \
|
testing "sed /\$_in_regex/ should not match newlines, only end-of-line" \
|
||||||
"sed ': testcont; /\\\\$/{ =; N; b testcont }'" \
|
"sed ': testcont; /\\\\$/{ =; N; b testcont }'" \
|
||||||
"\
|
"\
|
||||||
|
Loading…
Reference in New Issue
Block a user