I just whipped up support for arbitrary regex delimiters, so
now things such as `$ echo foo | sed 'sxfooxb\arx'` (printing `bar`) will work as expected (and so doogie can stop complaining). -Erik
This commit is contained in:
parent
01bda5de6b
commit
28b3c53f0d
@ -36,11 +36,9 @@
|
|||||||
Unsupported features:
|
Unsupported features:
|
||||||
|
|
||||||
- transliteration (y/source-chars/dest-chars/) (use 'tr')
|
- transliteration (y/source-chars/dest-chars/) (use 'tr')
|
||||||
- no support for characters other than the '/' character for regex matches
|
|
||||||
- no pattern space hold space storing / swapping (x, etc.)
|
- no pattern space hold space storing / swapping (x, etc.)
|
||||||
- no labels / branching (: label, b, t, and friends)
|
- no labels / branching (: label, b, t, and friends)
|
||||||
- and lots, lots more.
|
- and lots, lots more.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
@ -63,6 +61,7 @@ struct sed_cmd {
|
|||||||
|
|
||||||
|
|
||||||
/* GENERAL FIELDS */
|
/* GENERAL FIELDS */
|
||||||
|
char delimiter; /* The delimiter used to separate regexps */
|
||||||
|
|
||||||
/* address storage */
|
/* address storage */
|
||||||
int beg_line; /* 'sed 1p' 0 == no begining line, apply commands to all lines */
|
int beg_line; /* 'sed 1p' 0 == no begining line, apply commands to all lines */
|
||||||
@ -128,64 +127,17 @@ static void destroy_cmd_strs()
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if 0
|
|
||||||
/*
|
|
||||||
* trim_str - trims leading and trailing space from a string
|
|
||||||
*
|
|
||||||
* Note: This returns a malloc'ed string so you must store and free it
|
|
||||||
* XXX: This should be in the utility.c file.
|
|
||||||
* XXX: This is now obsolete. Maybe it belongs nowhere.
|
|
||||||
*/
|
|
||||||
static char *trim_str(const char *str)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
char *retstr = strdup(str);
|
|
||||||
|
|
||||||
/* trim leading whitespace */
|
|
||||||
memmove(retstr, &retstr[strspn(retstr, " \n\t\v")], strlen(retstr));
|
|
||||||
|
|
||||||
/* trim trailing whitespace */
|
|
||||||
i = strlen(retstr) - 1;
|
|
||||||
while (isspace(retstr[i]))
|
|
||||||
i--;
|
|
||||||
retstr[++i] = 0;
|
|
||||||
|
|
||||||
/* Aside:
|
|
||||||
*
|
|
||||||
* you know, a strrspn() would really be nice cuz then we could say:
|
|
||||||
*
|
|
||||||
* retstr[strrspn(retstr, " \n\t\v") + 1] = 0;
|
|
||||||
*/
|
|
||||||
|
|
||||||
return retstr;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
/*
|
|
||||||
* strrspn - works just like strspn() but goes from right to left instead of
|
|
||||||
* left to right
|
|
||||||
*/
|
|
||||||
static size_t strrspn(const char *s, const char *accept)
|
|
||||||
{
|
|
||||||
size_t i = strlen(s);
|
|
||||||
|
|
||||||
while (strchr(accept, s[--i]))
|
|
||||||
;
|
|
||||||
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* index_of_next_unescaped_slash - walks left to right through a string
|
* index_of_next_unescaped_regexp_delim - walks left to right through a string
|
||||||
* beginning at a specified index and returns the index of the next forward
|
* beginning at a specified index and returns the index of the next regular
|
||||||
* slash ('/') not preceeded by a backslash ('\').
|
* expression delimiter (typically a forward slash ('/')) not preceded by
|
||||||
|
* a backslash ('\').
|
||||||
*/
|
*/
|
||||||
static int index_of_next_unescaped_slash(const char *str, int idx)
|
static int index_of_next_unescaped_regexp_delim(struct sed_cmd *sed_cmd, const char *str, int idx)
|
||||||
{
|
{
|
||||||
for ( ; str[idx]; idx++) {
|
for ( ; str[idx]; idx++) {
|
||||||
if (str[idx] == '/' && str[idx-1] != '\\')
|
if (str[idx] == sed_cmd->delimiter && str[idx-1] != '\\')
|
||||||
return idx;
|
return idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -196,7 +148,7 @@ static int index_of_next_unescaped_slash(const char *str, int idx)
|
|||||||
/*
|
/*
|
||||||
* returns the index in the string just past where the address ends.
|
* returns the index in the string just past where the address ends.
|
||||||
*/
|
*/
|
||||||
static int get_address(const char *str, int *line, regex_t **regex)
|
static int get_address(struct sed_cmd *sed_cmd, const char *str, int *line, regex_t **regex)
|
||||||
{
|
{
|
||||||
char *my_str = strdup(str);
|
char *my_str = strdup(str);
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
@ -213,7 +165,7 @@ static int get_address(const char *str, int *line, regex_t **regex)
|
|||||||
idx++;
|
idx++;
|
||||||
}
|
}
|
||||||
else if (my_str[idx] == '/') {
|
else if (my_str[idx] == '/') {
|
||||||
idx = index_of_next_unescaped_slash(my_str, ++idx);
|
idx = index_of_next_unescaped_regexp_delim(sed_cmd, my_str, ++idx);
|
||||||
if (idx == -1)
|
if (idx == -1)
|
||||||
error_msg_and_die("unterminated match expression\n");
|
error_msg_and_die("unterminated match expression\n");
|
||||||
my_str[idx] = '\0';
|
my_str[idx] = '\0';
|
||||||
@ -256,13 +208,16 @@ static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr)
|
|||||||
* (all three of the '/' slashes are mandatory)
|
* (all three of the '/' slashes are mandatory)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* verify that the 's' is followed by a 'slash' */
|
/* verify that the 's' is followed by something. That something
|
||||||
if (substr[++idx] != '/')
|
* (typically a 'slash') is now our regexp delimiter... */
|
||||||
|
if (!substr[++idx])
|
||||||
error_msg_and_die("bad format in substitution expression\n");
|
error_msg_and_die("bad format in substitution expression\n");
|
||||||
|
else
|
||||||
|
sed_cmd->delimiter=substr[idx];
|
||||||
|
|
||||||
/* save the match string */
|
/* save the match string */
|
||||||
oldidx = idx+1;
|
oldidx = idx+1;
|
||||||
idx = index_of_next_unescaped_slash(substr, ++idx);
|
idx = index_of_next_unescaped_regexp_delim(sed_cmd, substr, ++idx);
|
||||||
if (idx == -1)
|
if (idx == -1)
|
||||||
error_msg_and_die("bad format in substitution expression\n");
|
error_msg_and_die("bad format in substitution expression\n");
|
||||||
match = strdup_substr(substr, oldidx, idx);
|
match = strdup_substr(substr, oldidx, idx);
|
||||||
@ -281,7 +236,7 @@ static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr)
|
|||||||
|
|
||||||
/* save the replacement string */
|
/* save the replacement string */
|
||||||
oldidx = idx+1;
|
oldidx = idx+1;
|
||||||
idx = index_of_next_unescaped_slash(substr, ++idx);
|
idx = index_of_next_unescaped_regexp_delim(sed_cmd, substr, ++idx);
|
||||||
if (idx == -1)
|
if (idx == -1)
|
||||||
error_msg_and_die("bad format in substitution expression\n");
|
error_msg_and_die("bad format in substitution expression\n");
|
||||||
sed_cmd->replace = strdup_substr(substr, oldidx, idx);
|
sed_cmd->replace = strdup_substr(substr, oldidx, idx);
|
||||||
@ -401,11 +356,11 @@ static char *parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
|
|||||||
|
|
||||||
/* first part (if present) is an address: either a number or a /regex/ */
|
/* first part (if present) is an address: either a number or a /regex/ */
|
||||||
if (isdigit(cmdstr[idx]) || cmdstr[idx] == '/')
|
if (isdigit(cmdstr[idx]) || cmdstr[idx] == '/')
|
||||||
idx = get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
|
idx = get_address(sed_cmd, cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
|
||||||
|
|
||||||
/* second part (if present) will begin with a comma */
|
/* second part (if present) will begin with a comma */
|
||||||
if (cmdstr[idx] == ',')
|
if (cmdstr[idx] == ',')
|
||||||
idx += get_address(&cmdstr[++idx], &sed_cmd->end_line, &sed_cmd->end_match);
|
idx += get_address(sed_cmd, &cmdstr[++idx], &sed_cmd->end_line, &sed_cmd->end_match);
|
||||||
|
|
||||||
/* last part (mandatory) will be a command */
|
/* last part (mandatory) will be a command */
|
||||||
if (cmdstr[idx] == '\0')
|
if (cmdstr[idx] == '\0')
|
||||||
|
81
sed.c
81
sed.c
@ -36,11 +36,9 @@
|
|||||||
Unsupported features:
|
Unsupported features:
|
||||||
|
|
||||||
- transliteration (y/source-chars/dest-chars/) (use 'tr')
|
- transliteration (y/source-chars/dest-chars/) (use 'tr')
|
||||||
- no support for characters other than the '/' character for regex matches
|
|
||||||
- no pattern space hold space storing / swapping (x, etc.)
|
- no pattern space hold space storing / swapping (x, etc.)
|
||||||
- no labels / branching (: label, b, t, and friends)
|
- no labels / branching (: label, b, t, and friends)
|
||||||
- and lots, lots more.
|
- and lots, lots more.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
@ -63,6 +61,7 @@ struct sed_cmd {
|
|||||||
|
|
||||||
|
|
||||||
/* GENERAL FIELDS */
|
/* GENERAL FIELDS */
|
||||||
|
char delimiter; /* The delimiter used to separate regexps */
|
||||||
|
|
||||||
/* address storage */
|
/* address storage */
|
||||||
int beg_line; /* 'sed 1p' 0 == no begining line, apply commands to all lines */
|
int beg_line; /* 'sed 1p' 0 == no begining line, apply commands to all lines */
|
||||||
@ -128,64 +127,17 @@ static void destroy_cmd_strs()
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if 0
|
|
||||||
/*
|
|
||||||
* trim_str - trims leading and trailing space from a string
|
|
||||||
*
|
|
||||||
* Note: This returns a malloc'ed string so you must store and free it
|
|
||||||
* XXX: This should be in the utility.c file.
|
|
||||||
* XXX: This is now obsolete. Maybe it belongs nowhere.
|
|
||||||
*/
|
|
||||||
static char *trim_str(const char *str)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
char *retstr = strdup(str);
|
|
||||||
|
|
||||||
/* trim leading whitespace */
|
|
||||||
memmove(retstr, &retstr[strspn(retstr, " \n\t\v")], strlen(retstr));
|
|
||||||
|
|
||||||
/* trim trailing whitespace */
|
|
||||||
i = strlen(retstr) - 1;
|
|
||||||
while (isspace(retstr[i]))
|
|
||||||
i--;
|
|
||||||
retstr[++i] = 0;
|
|
||||||
|
|
||||||
/* Aside:
|
|
||||||
*
|
|
||||||
* you know, a strrspn() would really be nice cuz then we could say:
|
|
||||||
*
|
|
||||||
* retstr[strrspn(retstr, " \n\t\v") + 1] = 0;
|
|
||||||
*/
|
|
||||||
|
|
||||||
return retstr;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
/*
|
|
||||||
* strrspn - works just like strspn() but goes from right to left instead of
|
|
||||||
* left to right
|
|
||||||
*/
|
|
||||||
static size_t strrspn(const char *s, const char *accept)
|
|
||||||
{
|
|
||||||
size_t i = strlen(s);
|
|
||||||
|
|
||||||
while (strchr(accept, s[--i]))
|
|
||||||
;
|
|
||||||
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* index_of_next_unescaped_slash - walks left to right through a string
|
* index_of_next_unescaped_regexp_delim - walks left to right through a string
|
||||||
* beginning at a specified index and returns the index of the next forward
|
* beginning at a specified index and returns the index of the next regular
|
||||||
* slash ('/') not preceeded by a backslash ('\').
|
* expression delimiter (typically a forward slash ('/')) not preceded by
|
||||||
|
* a backslash ('\').
|
||||||
*/
|
*/
|
||||||
static int index_of_next_unescaped_slash(const char *str, int idx)
|
static int index_of_next_unescaped_regexp_delim(struct sed_cmd *sed_cmd, const char *str, int idx)
|
||||||
{
|
{
|
||||||
for ( ; str[idx]; idx++) {
|
for ( ; str[idx]; idx++) {
|
||||||
if (str[idx] == '/' && str[idx-1] != '\\')
|
if (str[idx] == sed_cmd->delimiter && str[idx-1] != '\\')
|
||||||
return idx;
|
return idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -196,7 +148,7 @@ static int index_of_next_unescaped_slash(const char *str, int idx)
|
|||||||
/*
|
/*
|
||||||
* returns the index in the string just past where the address ends.
|
* returns the index in the string just past where the address ends.
|
||||||
*/
|
*/
|
||||||
static int get_address(const char *str, int *line, regex_t **regex)
|
static int get_address(struct sed_cmd *sed_cmd, const char *str, int *line, regex_t **regex)
|
||||||
{
|
{
|
||||||
char *my_str = strdup(str);
|
char *my_str = strdup(str);
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
@ -213,7 +165,7 @@ static int get_address(const char *str, int *line, regex_t **regex)
|
|||||||
idx++;
|
idx++;
|
||||||
}
|
}
|
||||||
else if (my_str[idx] == '/') {
|
else if (my_str[idx] == '/') {
|
||||||
idx = index_of_next_unescaped_slash(my_str, ++idx);
|
idx = index_of_next_unescaped_regexp_delim(sed_cmd, my_str, ++idx);
|
||||||
if (idx == -1)
|
if (idx == -1)
|
||||||
error_msg_and_die("unterminated match expression\n");
|
error_msg_and_die("unterminated match expression\n");
|
||||||
my_str[idx] = '\0';
|
my_str[idx] = '\0';
|
||||||
@ -256,13 +208,16 @@ static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr)
|
|||||||
* (all three of the '/' slashes are mandatory)
|
* (all three of the '/' slashes are mandatory)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* verify that the 's' is followed by a 'slash' */
|
/* verify that the 's' is followed by something. That something
|
||||||
if (substr[++idx] != '/')
|
* (typically a 'slash') is now our regexp delimiter... */
|
||||||
|
if (!substr[++idx])
|
||||||
error_msg_and_die("bad format in substitution expression\n");
|
error_msg_and_die("bad format in substitution expression\n");
|
||||||
|
else
|
||||||
|
sed_cmd->delimiter=substr[idx];
|
||||||
|
|
||||||
/* save the match string */
|
/* save the match string */
|
||||||
oldidx = idx+1;
|
oldidx = idx+1;
|
||||||
idx = index_of_next_unescaped_slash(substr, ++idx);
|
idx = index_of_next_unescaped_regexp_delim(sed_cmd, substr, ++idx);
|
||||||
if (idx == -1)
|
if (idx == -1)
|
||||||
error_msg_and_die("bad format in substitution expression\n");
|
error_msg_and_die("bad format in substitution expression\n");
|
||||||
match = strdup_substr(substr, oldidx, idx);
|
match = strdup_substr(substr, oldidx, idx);
|
||||||
@ -281,7 +236,7 @@ static int parse_subst_cmd(struct sed_cmd *sed_cmd, const char *substr)
|
|||||||
|
|
||||||
/* save the replacement string */
|
/* save the replacement string */
|
||||||
oldidx = idx+1;
|
oldidx = idx+1;
|
||||||
idx = index_of_next_unescaped_slash(substr, ++idx);
|
idx = index_of_next_unescaped_regexp_delim(sed_cmd, substr, ++idx);
|
||||||
if (idx == -1)
|
if (idx == -1)
|
||||||
error_msg_and_die("bad format in substitution expression\n");
|
error_msg_and_die("bad format in substitution expression\n");
|
||||||
sed_cmd->replace = strdup_substr(substr, oldidx, idx);
|
sed_cmd->replace = strdup_substr(substr, oldidx, idx);
|
||||||
@ -401,11 +356,11 @@ static char *parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
|
|||||||
|
|
||||||
/* first part (if present) is an address: either a number or a /regex/ */
|
/* first part (if present) is an address: either a number or a /regex/ */
|
||||||
if (isdigit(cmdstr[idx]) || cmdstr[idx] == '/')
|
if (isdigit(cmdstr[idx]) || cmdstr[idx] == '/')
|
||||||
idx = get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
|
idx = get_address(sed_cmd, cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
|
||||||
|
|
||||||
/* second part (if present) will begin with a comma */
|
/* second part (if present) will begin with a comma */
|
||||||
if (cmdstr[idx] == ',')
|
if (cmdstr[idx] == ',')
|
||||||
idx += get_address(&cmdstr[++idx], &sed_cmd->end_line, &sed_cmd->end_match);
|
idx += get_address(sed_cmd, &cmdstr[++idx], &sed_cmd->end_line, &sed_cmd->end_match);
|
||||||
|
|
||||||
/* last part (mandatory) will be a command */
|
/* last part (mandatory) will be a command */
|
||||||
if (cmdstr[idx] == '\0')
|
if (cmdstr[idx] == '\0')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user