From 8bca3e20b9c057e9144af27870ca3905f1e5d316 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 30 Jun 2009 19:19:37 +0200 Subject: [PATCH] sed: fix handling of 1d;1,3p and 1d;1,REGEXp function old new delta process_files 2084 2173 +89 Signed-off-by: Denys Vlasenko --- editors/sed.c | 486 +++++++++++++++++++++++--------------------- testsuite/sed.tests | 13 ++ 2 files changed, 265 insertions(+), 234 deletions(-) diff --git a/editors/sed.c b/editors/sed.c index eb31f7d2e..2127301d5 100644 --- a/editors/sed.c +++ b/editors/sed.c @@ -865,63 +865,80 @@ static void process_files(void) /* Prime the pump */ next_line = get_next_line(&next_gets_char); - /* go through every line in each file */ + /* Go through every line in each file */ again: substituted = 0; /* Advance to next line. Stop if out of lines. */ pattern_space = next_line; - if (!pattern_space) return; + if (!pattern_space) + return; last_gets_char = next_gets_char; /* Read one line in advance so we can act on the last line, * the '$' address */ next_line = get_next_line(&next_gets_char); linenum++; + + /* For every line, go through all the commands */ restart: - /* for every line, go through all the commands */ for (sed_cmd = G.sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) { int old_matched, matched; old_matched = sed_cmd->in_match; /* Determine if this command matches this line: */ - - /* Are we continuing a previous multi-line match? */ + /* Are we continuing a previous multi-line match? */ sed_cmd->in_match = sed_cmd->in_match /* Or is no range necessary? */ || (!sed_cmd->beg_line && !sed_cmd->end_line && !sed_cmd->beg_match && !sed_cmd->end_match) /* Or did we match the start of a numerical range? */ - || (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum)) + || (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum + /* "shadowed beginning" case: "1d;1,ENDp" - p still matches at line 2 + * even though 1d skipped line 1 which is a start line for p */ + || (sed_cmd->end_line && sed_cmd->beg_line < linenum && sed_cmd->end_line >= linenum) + || (sed_cmd->end_match && sed_cmd->beg_line < linenum) + ) + ) /* Or does this line match our begin address regex? */ || (beg_match(sed_cmd, pattern_space)) /* Or did we match last line of input? */ || (sed_cmd->beg_line == -1 && next_line == NULL); - /* Snapshot the value */ - matched = sed_cmd->in_match; - /* Is this line the end of the current match? */ + //bb_error_msg("cmd:'%c' matched:%d beg_line:%d end_line:%d linenum:%d", + //sed_cmd->cmd, matched, sed_cmd->beg_line, sed_cmd->end_line, linenum); + /* Is this line the end of the current match? */ if (matched) { - sed_cmd->in_match = !( + int n = ( /* has the ending line come, or is this a single address command? */ - (sed_cmd->end_line ? + sed_cmd->end_line ? sed_cmd->end_line == -1 ? !next_line : (sed_cmd->end_line <= linenum) : !sed_cmd->end_match - ) + ); + if (!n) { /* or does this line matches our last address regex */ - || (sed_cmd->end_match && old_matched + n = (sed_cmd->end_match + && old_matched && (regexec(sed_cmd->end_match, - pattern_space, 0, NULL, 0) == 0)) - ); + pattern_space, 0, NULL, 0) == 0) + ); + if (n && sed_cmd->beg_line > 0) { + /* Once matched, "n,regex" range is dead, disabling it */ + regfree(sed_cmd->end_match); + free(sed_cmd->end_match); + sed_cmd->end_match = NULL; + } + } + sed_cmd->in_match = !n; } - /* Skip blocks of commands we didn't match. */ + /* Skip blocks of commands we didn't match */ if (sed_cmd->cmd == '{') { if (sed_cmd->invert ? matched : !matched) { while (sed_cmd->cmd != '}') { @@ -934,253 +951,254 @@ static void process_files(void) } /* Okay, so did this line match? */ - if (sed_cmd->invert ? !matched : matched) { - /* Update last used regex in case a blank substitute BRE is found */ - if (sed_cmd->beg_match) { - G.previous_regex_ptr = sed_cmd->beg_match; - } + if (sed_cmd->invert ? matched : !matched) + continue; /* no */ - /* actual sedding */ - switch (sed_cmd->cmd) { + /* Update last used regex in case a blank substitute BRE is found */ + if (sed_cmd->beg_match) { + G.previous_regex_ptr = sed_cmd->beg_match; + } - /* Print line number */ - case '=': - fprintf(G.nonstdout, "%d\n", linenum); - break; + /* actual sedding */ + switch (sed_cmd->cmd) { - /* Write the current pattern space up to the first newline */ - case 'P': - { - char *tmp = strchr(pattern_space, '\n'); + /* Print line number */ + case '=': + fprintf(G.nonstdout, "%d\n", linenum); + break; - if (tmp) { - *tmp = '\0'; - /* TODO: explain why '\n' below */ - sed_puts(pattern_space, '\n'); - *tmp = '\n'; - break; - } - /* Fall Through */ - } + /* Write the current pattern space up to the first newline */ + case 'P': + { + char *tmp = strchr(pattern_space, '\n'); - /* Write the current pattern space to output */ - case 'p': - /* NB: we print this _before_ the last line - * (of current file) is printed. Even if - * that line is nonterminated, we print - * '\n' here (gnu sed does the same) */ + if (tmp) { + *tmp = '\0'; + /* TODO: explain why '\n' below */ sed_puts(pattern_space, '\n'); - break; - /* Delete up through first newline */ - case 'D': - { - char *tmp = strchr(pattern_space, '\n'); - - if (tmp) { - tmp = xstrdup(tmp+1); - free(pattern_space); - pattern_space = tmp; - goto restart; - } - } - /* discard this line. */ - case 'd': - goto discard_line; - - /* Substitute with regex */ - case 's': - if (!do_subst_command(sed_cmd, &pattern_space)) - break; - substituted |= 1; - - /* handle p option */ - if (sed_cmd->sub_p) - sed_puts(pattern_space, last_gets_char); - /* handle w option */ - if (sed_cmd->sw_file) - puts_maybe_newline( - pattern_space, sed_cmd->sw_file, - &sed_cmd->sw_last_char, last_gets_char); - break; - - /* Append line to linked list to be printed later */ - case 'a': - append(sed_cmd->string); - break; - - /* Insert text before this line */ - case 'i': - sed_puts(sed_cmd->string, '\n'); - break; - - /* Cut and paste text (replace) */ - case 'c': - /* Only triggers on last line of a matching range. */ - if (!sed_cmd->in_match) - sed_puts(sed_cmd->string, NO_EOL_CHAR); - goto discard_line; - - /* Read file, append contents to output */ - case 'r': - { - FILE *rfile; - - rfile = fopen_for_read(sed_cmd->string); - if (rfile) { - char *line; - - while ((line = xmalloc_fgetline(rfile)) - != NULL) - append(line); - xprint_and_close_file(rfile); - } - + *tmp = '\n'; break; } + /* Fall Through */ + } - /* Write pattern space to file. */ - case 'w': + /* Write the current pattern space to output */ + case 'p': + /* NB: we print this _before_ the last line + * (of current file) is printed. Even if + * that line is nonterminated, we print + * '\n' here (gnu sed does the same) */ + sed_puts(pattern_space, '\n'); + break; + /* Delete up through first newline */ + case 'D': + { + char *tmp = strchr(pattern_space, '\n'); + + if (tmp) { + tmp = xstrdup(tmp+1); + free(pattern_space); + pattern_space = tmp; + goto restart; + } + } + /* discard this line. */ + case 'd': + goto discard_line; + + /* Substitute with regex */ + case 's': + if (!do_subst_command(sed_cmd, &pattern_space)) + break; + substituted |= 1; + + /* handle p option */ + if (sed_cmd->sub_p) + sed_puts(pattern_space, last_gets_char); + /* handle w option */ + if (sed_cmd->sw_file) puts_maybe_newline( pattern_space, sed_cmd->sw_file, &sed_cmd->sw_last_char, last_gets_char); - break; + break; - /* Read next line from input */ - case 'n': - if (!G.be_quiet) - sed_puts(pattern_space, last_gets_char); - if (next_line) { - free(pattern_space); - pattern_space = next_line; - last_gets_char = next_gets_char; - next_line = get_next_line(&next_gets_char); - substituted = 0; - linenum++; - break; - } - /* fall through */ + /* Append line to linked list to be printed later */ + case 'a': + append(sed_cmd->string); + break; - /* Quit. End of script, end of input. */ - case 'q': - /* Exit the outer while loop */ - free(next_line); - next_line = NULL; - goto discard_commands; + /* Insert text before this line */ + case 'i': + sed_puts(sed_cmd->string, '\n'); + break; - /* Append the next line to the current line */ - case 'N': - { - int len; - /* If no next line, jump to end of script and exit. */ - if (next_line == NULL) { - /* Jump to end of script and exit */ - free(next_line); - next_line = NULL; - goto discard_line; - /* append next_line, read new next_line. */ - } - len = strlen(pattern_space); - pattern_space = realloc(pattern_space, len + strlen(next_line) + 2); - pattern_space[len] = '\n'; - strcpy(pattern_space + len+1, next_line); + /* Cut and paste text (replace) */ + case 'c': + /* Only triggers on last line of a matching range. */ + if (!sed_cmd->in_match) + sed_puts(sed_cmd->string, NO_EOL_CHAR); + goto discard_line; + + /* Read file, append contents to output */ + case 'r': + { + FILE *rfile; + + rfile = fopen_for_read(sed_cmd->string); + if (rfile) { + char *line; + + while ((line = xmalloc_fgetline(rfile)) + != NULL) + append(line); + xprint_and_close_file(rfile); + } + + break; + } + + /* Write pattern space to file. */ + case 'w': + puts_maybe_newline( + pattern_space, sed_cmd->sw_file, + &sed_cmd->sw_last_char, last_gets_char); + break; + + /* Read next line from input */ + case 'n': + if (!G.be_quiet) + sed_puts(pattern_space, last_gets_char); + if (next_line) { + free(pattern_space); + pattern_space = next_line; last_gets_char = next_gets_char; next_line = get_next_line(&next_gets_char); + substituted = 0; linenum++; break; } + /* fall through */ - /* Test/branch if substitution occurred */ - case 't': - if (!substituted) break; - substituted = 0; - /* Fall through */ - /* Test/branch if substitution didn't occur */ - case 'T': - if (substituted) break; - /* Fall through */ - /* Branch to label */ - case 'b': - if (!sed_cmd->string) goto discard_commands; - else sed_cmd = branch_to(sed_cmd->string); - break; - /* Transliterate characters */ - case 'y': - { - int i, j; + /* Quit. End of script, end of input. */ + case 'q': + /* Exit the outer while loop */ + free(next_line); + next_line = NULL; + goto discard_commands; - for (i = 0; pattern_space[i]; i++) { - for (j = 0; sed_cmd->string[j]; j += 2) { - if (pattern_space[i] == sed_cmd->string[j]) { - pattern_space[i] = sed_cmd->string[j + 1]; - break; - } + /* Append the next line to the current line */ + case 'N': + { + int len; + /* If no next line, jump to end of script and exit. */ + if (next_line == NULL) { + /* Jump to end of script and exit */ + free(next_line); + next_line = NULL; + goto discard_line; + /* append next_line, read new next_line. */ + } + len = strlen(pattern_space); + pattern_space = realloc(pattern_space, len + strlen(next_line) + 2); + pattern_space[len] = '\n'; + strcpy(pattern_space + len+1, next_line); + last_gets_char = next_gets_char; + next_line = get_next_line(&next_gets_char); + linenum++; + break; + } + + /* Test/branch if substitution occurred */ + case 't': + if (!substituted) break; + substituted = 0; + /* Fall through */ + /* Test/branch if substitution didn't occur */ + case 'T': + if (substituted) break; + /* Fall through */ + /* Branch to label */ + case 'b': + if (!sed_cmd->string) goto discard_commands; + else sed_cmd = branch_to(sed_cmd->string); + break; + /* Transliterate characters */ + case 'y': + { + int i, j; + + for (i = 0; pattern_space[i]; i++) { + for (j = 0; sed_cmd->string[j]; j += 2) { + if (pattern_space[i] == sed_cmd->string[j]) { + pattern_space[i] = sed_cmd->string[j + 1]; + break; } } - - break; } - case 'g': /* Replace pattern space with hold space */ - free(pattern_space); - pattern_space = xstrdup(G.hold_space ? G.hold_space : ""); - break; - case 'G': /* Append newline and hold space to pattern space */ - { - int pattern_space_size = 2; - int hold_space_size = 0; - if (pattern_space) - pattern_space_size += strlen(pattern_space); - if (G.hold_space) - hold_space_size = strlen(G.hold_space); - pattern_space = xrealloc(pattern_space, - pattern_space_size + hold_space_size); - if (pattern_space_size == 2) - pattern_space[0] = 0; - strcat(pattern_space, "\n"); - if (G.hold_space) - strcat(pattern_space, G.hold_space); - last_gets_char = '\n'; - - break; - } - case 'h': /* Replace hold space with pattern space */ - free(G.hold_space); - G.hold_space = xstrdup(pattern_space); - break; - case 'H': /* Append newline and pattern space to hold space */ - { - int hold_space_size = 2; - int pattern_space_size = 0; - - if (G.hold_space) - hold_space_size += strlen(G.hold_space); - if (pattern_space) - pattern_space_size = strlen(pattern_space); - G.hold_space = xrealloc(G.hold_space, - hold_space_size + pattern_space_size); - - if (hold_space_size == 2) - *G.hold_space = 0; - strcat(G.hold_space, "\n"); - if (pattern_space) - strcat(G.hold_space, pattern_space); - - break; - } - case 'x': /* Exchange hold and pattern space */ - { - char *tmp = pattern_space; - pattern_space = G.hold_space ? : xzalloc(1); - last_gets_char = '\n'; - G.hold_space = tmp; - break; - } - } + break; } - } + case 'g': /* Replace pattern space with hold space */ + free(pattern_space); + pattern_space = xstrdup(G.hold_space ? G.hold_space : ""); + break; + case 'G': /* Append newline and hold space to pattern space */ + { + int pattern_space_size = 2; + int hold_space_size = 0; + + if (pattern_space) + pattern_space_size += strlen(pattern_space); + if (G.hold_space) + hold_space_size = strlen(G.hold_space); + pattern_space = xrealloc(pattern_space, + pattern_space_size + hold_space_size); + if (pattern_space_size == 2) + pattern_space[0] = 0; + strcat(pattern_space, "\n"); + if (G.hold_space) + strcat(pattern_space, G.hold_space); + last_gets_char = '\n'; + + break; + } + case 'h': /* Replace hold space with pattern space */ + free(G.hold_space); + G.hold_space = xstrdup(pattern_space); + break; + case 'H': /* Append newline and pattern space to hold space */ + { + int hold_space_size = 2; + int pattern_space_size = 0; + + if (G.hold_space) + hold_space_size += strlen(G.hold_space); + if (pattern_space) + pattern_space_size = strlen(pattern_space); + G.hold_space = xrealloc(G.hold_space, + hold_space_size + pattern_space_size); + + if (hold_space_size == 2) + *G.hold_space = 0; + strcat(G.hold_space, "\n"); + if (pattern_space) + strcat(G.hold_space, pattern_space); + + break; + } + case 'x': /* Exchange hold and pattern space */ + { + char *tmp = pattern_space; + pattern_space = G.hold_space ? : xzalloc(1); + last_gets_char = '\n'; + G.hold_space = tmp; + break; + } + } /* switch */ + } /* for each cmd */ /* - * exit point from sedding... + * Exit point from sedding... */ discard_commands: /* we will print the line unless we were told to be quiet ('-n') diff --git a/testsuite/sed.tests b/testsuite/sed.tests index 9a7f886c1..8af156ae9 100755 --- a/testsuite/sed.tests +++ b/testsuite/sed.tests @@ -207,4 +207,17 @@ testing "sed n command must reset 'substituted' bit" \ "sed 's/1/x/;T;n;: next;s/3/y/;t quit;n;b next;: quit;q'" \ "0\nx\n2\ny\n" "" "0\n1\n2\n3\n" +testing "sed d does not break n,m matching" \ + "sed -n '1d;1,3p'" \ + "second\nthird\n" "" "first\nsecond\nthird\nfourth\n" + +testing "sed d does not break n,regex matching" \ + "sed -n '1d;1,/hir/p'" \ + "second\nthird\n" "" "first\nsecond\nthird\nfourth\n" + +testing "sed d does not break n,regex matching #2" \ + "sed -n '1,5d;1,/hir/p'" \ + "second2\nthird2\n" "" \ + "first\nsecond\nthird\nfourth\n""first2\nsecond2\nthird2\nfourth2\n" + exit $FAILCOUNT