sed: code shrink
   text    data     bss     dec     hex filename
 876354     493    7584  884431   d7ecf busybox_old
 876323     493    7584  884400   d7eb0 busybox_unstripped

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
		
							
								
								
									
										146
									
								
								editors/sed.c
									
									
									
									
									
								
							
							
						
						
									
										146
									
								
								editors/sed.c
									
									
									
									
									
								
							@@ -14,49 +14,47 @@
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
/* Code overview.
 | 
			
		||||
 *
 | 
			
		||||
 * Files are laid out to avoid unnecessary function declarations.  So for
 | 
			
		||||
 * example, every function add_cmd calls occurs before add_cmd in this file.
 | 
			
		||||
 *
 | 
			
		||||
 * add_cmd() is called on each line of sed command text (from a file or from
 | 
			
		||||
 * the command line).  It calls get_address() and parse_cmd_args().  The
 | 
			
		||||
 * resulting sed_cmd_t structures are appended to a linked list
 | 
			
		||||
 * (G.sed_cmd_head/G.sed_cmd_tail).
 | 
			
		||||
 *
 | 
			
		||||
 * add_input_file() adds a FILE* to the list of input files.  We need to
 | 
			
		||||
 * know all input sources ahead of time to find the last line for the $ match.
 | 
			
		||||
 *
 | 
			
		||||
 * process_files() does actual sedding, reading data lines from each input FILE *
 | 
			
		||||
 * (which could be stdin) and applying the sed command list (sed_cmd_head) to
 | 
			
		||||
 * each of the resulting lines.
 | 
			
		||||
 *
 | 
			
		||||
 * sed_main() is where external code calls into this, with a command line.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
  Files are laid out to avoid unnecessary function declarations.  So for
 | 
			
		||||
  example, every function add_cmd calls occurs before add_cmd in this file.
 | 
			
		||||
 | 
			
		||||
  add_cmd() is called on each line of sed command text (from a file or from
 | 
			
		||||
  the command line).  It calls get_address() and parse_cmd_args().  The
 | 
			
		||||
  resulting sed_cmd_t structures are appended to a linked list
 | 
			
		||||
  (G.sed_cmd_head/G.sed_cmd_tail).
 | 
			
		||||
 | 
			
		||||
  add_input_file() adds a FILE* to the list of input files.  We need to
 | 
			
		||||
  know all input sources ahead of time to find the last line for the $ match.
 | 
			
		||||
 | 
			
		||||
  process_files() does actual sedding, reading data lines from each input FILE *
 | 
			
		||||
  (which could be stdin) and applying the sed command list (sed_cmd_head) to
 | 
			
		||||
  each of the resulting lines.
 | 
			
		||||
 | 
			
		||||
  sed_main() is where external code calls into this, with a command line.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
	Supported features and commands in this version of sed:
 | 
			
		||||
 | 
			
		||||
	 - comments ('#')
 | 
			
		||||
	 - address matching: num|/matchstr/[,num|/matchstr/|$]command
 | 
			
		||||
	 - commands: (p)rint, (d)elete, (s)ubstitue (with g & I flags)
 | 
			
		||||
	 - edit commands: (a)ppend, (i)nsert, (c)hange
 | 
			
		||||
	 - file commands: (r)ead
 | 
			
		||||
	 - backreferences in substitution expressions (\0, \1, \2...\9)
 | 
			
		||||
	 - grouped commands: {cmd1;cmd2}
 | 
			
		||||
	 - transliteration (y/source-chars/dest-chars/)
 | 
			
		||||
	 - pattern space hold space storing / swapping (g, h, x)
 | 
			
		||||
	 - labels / branching (: label, b, t, T)
 | 
			
		||||
 | 
			
		||||
	 (Note: Specifying an address (range) to match is *optional*; commands
 | 
			
		||||
	 default to the whole pattern space if no specific address match was
 | 
			
		||||
	 requested.)
 | 
			
		||||
 | 
			
		||||
	Todo:
 | 
			
		||||
	 - Create a wrapper around regex to make libc's regex conform with sed
 | 
			
		||||
 | 
			
		||||
	Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
 | 
			
		||||
*/
 | 
			
		||||
/* Supported features and commands in this version of sed:
 | 
			
		||||
 *
 | 
			
		||||
 * - comments ('#')
 | 
			
		||||
 * - address matching: num|/matchstr/[,num|/matchstr/|$]command
 | 
			
		||||
 * - commands: (p)rint, (d)elete, (s)ubstitute (with g & I flags)
 | 
			
		||||
 * - edit commands: (a)ppend, (i)nsert, (c)hange
 | 
			
		||||
 * - file commands: (r)ead
 | 
			
		||||
 * - backreferences in substitution expressions (\0, \1, \2...\9)
 | 
			
		||||
 * - grouped commands: {cmd1;cmd2}
 | 
			
		||||
 * - transliteration (y/source-chars/dest-chars/)
 | 
			
		||||
 * - pattern space hold space storing / swapping (g, h, x)
 | 
			
		||||
 * - labels / branching (: label, b, t, T)
 | 
			
		||||
 *
 | 
			
		||||
 * (Note: Specifying an address (range) to match is *optional*; commands
 | 
			
		||||
 * default to the whole pattern space if no specific address match was
 | 
			
		||||
 * requested.)
 | 
			
		||||
 *
 | 
			
		||||
 * Todo:
 | 
			
		||||
 * - Create a wrapper around regex to make libc's regex conform with sed
 | 
			
		||||
 *
 | 
			
		||||
 * Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
//usage:#define sed_trivial_usage
 | 
			
		||||
//usage:       "[-efinr] SED_CMD [FILE]..."
 | 
			
		||||
@@ -244,11 +242,13 @@ static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str)
 | 
			
		||||
		delimiter = -delimiter;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for (; (ch = str[idx]); idx++) {
 | 
			
		||||
	for (; (ch = str[idx]) != '\0'; idx++) {
 | 
			
		||||
		if (bracket >= 0) {
 | 
			
		||||
			if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2
 | 
			
		||||
					&& str[idx - 1] == '^')))
 | 
			
		||||
			if (ch == ']'
 | 
			
		||||
			 && !(bracket == idx - 1 || (bracket == idx - 2 && str[idx - 1] == '^'))
 | 
			
		||||
			) {
 | 
			
		||||
				bracket = -1;
 | 
			
		||||
			}
 | 
			
		||||
		} else if (escaped)
 | 
			
		||||
			escaped = 0;
 | 
			
		||||
		else if (ch == '\\')
 | 
			
		||||
@@ -434,11 +434,47 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr)
 | 
			
		||||
 */
 | 
			
		||||
static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
 | 
			
		||||
{
 | 
			
		||||
	static const char cmd_letters[] = "saicrw:btTydDgGhHlnNpPqx={}";
 | 
			
		||||
	enum {
 | 
			
		||||
		IDX_s = 0,
 | 
			
		||||
		IDX_a,
 | 
			
		||||
		IDX_i,
 | 
			
		||||
		IDX_c,
 | 
			
		||||
		IDX_r,
 | 
			
		||||
		IDX_w,
 | 
			
		||||
		IDX_colon,
 | 
			
		||||
		IDX_b,
 | 
			
		||||
		IDX_t,
 | 
			
		||||
		IDX_T,
 | 
			
		||||
		IDX_y,
 | 
			
		||||
		IDX_d,
 | 
			
		||||
		IDX_D,
 | 
			
		||||
		IDX_g,
 | 
			
		||||
		IDX_G,
 | 
			
		||||
		IDX_h,
 | 
			
		||||
		IDX_H,
 | 
			
		||||
		IDX_l,
 | 
			
		||||
		IDX_n,
 | 
			
		||||
		IDX_N,
 | 
			
		||||
		IDX_p,
 | 
			
		||||
		IDX_P,
 | 
			
		||||
		IDX_q,
 | 
			
		||||
		IDX_x,
 | 
			
		||||
		IDX_equal,
 | 
			
		||||
		IDX_lbrace,
 | 
			
		||||
		IDX_rbrace,
 | 
			
		||||
		IDX_nul
 | 
			
		||||
	};
 | 
			
		||||
	struct chk { char chk[sizeof(cmd_letters)-1 == IDX_nul ? 1 : -1]; };
 | 
			
		||||
 | 
			
		||||
	unsigned idx = strchrnul(cmd_letters, sed_cmd->cmd) - cmd_letters;
 | 
			
		||||
 | 
			
		||||
	/* handle (s)ubstitution command */
 | 
			
		||||
	if (sed_cmd->cmd == 's')
 | 
			
		||||
	if (idx == IDX_s) {
 | 
			
		||||
		cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
 | 
			
		||||
	}
 | 
			
		||||
	/* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
 | 
			
		||||
	else if (strchr("aic", sed_cmd->cmd)) {
 | 
			
		||||
	else if (idx <= IDX_c) { /* a,i,c */
 | 
			
		||||
		if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')
 | 
			
		||||
			bb_error_msg_and_die("only a beginning address can be specified for edit commands");
 | 
			
		||||
		for (;;) {
 | 
			
		||||
@@ -454,8 +490,9 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
 | 
			
		||||
		/* "\anychar" -> "anychar" */
 | 
			
		||||
		parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0');
 | 
			
		||||
		cmdstr += strlen(cmdstr);
 | 
			
		||||
	}
 | 
			
		||||
	/* handle file cmds: (r)ead */
 | 
			
		||||
	} else if (strchr("rw", sed_cmd->cmd)) {
 | 
			
		||||
	else if (idx <= IDX_w) { /* r,w */
 | 
			
		||||
		if (sed_cmd->end_line || sed_cmd->end_match)
 | 
			
		||||
			bb_error_msg_and_die("command only uses one address");
 | 
			
		||||
		cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string);
 | 
			
		||||
@@ -463,8 +500,9 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
 | 
			
		||||
			sed_cmd->sw_file = xfopen_for_write(sed_cmd->string);
 | 
			
		||||
			sed_cmd->sw_last_char = '\n';
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	/* handle branch commands */
 | 
			
		||||
	} else if (strchr(":btT", sed_cmd->cmd)) {
 | 
			
		||||
	else if (idx <= IDX_T) { /* :,b,t,T */
 | 
			
		||||
		int length;
 | 
			
		||||
 | 
			
		||||
		cmdstr = skip_whitespace(cmdstr);
 | 
			
		||||
@@ -475,7 +513,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	/* translation command */
 | 
			
		||||
	else if (sed_cmd->cmd == 'y') {
 | 
			
		||||
	else if (idx == IDX_y) {
 | 
			
		||||
		char *match, *replace;
 | 
			
		||||
		int i = cmdstr[0];
 | 
			
		||||
 | 
			
		||||
@@ -495,7 +533,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
 | 
			
		||||
	/* if it wasn't a single-letter command that takes no arguments
 | 
			
		||||
	 * then it must be an invalid command.
 | 
			
		||||
	 */
 | 
			
		||||
	else if (strchr("dDgGhHlnNpPqx={}", sed_cmd->cmd) == 0) {
 | 
			
		||||
	else if (idx >= IDX_nul) { /* not d,D,g,G,h,H,l,n,N,p,P,q,x,=,{,} */
 | 
			
		||||
		bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
@@ -966,9 +1004,9 @@ static void process_files(void)
 | 
			
		||||
			}
 | 
			
		||||
			sed_cmd->in_match = !(
 | 
			
		||||
				/* has the ending line come, or is this a single address command? */
 | 
			
		||||
				(sed_cmd->end_line ?
 | 
			
		||||
					sed_cmd->end_line == -1 ?
 | 
			
		||||
						!next_line
 | 
			
		||||
				(sed_cmd->end_line
 | 
			
		||||
					? sed_cmd->end_line == -1
 | 
			
		||||
						? !next_line
 | 
			
		||||
						: (sed_cmd->end_line <= linenum)
 | 
			
		||||
					: !sed_cmd->end_match
 | 
			
		||||
				)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user