From c49d2d97939d77be3d1f3bbbbf9db30a55771c15 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 6 Sep 2010 10:26:37 +0200 Subject: [PATCH] hush: fix globbing+backslashes in unquoted $var expansion Signed-off-by: Denys Vlasenko --- shell/hush.c | 138 ++++++++++------------ shell/hush_test/hush-glob/glob2.right | 18 +++ shell/hush_test/hush-glob/glob2.tests | 27 +++++ shell/hush_test/hush-vars/var_bash4.right | 25 +++- shell/hush_test/hush-vars/var_bash4.tests | 52 ++++++-- 5 files changed, 172 insertions(+), 88 deletions(-) create mode 100644 shell/hush_test/hush-glob/glob2.right create mode 100755 shell/hush_test/hush-glob/glob2.tests diff --git a/shell/hush.c b/shell/hush.c index 2a4e80b6e..ef46372de 100644 --- a/shell/hush.c +++ b/shell/hush.c @@ -2007,12 +2007,17 @@ static void o_addstr_with_NUL(o_string *o, const char *str) static void o_addblock_duplicate_backslash(o_string *o, const char *str, int len) { while (len) { - o_addchr(o, *str); - if (*str == '\\') { - o_addchr(o, '\\'); - } - str++; len--; + o_addchr(o, *str); + if (*str++ == '\\') { + /* \z -> \\\z; \ -> \\ */ + o_addchr(o, '\\'); + if (len) { + len--; + o_addchr(o, '\\'); + o_addchr(o, *str++); + } + } } } @@ -2067,12 +2072,8 @@ static void o_addQchr(o_string *o, int ch) o->data[o->length] = '\0'; } -static void o_addQblock(o_string *o, const char *str, int len) +static void o_addqblock(o_string *o, const char *str, int len) { - if (!o->o_escape) { - o_addblock(o, str, len); - return; - } while (len) { char ch; int sz; @@ -2099,6 +2100,15 @@ static void o_addQblock(o_string *o, const char *str, int len) } } +static void o_addQblock(o_string *o, const char *str, int len) +{ + if (!o->o_escape) { + o_addblock(o, str, len); + return; + } + o_addqblock(o, str, len); +} + static void o_addQstr(o_string *o, const char *str) { o_addQblock(o, str, strlen(str)); @@ -2356,11 +2366,11 @@ static int glob_brace(char *pattern, o_string *o, int n) /* Performs globbing on last list[], * saving each result 
as a new list[]. */ -static int o_glob(o_string *o, int n) +static int perform_glob(o_string *o, int n) { char *pattern, *copy; - debug_printf_glob("start o_glob: n:%d o->data:%p\n", n, o->data); + debug_printf_glob("start perform_glob: n:%d o->data:%p\n", n, o->data); if (!o->data) return o_save_ptr_helper(o, n); pattern = o->data + o_get_last_ptr(o, n); @@ -2378,7 +2388,7 @@ static int o_glob(o_string *o, int n) n = glob_brace(copy, o, n); free(copy); if (DEBUG_GLOB) - debug_print_list("o_glob returning", o, n); + debug_print_list("perform_glob returning", o, n); return n; } @@ -2403,13 +2413,13 @@ static int glob_needed(const char *s) /* Performs globbing on last list[], * saving each result as a new list[]. */ -static int o_glob(o_string *o, int n) +static int perform_glob(o_string *o, int n) { glob_t globdata; int gr; char *pattern; - debug_printf_glob("start o_glob: n:%d o->data:%p\n", n, o->data); + debug_printf_glob("start perform_glob: n:%d o->data:%p\n", n, o->data); if (!o->data) return o_save_ptr_helper(o, n); pattern = o->data + o_get_last_ptr(o, n); @@ -2455,7 +2465,7 @@ static int o_glob(o_string *o, int n) } globfree(&globdata); if (DEBUG_GLOB) - debug_print_list("o_glob returning", o, n); + debug_print_list("perform_glob returning", o, n); return n; } @@ -2470,7 +2480,7 @@ static int o_save_ptr(o_string *o, int n) * (if it was requested back then when it was filled) * so don't do that again! 
*/ if (!o->has_empty_slot) - return o_glob(o, n); /* o_save_ptr_helper is inside */ + return perform_glob(o, n); /* o_save_ptr_helper is inside */ } return o_save_ptr_helper(o, n); } @@ -2927,15 +2937,6 @@ static int done_word(o_string *word, struct parse_context *ctx) (ctx->ctx_res_w == RES_SNTX)); return (ctx->ctx_res_w == RES_SNTX); } -# ifdef CMD_SINGLEWORD_NOGLOB_COND - if (strcmp(word->data, "export") == 0 -# if ENABLE_HUSH_LOCAL - || strcmp(word->data, "local") == 0 -# endif - ) { - command->cmd_type = CMD_SINGLEWORD_NOGLOB_COND; - } else -# endif # if ENABLE_HUSH_BASH_COMPAT if (strcmp(word->data, "[[") == 0) { command->cmd_type = CMD_SINGLEWORD_NOGLOB; @@ -4371,10 +4372,19 @@ static int expand_on_ifs(o_string *output, int n, const char *str) while (1) { int word_len = strcspn(str, G.ifs); if (word_len) { - if (output->o_escape || !output->o_glob) - o_addQblock(output, str, word_len); - else /* protect backslashes against globbing up :) */ + if (output->o_escape) + o_addqblock(output, str, word_len); + else if (!output->o_glob) + o_addblock(output, str, word_len); + else /* if (!escape && glob) */ { + /* Protect backslashes against globbing up :) + * Example: "v='\*'; echo b$v" + */ o_addblock_duplicate_backslash(output, str, word_len); + /*/ Why can't we do it easier? 
*/ + /*o_addblock(output, str, word_len); - WRONG: "v='\*'; echo Z$v" prints "Z*" instead of "Z\*" */ + /*o_addqblock(output, str, word_len); - WRONG: "v='*'; echo Z$v" prints "Z*" instead of Z* files */ + } str += word_len; } if (!*str) /* EOL - do not finalize word */ @@ -4594,8 +4604,9 @@ static NOINLINE const char *expand_one_var(char **to_be_freed_pp, char *arg, cha if (exp_op == *exp_word) /* ## or %% */ exp_word++; //TODO: avoid xstrdup unless needed -// (see HACK ALERT below) +// (see HACK ALERT below for an example) val = to_be_freed = xstrdup(val); +//TODO: fix expansion rules: exp_exp_word = expand_pseudo_dquoted(exp_word); if (exp_exp_word) exp_word = exp_exp_word; @@ -4613,10 +4624,26 @@ static NOINLINE const char *expand_one_var(char **to_be_freed_pp, char *arg, cha } #if ENABLE_HUSH_BASH_COMPAT else if (exp_op == '/' || exp_op == '\\') { + /* It's ${var/[/]pattern[/repl]} thing. + * Note that in encoded form it has TWO parts: + * var/pattern<SPECIAL_VAR_SYMBOL>repl + */ /* Empty variable always gives nothing: */ - // "v=''; echo ${v/*/w}" prints "" + // "v=''; echo ${v/*/w}" prints "", not "w" if (val && val[0]) { /* It's ${var/[/]pattern[/repl]} thing */ + /* + * Pattern is taken literally, while + * repl should be de-backslashed and globbed + * by the usual expansion rules: + * >az; >bz; + * v='a bz'; echo "${v/a*z/a*z}" prints "a*z" + * v='a bz'; echo "${v/a*z/\z}" prints "\z" + * v='a bz'; echo ${v/a*z/a*z} prints "az" + * v='a bz'; echo ${v/a*z/\z} prints "z" + * (note that a*z _pattern_ is never globbed!) 
+ */ +//TODO: fix expansion rules: char *pattern, *repl, *t; pattern = expand_pseudo_dquoted(exp_word); if (!pattern) @@ -4772,7 +4799,6 @@ static NOINLINE int expand_vars_to_list(o_string *output, int n, char *arg, char while ((p = strchr(arg, SPECIAL_VAR_SYMBOL)) != NULL) { char first_ch; - int i; char *to_be_freed = NULL; const char *val = NULL; #if ENABLE_HUSH_TICK @@ -4795,10 +4821,11 @@ static NOINLINE int expand_vars_to_list(o_string *output, int n, char *arg, char switch (first_ch & 0x7f) { /* Highest bit in first_ch indicates that var is double-quoted */ case '*': - case '@': - i = 1; - if (!G.global_argv[i]) + case '@': { + int i; + if (!G.global_argv[1]) break; + i = 1; ored_ch |= first_ch; /* do it for "$@" _now_, when we know it's not empty */ if (!(first_ch & 0x80)) { /* unquoted $* or $@ */ smallint sv = output->o_escape; @@ -4839,6 +4866,7 @@ static NOINLINE int expand_vars_to_list(o_string *output, int n, char *arg, char } } break; + } case SPECIAL_VAR_SYMBOL: /* */ /* "Empty variable", used to make "" etc to not disappear */ arg++; @@ -4984,41 +5012,6 @@ static char **expand_strvec_to_strvec_singleword_noglob(char **argv) } #endif -#ifdef CMD_SINGLEWORD_NOGLOB_COND -static char **expand_strvec_to_strvec_singleword_noglob_cond(char **argv) -{ - int n; - char **list; - char **v; - o_string output = NULL_O_STRING; - - n = 0; - v = argv; - while (*v) { - int is_var = is_well_formed_var_name(*v, '='); - /* is_var * 0x80: singleword expansion for vars */ - n = expand_vars_to_list(&output, n, *v, is_var * 0x80); - - /* Subtle! expand_vars_to_list did not glob last word yet. - * It does this only when fed with further data. 
- * Therefore we set globbing flags AFTER it, not before: - */ - - /* if it is not recognizably abc=...; then: */ - output.o_escape = !is_var; /* protect against globbing for "$var" */ - /* (unquoted $var will temporarily switch it off) */ - output.o_glob = !is_var; /* and indeed do globbing */ - v++; - } - debug_print_list("expand_cond", &output, n); - - /* output.data (malloced in one block) gets returned in "list" */ - list = o_finalize_list(&output, n); - debug_print_strings("expand_cond[1]", list); - return list; -} -#endif - /* Used for expansion of right hand of assignments */ /* NB: should NOT do globbing! * "export v=/bin/c*; env | grep ^v=" outputs "v=/bin/c*" */ @@ -6566,11 +6559,6 @@ static NOINLINE int run_pipe(struct pipe *pi) else if (command->cmd_type == CMD_SINGLEWORD_NOGLOB) { argv_expanded = expand_strvec_to_strvec_singleword_noglob(argv + command->assignment_cnt); } -#endif -#ifdef CMD_SINGLEWORD_NOGLOB_COND - else if (command->cmd_type == CMD_SINGLEWORD_NOGLOB_COND) { - argv_expanded = expand_strvec_to_strvec_singleword_noglob_cond(argv + command->assignment_cnt); - } #endif else { argv_expanded = expand_strvec_to_strvec(argv + command->assignment_cnt); diff --git a/shell/hush_test/hush-glob/glob2.right b/shell/hush_test/hush-glob/glob2.right new file mode 100644 index 000000000..7a70c2263 --- /dev/null +++ b/shell/hush_test/hush-glob/glob2.right @@ -0,0 +1,18 @@ +Expected Actual +Z\* : Z\* +Z* : Z* +Z\f : Z\f +Z\* : Z\* + +Z\z : Z\z +Zz : Zz +Z\z : Z\z +Z\z : Z\z + +Z\ : Z\ +Z\ : Z\ + +Z\f Zf : Z\f Zf +Z\f Zf : Z\f Zf + +Done: 0 diff --git a/shell/hush_test/hush-glob/glob2.tests b/shell/hush_test/hush-glob/glob2.tests new file mode 100755 index 000000000..4dbc92599 --- /dev/null +++ b/shell/hush_test/hush-glob/glob2.tests @@ -0,0 +1,27 @@ +# This test demonstrates that in unquoted $v, backslashes expand by this rule: +# \z -> \\\z; \ -> \\ (for any z, special or not), +# and subsequently globbing converts \\ to \ and treats \z as literal z +# 
even if it is a special char. + +>'Zf' +>'Z\f' + echo 'Expected' 'Actual' +v='\*'; echo 'Z\* :' Z$v + echo 'Z* :' Z\* + echo 'Z\f :' Z\\* + echo 'Z\* :' Z\\\* # NB! only this matches Z$v output +echo +v='\z'; echo 'Z\z :' Z$v + echo 'Zz :' Z\z + echo 'Z\z :' Z\\z + echo 'Z\z :' Z\\\z +echo +v='\'; echo 'Z\ :' Z$v + echo 'Z\ :' Z\\ +echo +v='*'; echo 'Z\f Zf :' Z$v + echo 'Z\f Zf :' Z* +echo + +rm 'Z\f' 'Zf' +echo Done: $? diff --git a/shell/hush_test/hush-vars/var_bash4.right b/shell/hush_test/hush-vars/var_bash4.right index 600e8532f..0ef1bf661 100644 --- a/shell/hush_test/hush-vars/var_bash4.right +++ b/shell/hush_test/hush-vars/var_bash4.right @@ -1,23 +1,40 @@ Source: a*b\*c Replace str: _\\_\z_ Pattern: single backslash and star: "replace literal star" -In assignment: a_\_z_b\*c Unquoted: a_\_z_b\*c +Unquoted =: a_\_z_b\*c Quoted: a_\_\z_b\*c +Quoted =: a_\_\z_b\*c Pattern: double backslash and star: "replace backslash and everything after it" -In assignment: a*b_\_z_ Unquoted: a*b_\_z_ +Unquoted =: a*b_\_z_ Quoted: a*b_\_\z_ +Quoted =: a*b_\_\z_ Source: a\bc Replace str: _\\_\z_ Pattern: single backslash and b: "replace b" -In assignment: a\_\_z_c Unquoted: a\_\_z_c +Unquoted =: a\_\_z_c Quoted: a\_\_\z_c +Quoted =: a\_\_\z_c Pattern: double backslash and b: "replace backslash and b" -In assignment: a_\_z_c Unquoted: a_\_z_c +Unquoted =: a_\_z_c Quoted: a_\_\z_c +Quoted =: a_\_\z_c + +Source: a\bc +Replace str: _\\_\z_ (as variable $s) +Pattern: single backslash and b: "replace b" +Unquoted: a\_\\_\z_c +Unquoted =: a\_\\_\z_c +Quoted: a\_\\_\z_c +Quoted =: a\_\\_\z_c +Pattern: double backslash and b: "replace backslash and b" +Unquoted: a_\\_\z_c +Unquoted =: a_\\_\z_c +Quoted: a_\\_\z_c +Quoted =: a_\\_\z_c Done: 0 diff --git a/shell/hush_test/hush-vars/var_bash4.tests b/shell/hush_test/hush-vars/var_bash4.tests index d5470614b..32aa2b34c 100755 --- a/shell/hush_test/hush-vars/var_bash4.tests +++ b/shell/hush_test/hush-vars/var_bash4.tests @@ -6,23 +6,30 @@ 
# even in quotes. # # bash4 (and probably bash3 too): "Quoted:" results are different from -# unquoted and assignment expansions - they have a backslash before z. +# unquoted expansions - they have a backslash before z. +# +# The difference only exists if repl is a literal. If it is a variable: +# ${v/.../$s}, then all backslashes are preserved in both cases. v='a*b\*c' echo 'Source: ' "$v" echo 'Replace str: ' '_\\_\z_' echo 'Pattern: ' 'single backslash and star: "replace literal star"' -r=${v/\*/_\\_\z_} -echo 'In assignment:' "$r" echo 'Unquoted: ' ${v/\*/_\\_\z_} +r=${v/\*/_\\_\z_} +echo 'Unquoted =: ' "$r" echo 'Quoted: ' "${v/\*/_\\_\z_}" +r="${v/\*/_\\_\z_}" +echo 'Quoted =: ' "$r" echo 'Pattern: ' 'double backslash and star: "replace backslash and everything after it"' -r=${v/\\*/_\\_\z_} -echo 'In assignment:' "$r" echo 'Unquoted: ' ${v/\\*/_\\_\z_} +r=${v/\\*/_\\_\z_} +echo 'Unquoted =: ' "$r" echo 'Quoted: ' "${v/\\*/_\\_\z_}" +r="${v/\\*/_\\_\z_}" +echo 'Quoted =: ' "$r" echo @@ -31,16 +38,43 @@ echo 'Source: ' "$v" echo 'Replace str: ' '_\\_\z_' echo 'Pattern: ' 'single backslash and b: "replace b"' -r=${v/\b/_\\_\z_} -echo 'In assignment:' "$r" echo 'Unquoted: ' ${v/\b/_\\_\z_} +r=${v/\b/_\\_\z_} +echo 'Unquoted =: ' "$r" echo 'Quoted: ' "${v/\b/_\\_\z_}" +r="${v/\b/_\\_\z_}" +echo 'Quoted =: ' "$r" echo 'Pattern: ' 'double backslash and b: "replace backslash and b"' -r=${v/\\b/_\\_\z_} -echo 'In assignment:' "$r" echo 'Unquoted: ' ${v/\\b/_\\_\z_} +r=${v/\\b/_\\_\z_} +echo 'Unquoted =: ' "$r" echo 'Quoted: ' "${v/\\b/_\\_\z_}" +r="${v/\\b/_\\_\z_}" +echo 'Quoted =: ' "$r" + +echo + +v='a\bc' +s='_\\_\z_' +echo 'Source: ' "$v" +echo 'Replace str: ' "$s" '(as variable $s)' + +echo 'Pattern: ' 'single backslash and b: "replace b"' +echo 'Unquoted: ' ${v/\b/$s} +r=${v/\b/$s} +echo 'Unquoted =: ' "$r" +echo 'Quoted: ' "${v/\b/$s}" +r="${v/\b/$s}" +echo 'Quoted =: ' "$r" + +echo 'Pattern: ' 'double backslash and b: "replace backslash and b"' +echo 
'Unquoted: ' ${v/\\b/$s} +r=${v/\\b/$s} +echo 'Unquoted =: ' "$r" +echo 'Quoted: ' "${v/\\b/$s}" +r="${v/\\b/$s}" +echo 'Quoted =: ' "$r" echo