From a7f4e4bbd8d7a47a49404d28bc07ab3b5dc1c19b Mon Sep 17 00:00:00 2001 From: Denis Vlasenko Date: Wed, 2 Apr 2008 20:24:09 +0000 Subject: [PATCH] expr: fix comparisons 'a < b' where we were overflowing a-b (not to mention that we used int, not arith_t). closes bug 2744. Also, shrink a bit and add testsuite entry function old new delta nextarg 75 84 +9 tostring 38 35 -3 toarith 89 86 -3 str_value 35 32 -3 eval6 555 552 -3 int_value 29 23 -6 eval4 128 120 -8 eval3 112 104 -8 eval2 512 417 -95 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 1/8 up/down: 9/-129) Total: -120 bytes --- coreutils/expr.c | 171 ++++++++++++++++++++-------------------- testsuite/expr/expr-big | 16 ++++ 2 files changed, 100 insertions(+), 87 deletions(-) create mode 100644 testsuite/expr/expr-big diff --git a/coreutils/expr.c b/coreutils/expr.c index 959f5200c..2f9c5c1e3 100644 --- a/coreutils/expr.c +++ b/coreutils/expr.c @@ -28,13 +28,6 @@ #include "libbb.h" #include "xregex.h" -/* The kinds of value we can have. */ -enum valtype { - integer, - string -}; -typedef enum valtype TYPE; - #if ENABLE_EXPR_MATH_SUPPORT_64 typedef int64_t arith_t; @@ -51,10 +44,16 @@ typedef long arith_t; /* TODO: use bb_strtol[l]? It's easier to check for errors... */ +/* The kinds of value we can have. */ +enum { + INTEGER, + STRING +}; + /* A value is.... */ struct valinfo { - TYPE type; /* Which kind. */ - union { /* The value itself. */ + smallint type; /* Which kind. */ + union { /* The value itself. */ arith_t i; char *s; } u; @@ -77,8 +76,9 @@ static VALUE *int_value(arith_t i) { VALUE *v; - v = xmalloc(sizeof(VALUE)); - v->type = integer; + v = xzalloc(sizeof(VALUE)); + if (INTEGER) /* otherwise xzaaloc did it already */ + v->type = INTEGER; v->u.i = i; return v; } @@ -89,46 +89,47 @@ static VALUE *str_value(const char *s) { VALUE *v; - v = xmalloc(sizeof(VALUE)); - v->type = string; + v = xzalloc(sizeof(VALUE)); + if (STRING) /* otherwise xzaaloc did it already */ + v->type = STRING; v->u.s = xstrdup(s); return v; } /* Free VALUE V, including structure components. */ -static void freev(VALUE * v) +static void freev(VALUE *v) { - if (v->type == string) + if (v->type == STRING) free(v->u.s); free(v); } /* Return nonzero if V is a null-string or zero-number. */ -static int null(VALUE * v) +static int null(VALUE *v) { - if (v->type == integer) + if (v->type == INTEGER) return v->u.i == 0; - /* string: */ + /* STRING: */ return v->u.s[0] == '\0' || LONE_CHAR(v->u.s, '0'); } -/* Coerce V to a string value (can't fail). */ +/* Coerce V to a STRING value (can't fail). */ -static void tostring(VALUE * v) +static void tostring(VALUE *v) { - if (v->type == integer) { + if (v->type == INTEGER) { v->u.s = xasprintf("%" PF_REZ "d", PF_REZ_TYPE v->u.i); - v->type = string; + v->type = STRING; } } -/* Coerce V to an integer value. Return 1 on success, 0 on failure. */ +/* Coerce V to an INTEGER value. Return 1 on success, 0 on failure. */ -static bool toarith(VALUE * v) +static bool toarith(VALUE *v) { - if (v->type == string) { + if (v->type == STRING) { arith_t i; char *e; @@ -139,50 +140,54 @@ static bool toarith(VALUE * v) return 0; free(v->u.s); v->u.i = i; - v->type = integer; + v->type = INTEGER; } return 1; } -/* Return nonzero if the next token matches STR exactly. +/* Return str[0]+str[1] if the next token matches STR exactly. STR must not be NULL. */ -static bool nextarg(const char *str) +static int nextarg(const char *str) { - if (*G.args == NULL) + if (*G.args == NULL || strcmp(*G.args, str) != 0) return 0; - return strcmp(*G.args, str) == 0; + return (unsigned char)str[0] + (unsigned char)str[1]; } /* The comparison operator handling functions. */ -static int cmp_common(VALUE * l, VALUE * r, int op) +static int cmp_common(VALUE *l, VALUE *r, int op) { - int cmpval; + arith_t ll, rr; - if (l->type == string || r->type == string) { + ll = l->u.i; + rr = r->u.i; + if (l->type == STRING || r->type == STRING) { tostring(l); tostring(r); - cmpval = strcmp(l->u.s, r->u.s); - } else - cmpval = l->u.i - r->u.i; + ll = strcmp(l->u.s, r->u.s); + rr = 0; + } + /* calculating ll - rr and checking the result is prone to overflows. + * We'll do it differently: */ if (op == '<') - return cmpval < 0; - if (op == ('L' + 'E')) - return cmpval <= 0; - if (op == '=') - return cmpval == 0; - if (op == '!') - return cmpval != 0; + return ll < rr; + if (op == ('<' + '=')) + return ll <= rr; + if (op == '=' || (op == '=' + '=')) + return ll == rr; + if (op == '!' + '=') + return ll != rr; if (op == '>') - return cmpval > 0; + return ll > rr; /* >= */ - return cmpval >= 0; + return ll >= rr; } /* The arithmetic operator handling functions. */ -static arith_t arithmetic_common(VALUE * l, VALUE * r, int op) +static arith_t arithmetic_common(VALUE *l, VALUE *r, int op) { arith_t li, ri; @@ -190,25 +195,24 @@ static arith_t arithmetic_common(VALUE * l, VALUE * r, int op) bb_error_msg_and_die("non-numeric argument"); li = l->u.i; ri = r->u.i; - if ((op == '/' || op == '%') && ri == 0) - bb_error_msg_and_die("division by zero"); if (op == '+') return li + ri; - else if (op == '-') + if (op == '-') return li - ri; - else if (op == '*') + if (op == '*') return li * ri; - else if (op == '/') + if (ri == 0) + bb_error_msg_and_die("division by zero"); + if (op == '/') return li / ri; - else - return li % ri; + return li % ri; } /* Do the : operator. SV is the VALUE for the lhs (the string), PV is the VALUE for the rhs (the pattern). */ -static VALUE *docolon(VALUE * sv, VALUE * pv) +static VALUE *docolon(VALUE *sv, VALUE *pv) { VALUE *v; regex_t re_buffer; @@ -230,14 +234,16 @@ of a basic regular expression is not portable; it is being ignored", pv->u.s); /* expr uses an anchored pattern match, so check that there was a * match and that the match starts at offset 0. */ - if (regexec(&re_buffer, sv->u.s, NMATCH, re_regs, 0) != REG_NOMATCH && - re_regs[0].rm_so == 0) { + if (regexec(&re_buffer, sv->u.s, NMATCH, re_regs, 0) != REG_NOMATCH + && re_regs[0].rm_so == 0 + ) { /* Were \(...\) used? */ if (re_buffer.re_nsub > 0) { sv->u.s[re_regs[1].rm_eo] = '\0'; v = str_value(sv->u.s + re_regs[1].rm_so); - } else + } else { v = int_value(re_regs[0].rm_eo); + } } else { /* Match failed -- return the right kind of null. */ if (re_buffer.re_nsub > 0) @@ -327,7 +333,7 @@ static VALUE *eval6(void) v = str_value(""); else { v = xmalloc(sizeof(VALUE)); - v->type = string; + v->type = STRING; v->u.s = xstrndup(l->u.s + i1->u.i - 1, i2->u.i); } freev(l); @@ -367,14 +373,11 @@ static VALUE *eval4(void) l = eval5(); while (1) { - if (nextarg("*")) - op = '*'; - else if (nextarg("/")) - op = '/'; - else if (nextarg("%")) - op = '%'; - else - return l; + op = nextarg("*"); + if (!op) { op = nextarg("/"); + if (!op) { op = nextarg("%"); + if (!op) return l; + }} G.args++; r = eval5(); val = arithmetic_common(l, r, op); @@ -394,12 +397,11 @@ static VALUE *eval3(void) l = eval4(); while (1) { - if (nextarg("+")) - op = '+'; - else if (nextarg("-")) - op = '-'; - else - return l; + op = nextarg("+"); + if (!op) { + op = nextarg("-"); + if (!op) return l; + } G.args++; r = eval4(); val = arithmetic_common(l, r, op); @@ -419,20 +421,15 @@ static VALUE *eval2(void) l = eval3(); while (1) { - if (nextarg("<")) - op = '<'; - else if (nextarg("<=")) - op = 'L' + 'E'; - else if (nextarg("=") || nextarg("==")) - op = '='; - else if (nextarg("!=")) - op = '!'; - else if (nextarg(">=")) - op = 'G' + 'E'; - else if (nextarg(">")) - op = '>'; - else - return l; + op = nextarg("<"); + if (!op) { op = nextarg("<="); + if (!op) { op = nextarg("="); + if (!op) { op = nextarg("=="); + if (!op) { op = nextarg("!="); + if (!op) { op = nextarg(">="); + if (!op) { op = nextarg(">"); + if (!op) return l; + }}}}}} G.args++; r = eval3(); toarith(l); @@ -498,7 +495,7 @@ int expr_main(int argc, char **argv) if (*G.args) bb_error_msg_and_die("syntax error"); - if (v->type == integer) + if (v->type == INTEGER) printf("%" PF_REZ "d\n", PF_REZ_TYPE v->u.i); else puts(v->u.s); diff --git a/testsuite/expr/expr-big b/testsuite/expr/expr-big new file mode 100644 index 000000000..23dbbb3c8 --- /dev/null +++ b/testsuite/expr/expr-big @@ -0,0 +1,16 @@ +# busybox expr + +# 3*1000*1000*1000 overflows 32-bit signed int +res=`busybox expr 0 '<' 3000000000` +[ x"$res" = x1 ] || exit 1 + +# 9223372036854775807 = 2^31-1 +res=`busybox expr 0 '<' 9223372036854775807` +[ x"$res" = x1 ] || exit 1 +# coreutils fails this one! +res=`busybox expr -9223372036854775800 '<' 9223372036854775807` +[ x"$res" = x1 ] || exit 1 + +# This one works only by chance +# res=`busybox expr 0 '<' 9223372036854775808` +# [ x"$res" = x1 ] || exit 1