awk: support "length" form of "length()". Closes 8371

function                                             old     new   delta
parse_expr                                           805     848     +43
tokenlist                                            447     448      +1

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2015-10-02 02:41:39 +02:00
parent c919d561ad
commit 28b00ce6ff
2 changed files with 72 additions and 48 deletions

View File

@@ -207,7 +207,7 @@ typedef struct tsplitter_s {
/* simple token classes */ /* simple token classes */
/* Order and hex values are very important!!! See next_token() */ /* Order and hex values are very important!!! See next_token() */
#define TC_SEQSTART 1 /* ( */ #define TC_SEQSTART (1 << 0) /* ( */
#define TC_SEQTERM (1 << 1) /* ) */ #define TC_SEQTERM (1 << 1) /* ) */
#define TC_REGEXP (1 << 2) /* /.../ */ #define TC_REGEXP (1 << 2) /* /.../ */
#define TC_OUTRDR (1 << 3) /* | > >> */ #define TC_OUTRDR (1 << 3) /* | > >> */
@@ -227,16 +227,22 @@ typedef struct tsplitter_s {
#define TC_WHILE (1 << 17) #define TC_WHILE (1 << 17)
#define TC_ELSE (1 << 18) #define TC_ELSE (1 << 18)
#define TC_BUILTIN (1 << 19) #define TC_BUILTIN (1 << 19)
#define TC_GETLINE (1 << 20) /* This costs ~50 bytes of code.
#define TC_FUNCDECL (1 << 21) /* `function' `func' */ * A separate class to support deprecated "length" form. If we don't need that
#define TC_BEGIN (1 << 22) * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
#define TC_END (1 << 23) * can be merged with TC_BUILTIN:
#define TC_EOF (1 << 24) */
#define TC_VARIABLE (1 << 25) #define TC_LENGTH (1 << 20)
#define TC_ARRAY (1 << 26) #define TC_GETLINE (1 << 21)
#define TC_FUNCTION (1 << 27) #define TC_FUNCDECL (1 << 22) /* `function' `func' */
#define TC_STRING (1 << 28) #define TC_BEGIN (1 << 23)
#define TC_NUMBER (1 << 29) #define TC_END (1 << 24)
#define TC_EOF (1 << 25)
#define TC_VARIABLE (1 << 26)
#define TC_ARRAY (1 << 27)
#define TC_FUNCTION (1 << 28)
#define TC_STRING (1 << 29)
#define TC_NUMBER (1 << 30)
#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
@@ -244,14 +250,16 @@ typedef struct tsplitter_s {
#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN) #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
//#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST) //#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
| TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER) | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
| TC_SEQSTART | TC_STRING | TC_NUMBER)
#define TC_STATEMNT (TC_STATX | TC_WHILE) #define TC_STATEMNT (TC_STATX | TC_WHILE)
#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE) #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
/* word tokens, cannot mean something else if not expected */ /* word tokens, cannot mean something else if not expected */
#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \ #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE \
| TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END) | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
| TC_FUNCDECL | TC_BEGIN | TC_END)
/* discard newlines after these */ /* discard newlines after these */
#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \ #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
@@ -346,54 +354,54 @@ enum {
#define NTC "\377" /* switch to next token class (tc<<1) */ #define NTC "\377" /* switch to next token class (tc<<1) */
#define NTCC '\377' #define NTCC '\377'
#define OC_B OC_BUILTIN
static const char tokenlist[] ALIGN1 = static const char tokenlist[] ALIGN1 =
"\1(" NTC "\1(" NTC /* TC_SEQSTART */
"\1)" NTC "\1)" NTC /* TC_SEQTERM */
"\1/" NTC /* REGEXP */ "\1/" NTC /* TC_REGEXP */
"\2>>" "\1>" "\1|" NTC /* OUTRDR */ "\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */
"\2++" "\2--" NTC /* UOPPOST */ "\2++" "\2--" NTC /* TC_UOPPOST */
"\2++" "\2--" "\1$" NTC /* UOPPRE1 */ "\2++" "\2--" "\1$" NTC /* TC_UOPPRE1 */
"\2==" "\1=" "\2+=" "\2-=" /* BINOPX */ "\2==" "\1=" "\2+=" "\2-=" /* TC_BINOPX */
"\2*=" "\2/=" "\2%=" "\2^=" "\2*=" "\2/=" "\2%=" "\2^="
"\1+" "\1-" "\3**=" "\2**" "\1+" "\1-" "\3**=" "\2**"
"\1/" "\1%" "\1^" "\1*" "\1/" "\1%" "\1^" "\1*"
"\2!=" "\2>=" "\2<=" "\1>" "\2!=" "\2>=" "\2<=" "\1>"
"\1<" "\2!~" "\1~" "\2&&" "\1<" "\2!~" "\1~" "\2&&"
"\2||" "\1?" "\1:" NTC "\2||" "\1?" "\1:" NTC
"\2in" NTC "\2in" NTC /* TC_IN */
"\1," NTC "\1," NTC /* TC_COMMA */
"\1|" NTC "\1|" NTC /* TC_PIPE */
"\1+" "\1-" "\1!" NTC /* UOPPRE2 */ "\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */
"\1]" NTC "\1]" NTC /* TC_ARRTERM */
"\1{" NTC "\1{" NTC /* TC_GRPSTART */
"\1}" NTC "\1}" NTC /* TC_GRPTERM */
"\1;" NTC "\1;" NTC /* TC_SEMICOL */
"\1\n" NTC "\1\n" NTC /* TC_NEWLINE */
"\2if" "\2do" "\3for" "\5break" /* STATX */ "\2if" "\2do" "\3for" "\5break" /* TC_STATX */
"\10continue" "\6delete" "\5print" "\10continue" "\6delete" "\5print"
"\6printf" "\4next" "\10nextfile" "\6printf" "\4next" "\10nextfile"
"\6return" "\4exit" NTC "\6return" "\4exit" NTC
"\5while" NTC "\5while" NTC /* TC_WHILE */
"\4else" NTC "\4else" NTC /* TC_ELSE */
"\3and" "\5compl" "\6lshift" "\2or" /* TC_BUILTIN */
"\3and" "\5compl" "\6lshift" "\2or"
"\6rshift" "\3xor" "\6rshift" "\3xor"
"\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */ "\5close" "\6system" "\6fflush" "\5atan2"
"\3cos" "\3exp" "\3int" "\3log" "\3cos" "\3exp" "\3int" "\3log"
"\4rand" "\3sin" "\4sqrt" "\5srand" "\4rand" "\3sin" "\4sqrt" "\5srand"
"\6gensub" "\4gsub" "\5index" "\6length" "\6gensub" "\4gsub" "\5index" /* "\6length" was here */
"\5match" "\5split" "\7sprintf" "\3sub" "\5match" "\5split" "\7sprintf" "\3sub"
"\6substr" "\7systime" "\10strftime" "\6mktime" "\6substr" "\7systime" "\10strftime" "\6mktime"
"\7tolower" "\7toupper" NTC "\7tolower" "\7toupper" NTC
"\7getline" NTC "\6length" NTC /* TC_LENGTH */
"\4func" "\10function" NTC "\7getline" NTC /* TC_GETLINE */
"\5BEGIN" NTC "\4func" "\10function" NTC /* TC_FUNCDECL */
"\3END" "\5BEGIN" NTC /* TC_BEGIN */
"\3END" /* TC_END */
/* compiler adds trailing "\0" */ /* compiler adds trailing "\0" */
; ;
#define OC_B OC_BUILTIN
static const uint32_t tokeninfo[] = { static const uint32_t tokeninfo[] = {
0, 0,
0, 0,
@@ -408,7 +416,7 @@ static const uint32_t tokeninfo[] = {
OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':', OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
OC_IN|SV|P(49), /* in */ OC_IN|SV|P(49), /* TC_IN */
OC_COMMA|SS|P(80), OC_COMMA|SS|P(80),
OC_PGETLINE|SV|P(37), OC_PGETLINE|SV|P(37),
OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
@@ -423,20 +431,20 @@ static const uint32_t tokeninfo[] = {
OC_RETURN|Vx, OC_EXIT|Nx, OC_RETURN|Vx, OC_EXIT|Nx,
ST_WHILE, ST_WHILE,
0, /* else */ 0, /* else */
OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83), OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83), OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83), OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le, OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */
OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6), OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b), OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49), OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
OC_FBLTIN|Sx|F_le, /* TC_LENGTH */
OC_GETLINE|SV|P(0), OC_GETLINE|SV|P(0),
0, 0, 0, 0,
0, 0,
0 /* END */ 0 /* TC_END */
}; };
/* internal variable names and their initial values */ /* internal variable names and their initial values */
@@ -1202,9 +1210,10 @@ static uint32_t next_token(uint32_t expected)
ltclass = t_tclass; ltclass = t_tclass;
/* Are we ready for this? */ /* Are we ready for this? */
if (!(ltclass & expected)) if (!(ltclass & expected)) {
syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ? syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN); EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
}
return ltclass; return ltclass;
#undef concat_inserted #undef concat_inserted
@@ -1371,6 +1380,16 @@ static node *parse_expr(uint32_t iexp)
debug_printf_parse("%s: TC_BUILTIN\n", __func__); debug_printf_parse("%s: TC_BUILTIN\n", __func__);
cn->l.n = condition(); cn->l.n = condition();
break; break;
case TC_LENGTH:
debug_printf_parse("%s: TC_LENGTH\n", __func__);
next_token(TC_SEQSTART | TC_OPTERM | TC_GRPTERM);
rollback_token();
if (t_tclass & TC_SEQSTART) {
/* It was a "(" token. Handle just like TC_BUILTIN */
cn->l.n = condition();
}
break;
} }
} }
} }

View File

@@ -281,6 +281,11 @@ testing "awk length(array)" \
"2\n" \ "2\n" \
"" "" "" ""
testing "awk length()" \
"awk '{print length; print length(); print length(\"qwe\"); print length(99+9)}'" \
"3\n3\n3\n3\n" \
"" "qwe"
testing "awk -f and ARGC" \ testing "awk -f and ARGC" \
"awk -f - input" \ "awk -f - input" \
"re\n2\n" \ "re\n2\n" \