awk: tighten parsing - disallow extra semicolons

'; BEGIN {...}' and 'BEGIN {...} ;; {...}' are not accepted by gawk.

function                                             old     new   delta
parse_program                                        332     353     +21

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>

This commit is contained in:
@@ -1634,7 +1634,7 @@ static void chain_group(void)
|
|||||||
debug_printf_parse("%s: ST_FOR\n", __func__);
|
debug_printf_parse("%s: ST_FOR\n", __func__);
|
||||||
next_token(TC_LPAREN);
|
next_token(TC_LPAREN);
|
||||||
n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
|
n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
|
||||||
if (t_tclass & TC_RPAREN) { /* for-in */
|
if (t_tclass & TC_RPAREN) { /* for (I in ARRAY) */
|
||||||
if (!n2 || n2->info != TI_IN)
|
if (!n2 || n2->info != TI_IN)
|
||||||
syntax_error(EMSG_UNEXP_TOKEN);
|
syntax_error(EMSG_UNEXP_TOKEN);
|
||||||
n = chain_node(OC_WALKINIT | VV);
|
n = chain_node(OC_WALKINIT | VV);
|
||||||
@@ -1700,20 +1700,15 @@ static void parse_program(char *p)
|
|||||||
for (;;) {
|
for (;;) {
|
||||||
uint32_t tclass;
|
uint32_t tclass;
|
||||||
|
|
||||||
tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
|
tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
|
||||||
TC_SEMICOL | TC_NEWLINE | TC_BEGIN | TC_END | TC_FUNCDECL);
|
| TC_EOF | TC_NEWLINE /* but not TC_SEMICOL */);
|
||||||
|
got_tok:
|
||||||
if (tclass == TC_EOF) {
|
if (tclass == TC_EOF) {
|
||||||
debug_printf_parse("%s: TC_EOF\n", __func__);
|
debug_printf_parse("%s: TC_EOF\n", __func__);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (tclass & (TC_SEMICOL | TC_NEWLINE)) {
|
if (tclass == TC_NEWLINE) {
|
||||||
debug_printf_parse("%s: TC_SEMICOL | TC_NEWLINE\n", __func__);
|
debug_printf_parse("%s: TC_NEWLINE\n", __func__);
|
||||||
//NB: gawk allows many newlines, but does not allow more than one semicolon:
|
|
||||||
// BEGIN {...}<newline>;<newline>;
|
|
||||||
//would complain "each rule must have a pattern or an action part".
|
|
||||||
//Same message for
|
|
||||||
// ; BEGIN {...}
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (tclass == TC_BEGIN) {
|
if (tclass == TC_BEGIN) {
|
||||||
@@ -1722,7 +1717,7 @@ static void parse_program(char *p)
|
|||||||
/* ensure there is no newline between BEGIN and { */
|
/* ensure there is no newline between BEGIN and { */
|
||||||
next_token(TC_LBRACE);
|
next_token(TC_LBRACE);
|
||||||
chain_until_rbrace();
|
chain_until_rbrace();
|
||||||
continue;
|
goto next_tok;
|
||||||
}
|
}
|
||||||
if (tclass == TC_END) {
|
if (tclass == TC_END) {
|
||||||
debug_printf_parse("%s: TC_END\n", __func__);
|
debug_printf_parse("%s: TC_END\n", __func__);
|
||||||
@@ -1730,7 +1725,7 @@ static void parse_program(char *p)
|
|||||||
/* ensure there is no newline between END and { */
|
/* ensure there is no newline between END and { */
|
||||||
next_token(TC_LBRACE);
|
next_token(TC_LBRACE);
|
||||||
chain_until_rbrace();
|
chain_until_rbrace();
|
||||||
continue;
|
goto next_tok;
|
||||||
}
|
}
|
||||||
if (tclass == TC_FUNCDECL) {
|
if (tclass == TC_FUNCDECL) {
|
||||||
func *f;
|
func *f;
|
||||||
@@ -1765,7 +1760,7 @@ static void parse_program(char *p)
|
|||||||
continue;
|
continue;
|
||||||
chain_until_rbrace();
|
chain_until_rbrace();
|
||||||
hash_clear(ahash);
|
hash_clear(ahash);
|
||||||
continue;
|
goto next_tok;
|
||||||
}
|
}
|
||||||
seq = &mainseq;
|
seq = &mainseq;
|
||||||
if (tclass & TS_OPSEQ) {
|
if (tclass & TS_OPSEQ) {
|
||||||
@@ -1784,12 +1779,25 @@ static void parse_program(char *p)
|
|||||||
chain_node(OC_PRINT);
|
chain_node(OC_PRINT);
|
||||||
}
|
}
|
||||||
cn->r.n = mainseq.last;
|
cn->r.n = mainseq.last;
|
||||||
continue;
|
goto next_tok;
|
||||||
}
|
}
|
||||||
/* tclass == TC_LBRACE */
|
/* tclass == TC_LBRACE */
|
||||||
debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
|
debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
|
||||||
chain_until_rbrace();
|
chain_until_rbrace();
|
||||||
}
|
next_tok:
|
||||||
|
/* Same as next_token() at the top of the loop, + TC_SEMICOL */
|
||||||
|
tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
|
||||||
|
| TC_EOF | TC_NEWLINE | TC_SEMICOL);
|
||||||
|
/* gawk allows many newlines, but does not allow more than one semicolon:
|
||||||
|
* BEGIN {...}<newline>;<newline>;
|
||||||
|
* would complain "each rule must have a pattern or an action part".
|
||||||
|
* Same message for
|
||||||
|
* ; BEGIN {...}
|
||||||
|
*/
|
||||||
|
if (tclass != TC_SEMICOL)
|
||||||
|
goto got_tok; /* use this token */
|
||||||
|
/* else: loop back - ate the semicolon, get and use _next_ token */
|
||||||
|
} /* for (;;) */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user