awk: tighten rules in action parsing

Disallow:
    BEGIN
    { action }
- must start on the same line

Disallow:
    func f()
    print "hello"
- must be in {...}

function                                             old     new   delta
chain_until_rbrace                                     -      41     +41
parse_program                                        307     336     +29
chain_group                                          649     616     -33
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 1/1 up/down: 70/-33)             Total: 37 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
717200eb43
commit
2b65e73db3
108
editors/awk.c
108
editors/awk.c
@ -1549,29 +1549,35 @@ static node *chain_loop(node *nn)
|
|||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void chain_until_rbrace(void)
|
||||||
|
{
|
||||||
|
uint32_t tc;
|
||||||
|
while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
|
||||||
|
debug_printf_parse("%s: !TC_RBRACE\n", __func__);
|
||||||
|
if (tc == TC_NEWLINE)
|
||||||
|
continue;
|
||||||
|
rollback_token();
|
||||||
|
chain_group();
|
||||||
|
}
|
||||||
|
debug_printf_parse("%s: TC_RBRACE\n", __func__);
|
||||||
|
}
|
||||||
|
|
||||||
/* parse group and attach it to chain */
|
/* parse group and attach it to chain */
|
||||||
static void chain_group(void)
|
static void chain_group(void)
|
||||||
{
|
{
|
||||||
uint32_t c;
|
uint32_t tc;
|
||||||
node *n, *n2, *n3;
|
node *n, *n2, *n3;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
c = next_token(TS_GRPSEQ);
|
tc = next_token(TS_GRPSEQ);
|
||||||
} while (c & TC_NEWLINE);
|
} while (tc == TC_NEWLINE);
|
||||||
|
|
||||||
if (c & TC_LBRACE) {
|
if (tc == TC_LBRACE) {
|
||||||
debug_printf_parse("%s: TC_LBRACE\n", __func__);
|
debug_printf_parse("%s: TC_LBRACE\n", __func__);
|
||||||
while ((c = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
|
chain_until_rbrace();
|
||||||
debug_printf_parse("%s: !TC_RBRACE\n", __func__);
|
|
||||||
if (c & TC_NEWLINE)
|
|
||||||
continue;
|
|
||||||
rollback_token();
|
|
||||||
chain_group();
|
|
||||||
}
|
|
||||||
debug_printf_parse("%s: TC_RBRACE\n", __func__);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (c & (TS_OPSEQ | TS_OPTERM)) {
|
if (tc & (TS_OPSEQ | TS_OPTERM)) {
|
||||||
debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__);
|
debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__);
|
||||||
rollback_token();
|
rollback_token();
|
||||||
chain_expr(OC_EXEC | Vx);
|
chain_expr(OC_EXEC | Vx);
|
||||||
@ -1675,37 +1681,48 @@ static void chain_group(void)
|
|||||||
|
|
||||||
static void parse_program(char *p)
|
static void parse_program(char *p)
|
||||||
{
|
{
|
||||||
uint32_t tclass;
|
|
||||||
node *cn;
|
|
||||||
func *f;
|
|
||||||
var *v;
|
|
||||||
|
|
||||||
debug_printf_parse("%s()\n", __func__);
|
debug_printf_parse("%s()\n", __func__);
|
||||||
|
|
||||||
g_pos = p;
|
g_pos = p;
|
||||||
t_lineno = 1;
|
t_lineno = 1;
|
||||||
while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
|
for (;;) {
|
||||||
TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
|
uint32_t tclass;
|
||||||
|
|
||||||
if (tclass & TS_OPTERM) {
|
tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
|
||||||
|
TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL);
|
||||||
|
|
||||||
|
if (tclass == TC_EOF) {
|
||||||
|
debug_printf_parse("%s: TC_EOF\n", __func__);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (tclass & TS_OPTERM) { /* ; or <newline> */
|
||||||
debug_printf_parse("%s: TS_OPTERM\n", __func__);
|
debug_printf_parse("%s: TS_OPTERM\n", __func__);
|
||||||
|
//NB: gawk allows many newlines, but does not allow more than one semicolon:
|
||||||
|
// BEGIN {...}<newline>;<newline>;
|
||||||
|
//would complain "each rule must have a pattern or an action part".
|
||||||
|
//Same message for
|
||||||
|
// ; BEGIN {...}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (tclass == TC_BEGIN) {
|
||||||
seq = &mainseq;
|
|
||||||
if (tclass & TC_BEGIN) {
|
|
||||||
debug_printf_parse("%s: TC_BEGIN\n", __func__);
|
debug_printf_parse("%s: TC_BEGIN\n", __func__);
|
||||||
seq = &beginseq;
|
seq = &beginseq;
|
||||||
//TODO: ensure there is no newline between BEGIN and {
|
/* ensure there is no newline between BEGIN and { */
|
||||||
//next_token(TC_LBRACE); rollback_token();
|
next_token(TC_LBRACE);
|
||||||
chain_group();
|
chain_until_rbrace();
|
||||||
} else if (tclass & TC_END) {
|
continue;
|
||||||
|
}
|
||||||
|
if (tclass == TC_END) {
|
||||||
debug_printf_parse("%s: TC_END\n", __func__);
|
debug_printf_parse("%s: TC_END\n", __func__);
|
||||||
seq = &endseq;
|
seq = &endseq;
|
||||||
//TODO: ensure there is no newline between END and {
|
/* ensure there is no newline between END and { */
|
||||||
//next_token(TC_LBRACE); rollback_token();
|
next_token(TC_LBRACE);
|
||||||
chain_group();
|
chain_until_rbrace();
|
||||||
} else if (tclass & TC_FUNCDECL) {
|
continue;
|
||||||
|
}
|
||||||
|
if (tclass == TC_FUNCDECL) {
|
||||||
|
func *f;
|
||||||
|
|
||||||
debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
|
debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
|
||||||
next_token(TC_FUNCTION);
|
next_token(TC_FUNCTION);
|
||||||
f = newfunc(t_string);
|
f = newfunc(t_string);
|
||||||
@ -1716,6 +1733,7 @@ static void parse_program(char *p)
|
|||||||
//f->nargs = 0; - already is
|
//f->nargs = 0; - already is
|
||||||
/* func arg list: comma sep list of args, and a close paren */
|
/* func arg list: comma sep list of args, and a close paren */
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
var *v;
|
||||||
if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
|
if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
|
||||||
if (f->nargs == 0)
|
if (f->nargs == 0)
|
||||||
break; /* func() is ok */
|
break; /* func() is ok */
|
||||||
@ -1730,31 +1748,37 @@ static void parse_program(char *p)
|
|||||||
/* it was a comma, we ate it */
|
/* it was a comma, we ate it */
|
||||||
}
|
}
|
||||||
seq = &f->body;
|
seq = &f->body;
|
||||||
//TODO: ensure there is { after "func F(...)" - but newlines are allowed
|
/* ensure there is { after "func F(...)" - but newlines are allowed */
|
||||||
//while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) continue; rollback_token();
|
while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE)
|
||||||
chain_group();
|
continue;
|
||||||
|
chain_until_rbrace();
|
||||||
hash_clear(ahash);
|
hash_clear(ahash);
|
||||||
} else if (tclass & TS_OPSEQ) {
|
continue;
|
||||||
|
}
|
||||||
|
seq = &mainseq;
|
||||||
|
if (tclass & TS_OPSEQ) {
|
||||||
|
node *cn;
|
||||||
|
|
||||||
debug_printf_parse("%s: TS_OPSEQ\n", __func__);
|
debug_printf_parse("%s: TS_OPSEQ\n", __func__);
|
||||||
rollback_token();
|
rollback_token();
|
||||||
cn = chain_node(OC_TEST);
|
cn = chain_node(OC_TEST);
|
||||||
cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE);
|
cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE);
|
||||||
if (t_tclass & TC_LBRACE) {
|
if (t_tclass == TC_LBRACE) {
|
||||||
debug_printf_parse("%s: TC_LBRACE\n", __func__);
|
debug_printf_parse("%s: TC_LBRACE\n", __func__);
|
||||||
rollback_token();
|
rollback_token();
|
||||||
chain_group();
|
chain_group();
|
||||||
} else {
|
} else {
|
||||||
|
/* no action, assume default "{ print }" */
|
||||||
debug_printf_parse("%s: !TC_LBRACE\n", __func__);
|
debug_printf_parse("%s: !TC_LBRACE\n", __func__);
|
||||||
chain_node(OC_PRINT);
|
chain_node(OC_PRINT);
|
||||||
}
|
}
|
||||||
cn->r.n = mainseq.last;
|
cn->r.n = mainseq.last;
|
||||||
} else /* if (tclass & TC_LBRACE) */ {
|
continue;
|
||||||
debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
|
|
||||||
rollback_token();
|
|
||||||
chain_group();
|
|
||||||
}
|
}
|
||||||
|
/* tclass == TC_LBRACE */
|
||||||
|
debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
|
||||||
|
chain_until_rbrace();
|
||||||
}
|
}
|
||||||
debug_printf_parse("%s: TC_EOF\n", __func__);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user