awk: get rid of "move name one char back" trick in next_token()

function                                             old     new   delta
next_token                                           791     812     +21
awk_main                                             886     831     -55
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 1/1 up/down: 21/-55)            Total: -34 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2021-06-29 03:27:07 +02:00
parent f414fb4411
commit 4f27503a1e

View File

@ -535,6 +535,7 @@ struct globals {
var *Fields; var *Fields;
nvblock *g_cb; nvblock *g_cb;
char *g_pos; char *g_pos;
char g_saved_ch;
smallint icase; smallint icase;
smallint exiting; smallint exiting;
smallint nextrec; smallint nextrec;
@ -599,6 +600,7 @@ struct globals2 {
#define Fields (G1.Fields ) #define Fields (G1.Fields )
#define g_cb (G1.g_cb ) #define g_cb (G1.g_cb )
#define g_pos (G1.g_pos ) #define g_pos (G1.g_pos )
#define g_saved_ch (G1.g_saved_ch )
#define icase (G1.icase ) #define icase (G1.icase )
#define exiting (G1.exiting ) #define exiting (G1.exiting )
#define nextrec (G1.nextrec ) #define nextrec (G1.nextrec )
@ -1125,6 +1127,10 @@ static uint32_t next_token(uint32_t expected)
t_info = save_info; t_info = save_info;
} else { } else {
p = g_pos; p = g_pos;
if (g_saved_ch != '\0') {
*p = g_saved_ch;
g_saved_ch = '\0';
}
readnext: readnext:
p = skip_spaces(p); p = skip_spaces(p);
g_lineno = t_lineno; g_lineno = t_lineno;
@ -1183,6 +1189,8 @@ static uint32_t next_token(uint32_t expected)
tc = TC_NUMBER; tc = TC_NUMBER;
debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double); debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
} else { } else {
char *end_of_name;
if (*p == '\n') if (*p == '\n')
t_lineno++; t_lineno++;
@ -1219,16 +1227,14 @@ static uint32_t next_token(uint32_t expected)
if (!isalnum_(*p)) if (!isalnum_(*p))
syntax_error(EMSG_UNEXP_TOKEN); /* no */ syntax_error(EMSG_UNEXP_TOKEN); /* no */
/* yes */ /* yes */
/* "move name one char back" trick: we need a byte for NUL terminator */ t_string = p;
/* NB: this results in argv[i][-1] being used (!!!) in e.g. "awk -e 'NAME'" case */ while (isalnum_(*p))
t_string = --p; p++;
while (isalnum_(*++p)) { end_of_name = p;
p[-1] = *p;
}
p[-1] = '\0';
tc = TC_VARIABLE; tc = TC_VARIABLE;
/* also consume whitespace between functionname and bracket */ /* also consume whitespace between functionname and bracket */
if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY)) if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
//TODO: why if variable can be here (but not array ref), skipping is not allowed? Example where it matters?
p = skip_spaces(p); p = skip_spaces(p);
if (*p == '(') { if (*p == '(') {
p++; p++;
@ -1240,8 +1246,20 @@ static uint32_t next_token(uint32_t expected)
debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
} else { } else {
debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
if (end_of_name == p) {
/* there is no space for trailing NUL in t_string!
* We need to save the char we are going to NUL.
* (we'll use it in future call to next_token())
*/
g_saved_ch = *end_of_name;
// especially pathological example is V="abc"; V.2 - it's V concatenated to .2
// (it evaluates to "abc0.2"). Because of this case, we can't simply cache
// '.' and analyze it later: we also have to *store it back* in next
// next_token(), in order to give my_strtod() the undamaged ".2" string.
} }
} }
*end_of_name = '\0'; /* terminate t_string */
}
token_found: token_found:
g_pos = p; g_pos = p;
@ -3420,38 +3438,20 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
g_progname = llist_pop(&list_f); g_progname = llist_pop(&list_f);
fd = xopen_stdin(g_progname); fd = xopen_stdin(g_progname);
/* 1st byte is reserved for "move name one char back" trick in next_token */ s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
i = 1;
s = NULL;
for (;;) {
int sz;
s = xrealloc(s, i + 1000);
sz = safe_read(fd, s + i, 1000);
if (sz <= 0)
break;
i += sz;
}
s = xrealloc(s, i + 1); /* trim unused 999 bytes */
s[i] = '\0';
close(fd); close(fd);
parse_program(s + 1); parse_program(s);
free(s); free(s);
} }
g_progname = "cmd. line"; g_progname = "cmd. line";
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
while (list_e) { while (list_e) {
/* NB: "move name one char back" trick in next_token
* can use argv[i][-1] here.
*/
parse_program(llist_pop(&list_e)); parse_program(llist_pop(&list_e));
} }
#endif #endif
if (!(opt & (OPT_f | OPT_e))) { if (!(opt & (OPT_f | OPT_e))) {
if (!*argv) if (!*argv)
bb_show_usage(); bb_show_usage();
/* NB: "move name one char back" trick in next_token
* can use argv[i][-1] here.
*/
parse_program(*argv++); parse_program(*argv++);
} }