ash: parser: Get rid of PEOA

Upstream commit:

    Date: Wed, 27 May 2020 12:19:13 +1000
    parser: Get rid of PEOA

    PEOA is a special character used to mark an alias as being finished
    so that we don't enter an infinite loop with nested aliases.  It
    complicates the parser because we have to ensure that it is skipped
    where necessary and not copied to the resulting token text.

    This patch removes it and instead delays the marking of aliases as
    finished until the second pgetc.  This has the same effect as the
    current PEOA code while keeping the complexities within the input
    code.

This adds ~32 bytes of global data:

function                                             old     new   delta
__pgetc                                                -     512    +512
freestrings                                            -      95     +95
popfile                                               86     110     +24
pushstring                                           141     160     +19
basepf                                                76      84      +8
syntax_index_table                                   258     257      -1
S_I_T                                                 30      28      -2
.rodata                                           104255  104247      -8
pgetc_without_PEOA                                    13       -     -13
xxreadtoken                                          230     215     -15
popstring                                            158     120     -38
readtoken1                                          3110    3045     -65
pgetc                                                547      22    -525
------------------------------------------------------------------------------
(add/remove: 2/1 grow/shrink: 3/7 up/down: 658/-667)           Total: -9 bytes
   text	   data	    bss	    dec	    hex	filename
1043102	    559	   5020	1048681	 100069	busybox_old
1043085	    559	   5052	1048696	 100078	busybox_unstripped

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2021-09-08 09:52:04 +02:00
parent 8c68ae8416
commit 48cb983b13

View File

@ -295,6 +295,10 @@ typedef long arith_t;
# define PIPE_BUF 4096 /* amount of buffering in a pipe */ # define PIPE_BUF 4096 /* amount of buffering in a pipe */
#endif #endif
#ifndef unlikely
# define unlikely(cond) (cond)
#endif
#if !BB_MMU #if !BB_MMU
# error "Do not even bother, ash will not run on NOMMU machine" # error "Do not even bother, ash will not run on NOMMU machine"
#endif #endif
@ -583,6 +587,9 @@ struct strpush {
#endif #endif
char *string; /* remember the string since it may change */ char *string; /* remember the string since it may change */
/* Delay freeing so we can stop nested aliases. */
struct strpush *spfree;
/* Remember last two characters for pungetc. */ /* Remember last two characters for pungetc. */
int lastc[2]; int lastc[2];
@ -605,6 +612,9 @@ struct parsefile {
struct strpush *strpush; /* for pushing strings at this level */ struct strpush *strpush; /* for pushing strings at this level */
struct strpush basestrpush; /* so pushing one is fast */ struct strpush basestrpush; /* so pushing one is fast */
/* Delay freeing so we can stop nested aliases. */
struct strpush *spfree;
/* Remember last two characters for pungetc. */ /* Remember last two characters for pungetc. */
int lastc[2]; int lastc[2];
@ -3013,12 +3023,8 @@ pwdcmd(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
#define CENDFILE 11 /* end of file */ #define CENDFILE 11 /* end of file */
#define CCTL 12 /* like CWORD, except it must be escaped */ #define CCTL 12 /* like CWORD, except it must be escaped */
#define CSPCL 13 /* these terminate a word */ #define CSPCL 13 /* these terminate a word */
#define CIGN 14 /* character should be ignored */
#define PEOF 256 #define PEOF 256
#if ENABLE_ASH_ALIAS
# define PEOA 257
#endif
#define USE_SIT_FUNCTION ENABLE_ASH_OPTIMIZE_FOR_SIZE #define USE_SIT_FUNCTION ENABLE_ASH_OPTIMIZE_FOR_SIZE
@ -3028,49 +3034,43 @@ pwdcmd(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
# define SIT_ITEM(a,b,c,d) (a | (b << 4) | (c << 8)) # define SIT_ITEM(a,b,c,d) (a | (b << 4) | (c << 8))
#endif #endif
static const uint16_t S_I_T[] ALIGN2 = { static const uint16_t S_I_T[] ALIGN2 = {
#if ENABLE_ASH_ALIAS SIT_ITEM(CSPCL , CWORD , CWORD, CWORD ), /* 0, ' ' */
SIT_ITEM(CSPCL , CIGN , CIGN , CIGN ), /* 0, PEOA */ SIT_ITEM(CNL , CNL , CNL , CNL ), /* 1, \n */
#endif SIT_ITEM(CWORD , CCTL , CCTL , CWORD ), /* 2, !*-/:=?[]~ */
SIT_ITEM(CSPCL , CWORD , CWORD, CWORD ), /* 1, ' ' */ SIT_ITEM(CDQUOTE , CENDQUOTE, CWORD, CWORD ), /* 3, '"' */
SIT_ITEM(CNL , CNL , CNL , CNL ), /* 2, \n */ SIT_ITEM(CVAR , CVAR , CWORD, CVAR ), /* 4, $ */
SIT_ITEM(CWORD , CCTL , CCTL , CWORD ), /* 3, !*-/:=?[]~ */ SIT_ITEM(CSQUOTE , CWORD , CENDQUOTE, CWORD), /* 5, "'" */
SIT_ITEM(CDQUOTE , CENDQUOTE, CWORD, CWORD ), /* 4, '"' */ SIT_ITEM(CSPCL , CWORD , CWORD, CLP ), /* 6, ( */
SIT_ITEM(CVAR , CVAR , CWORD, CVAR ), /* 5, $ */ SIT_ITEM(CSPCL , CWORD , CWORD, CRP ), /* 7, ) */
SIT_ITEM(CSQUOTE , CWORD , CENDQUOTE, CWORD), /* 6, "'" */ SIT_ITEM(CBACK , CBACK , CCTL , CBACK ), /* 8, \ */
SIT_ITEM(CSPCL , CWORD , CWORD, CLP ), /* 7, ( */ SIT_ITEM(CBQUOTE , CBQUOTE , CWORD, CBQUOTE), /* 9, ` */
SIT_ITEM(CSPCL , CWORD , CWORD, CRP ), /* 8, ) */ SIT_ITEM(CENDVAR , CENDVAR , CWORD, CENDVAR), /* 10, } */
SIT_ITEM(CBACK , CBACK , CCTL , CBACK ), /* 9, \ */
SIT_ITEM(CBQUOTE , CBQUOTE , CWORD, CBQUOTE), /* 10, ` */
SIT_ITEM(CENDVAR , CENDVAR , CWORD, CENDVAR), /* 11, } */
#if !USE_SIT_FUNCTION #if !USE_SIT_FUNCTION
SIT_ITEM(CENDFILE, CENDFILE , CENDFILE, CENDFILE),/* 12, PEOF */ SIT_ITEM(CENDFILE, CENDFILE , CENDFILE, CENDFILE),/* 11, PEOF */
SIT_ITEM(CWORD , CWORD , CWORD, CWORD ), /* 13, 0-9A-Za-z */ SIT_ITEM(CWORD , CWORD , CWORD, CWORD ), /* 12, 0-9A-Za-z */
SIT_ITEM(CCTL , CCTL , CCTL , CCTL ) /* 14, CTLESC ... */ SIT_ITEM(CCTL , CCTL , CCTL , CCTL ) /* 13, CTLESC ... */
#endif #endif
#undef SIT_ITEM #undef SIT_ITEM
}; };
/* Constants below must match table above */ /* Constants below must match table above */
enum { enum {
#if ENABLE_ASH_ALIAS CSPCL_CWORD_CWORD_CWORD , /* 0 */
CSPCL_CIGN_CIGN_CIGN , /* 0 */ CNL_CNL_CNL_CNL , /* 1 */
#endif CWORD_CCTL_CCTL_CWORD , /* 2 */
CSPCL_CWORD_CWORD_CWORD , /* 1 */ CDQUOTE_CENDQUOTE_CWORD_CWORD , /* 3 */
CNL_CNL_CNL_CNL , /* 2 */ CVAR_CVAR_CWORD_CVAR , /* 4 */
CWORD_CCTL_CCTL_CWORD , /* 3 */ CSQUOTE_CWORD_CENDQUOTE_CWORD , /* 5 */
CDQUOTE_CENDQUOTE_CWORD_CWORD , /* 4 */ CSPCL_CWORD_CWORD_CLP , /* 6 */
CVAR_CVAR_CWORD_CVAR , /* 5 */ CSPCL_CWORD_CWORD_CRP , /* 7 */
CSQUOTE_CWORD_CENDQUOTE_CWORD , /* 6 */ CBACK_CBACK_CCTL_CBACK , /* 8 */
CSPCL_CWORD_CWORD_CLP , /* 7 */ CBQUOTE_CBQUOTE_CWORD_CBQUOTE , /* 9 */
CSPCL_CWORD_CWORD_CRP , /* 8 */ CENDVAR_CENDVAR_CWORD_CENDVAR , /* 10 */
CBACK_CBACK_CCTL_CBACK , /* 9 */ CENDFILE_CENDFILE_CENDFILE_CENDFILE, /* 11 */
CBQUOTE_CBQUOTE_CWORD_CBQUOTE , /* 10 */ CWORD_CWORD_CWORD_CWORD , /* 12 */
CENDVAR_CENDVAR_CWORD_CENDVAR , /* 11 */ CCTL_CCTL_CCTL_CCTL , /* 13 */
CENDFILE_CENDFILE_CENDFILE_CENDFILE, /* 12 */
CWORD_CWORD_CWORD_CWORD , /* 13 */
CCTL_CCTL_CCTL_CCTL , /* 14 */
}; };
/* c in SIT(c, syntax) must be an *unsigned char* or PEOA or PEOF, /* c in SIT(c, syntax) must be an *unsigned char* or PEOF,
* caller must ensure proper cast on it if c is *char_ptr! * caller must ensure proper cast on it if c is *char_ptr!
*/ */
#if USE_SIT_FUNCTION #if USE_SIT_FUNCTION
@ -3088,44 +3088,28 @@ SIT(int c, int syntax)
* but glibc one isn't. With '/' always treated as CWORD, * but glibc one isn't. With '/' always treated as CWORD,
* both work fine. * both work fine.
*/ */
# if ENABLE_ASH_ALIAS
static const uint8_t syntax_index_table[] ALIGN1 = {
1, 2, 1, 3, 4, 5, 1, 6, /* "\t\n !\"$&'" */
7, 8, 3, 3,/*3,*/3, 1, 1, /* "()*-/:;<" */
3, 1, 3, 3, 9, 3, 10, 1, /* "=>?[\\]`|" */
11, 3 /* "}~" */
};
# else
static const uint8_t syntax_index_table[] ALIGN1 = { static const uint8_t syntax_index_table[] ALIGN1 = {
0, 1, 0, 2, 3, 4, 0, 5, /* "\t\n !\"$&'" */ 0, 1, 0, 2, 3, 4, 0, 5, /* "\t\n !\"$&'" */
6, 7, 2, 2,/*2,*/2, 0, 0, /* "()*-/:;<" */ 6, 7, 2, 2,/*2,*/2, 0, 0, /* "()*-/:;<" */
2, 0, 2, 2, 8, 2, 9, 0, /* "=>?[\\]`|" */ 2, 0, 2, 2, 8, 2, 9, 0, /* "=>?[\\]`|" */
10, 2 /* "}~" */ 10, 2 /* "}~" */
}; };
# endif
const char *s; const char *s;
int indx; int indx;
if (c == PEOF) if (c == PEOF)
return CENDFILE; return CENDFILE;
# if ENABLE_ASH_ALIAS /* Cast is purely for paranoia here,
if (c == PEOA) * just in case someone passed signed char to us */
indx = 0; if ((unsigned char)c >= CTL_FIRST
else && (unsigned char)c <= CTL_LAST
# endif ) {
{ return CCTL;
/* Cast is purely for paranoia here,
* just in case someone passed signed char to us */
if ((unsigned char)c >= CTL_FIRST
&& (unsigned char)c <= CTL_LAST
) {
return CCTL;
}
s = strchrnul(spec_symbls, c);
if (*s == '\0')
return CWORD;
indx = syntax_index_table[s - spec_symbls];
} }
s = strchrnul(spec_symbls, c);
if (*s == '\0')
return CWORD;
indx = syntax_index_table[s - spec_symbls];
return (S_I_T[indx] >> (syntax*4)) & 0xf; return (S_I_T[indx] >> (syntax*4)) & 0xf;
} }
@ -3396,9 +3380,6 @@ static const uint8_t syntax_index_table[] ALIGN1 = {
/* 254 */ CWORD_CWORD_CWORD_CWORD, /* 254 */ CWORD_CWORD_CWORD_CWORD,
/* 255 */ CWORD_CWORD_CWORD_CWORD, /* 255 */ CWORD_CWORD_CWORD_CWORD,
/* PEOF */ CENDFILE_CENDFILE_CENDFILE_CENDFILE, /* PEOF */ CENDFILE_CENDFILE_CENDFILE_CENDFILE,
# if ENABLE_ASH_ALIAS
/* PEOA */ CSPCL_CIGN_CIGN_CIGN,
# endif
}; };
#if 1 #if 1
@ -10712,7 +10693,7 @@ pushstring(char *s, struct alias *ap)
len = strlen(s); len = strlen(s);
INT_OFF; INT_OFF;
if (g_parsefile->strpush) { if (g_parsefile->strpush || g_parsefile->spfree) {
sp = ckzalloc(sizeof(*sp)); sp = ckzalloc(sizeof(*sp));
sp->prev = g_parsefile->strpush; sp->prev = g_parsefile->strpush;
} else { } else {
@ -10722,6 +10703,7 @@ pushstring(char *s, struct alias *ap)
sp->prev_string = g_parsefile->next_to_pgetc; sp->prev_string = g_parsefile->next_to_pgetc;
sp->prev_left_in_line = g_parsefile->left_in_line; sp->prev_left_in_line = g_parsefile->left_in_line;
sp->unget = g_parsefile->unget; sp->unget = g_parsefile->unget;
sp->spfree = g_parsefile->spfree;
memcpy(sp->lastc, g_parsefile->lastc, sizeof(sp->lastc)); memcpy(sp->lastc, g_parsefile->lastc, sizeof(sp->lastc));
#if ENABLE_ASH_ALIAS #if ENABLE_ASH_ALIAS
sp->ap = ap; sp->ap = ap;
@ -10733,11 +10715,11 @@ pushstring(char *s, struct alias *ap)
g_parsefile->next_to_pgetc = s; g_parsefile->next_to_pgetc = s;
g_parsefile->left_in_line = len; g_parsefile->left_in_line = len;
g_parsefile->unget = 0; g_parsefile->unget = 0;
g_parsefile->spfree = NULL;
INT_ON; INT_ON;
} }
static void static void popstring(void)
popstring(void)
{ {
struct strpush *sp = g_parsefile->strpush; struct strpush *sp = g_parsefile->strpush;
@ -10752,10 +10734,6 @@ popstring(void)
if (sp->string != sp->ap->val) { if (sp->string != sp->ap->val) {
free(sp->string); free(sp->string);
} }
sp->ap->flag &= ~ALIASINUSE;
if (sp->ap->flag & ALIASDEAD) {
unalias(sp->ap->name);
}
} }
#endif #endif
g_parsefile->next_to_pgetc = sp->prev_string; g_parsefile->next_to_pgetc = sp->prev_string;
@ -10763,8 +10741,7 @@ popstring(void)
g_parsefile->unget = sp->unget; g_parsefile->unget = sp->unget;
memcpy(g_parsefile->lastc, sp->lastc, sizeof(sp->lastc)); memcpy(g_parsefile->lastc, sp->lastc, sizeof(sp->lastc));
g_parsefile->strpush = sp->prev; g_parsefile->strpush = sp->prev;
if (sp != &(g_parsefile->basestrpush)) g_parsefile->spfree = sp;
free(sp);
INT_ON; INT_ON;
} }
@ -10853,26 +10830,16 @@ preadfd(void)
*/ */
//#define pgetc_debug(...) bb_error_msg(__VA_ARGS__) //#define pgetc_debug(...) bb_error_msg(__VA_ARGS__)
#define pgetc_debug(...) ((void)0) #define pgetc_debug(...) ((void)0)
static int pgetc(void); static int __pgetc(void);
static int static int
preadbuffer(void) preadbuffer(void)
{ {
char *q; char *q;
int more; int more;
if (g_parsefile->strpush) { if (unlikely(g_parsefile->strpush)) {
#if ENABLE_ASH_ALIAS
if (g_parsefile->left_in_line == -1
&& g_parsefile->strpush->ap
&& g_parsefile->next_to_pgetc[-1] != ' '
&& g_parsefile->next_to_pgetc[-1] != '\t'
) {
pgetc_debug("preadbuffer PEOA");
return PEOA;
}
#endif
popstring(); popstring();
return pgetc(); return __pgetc();
} }
/* on both branches above g_parsefile->left_in_line < 0. /* on both branches above g_parsefile->left_in_line < 0.
* "pgetc" needs refilling. * "pgetc" needs refilling.
@ -10966,8 +10933,31 @@ nlnoprompt(void)
needprompt = doprompt; needprompt = doprompt;
} }
static int static void freestrings(struct strpush *sp)
pgetc(void) {
INT_OFF;
do {
struct strpush *psp;
if (sp->ap) {
sp->ap->flag &= ~ALIASINUSE;
if (sp->ap->flag & ALIASDEAD) {
unalias(sp->ap->name);
}
}
psp = sp;
sp = sp->spfree;
if (psp != &(g_parsefile->basestrpush))
free(psp);
} while (sp);
g_parsefile->spfree = NULL;
INT_ON;
}
static int __pgetc(void)
{ {
int c; int c;
@ -10989,23 +10979,19 @@ pgetc(void)
return c; return c;
} }
#if ENABLE_ASH_ALIAS /*
static int * Read a character from the script, returning PEOF on end of file.
pgetc_without_PEOA(void) * Nul characters in the input are silently discarded.
*/
static int pgetc(void)
{ {
int c; struct strpush *sp = g_parsefile->spfree;
do {
pgetc_debug("pgetc at %d:%p'%s'", if (unlikely(sp))
g_parsefile->left_in_line, freestrings(sp);
g_parsefile->next_to_pgetc,
g_parsefile->next_to_pgetc); return __pgetc();
c = pgetc();
} while (c == PEOA);
return c;
} }
#else
# define pgetc_without_PEOA() pgetc()
#endif
/* /*
* Undo a call to pgetc. Only two characters may be pushed back. * Undo a call to pgetc. Only two characters may be pushed back.
@ -11082,6 +11068,7 @@ pushfile(void)
pf->prev = g_parsefile; pf->prev = g_parsefile;
pf->pf_fd = -1; pf->pf_fd = -1;
/*pf->strpush = NULL; - ckzalloc did it */ /*pf->strpush = NULL; - ckzalloc did it */
/*pf->spfree = NULL;*/
/*pf->basestrpush.prev = NULL;*/ /*pf->basestrpush.prev = NULL;*/
/*pf->unget = 0;*/ /*pf->unget = 0;*/
g_parsefile = pf; g_parsefile = pf;
@ -11099,8 +11086,12 @@ popfile(void)
if (pf->pf_fd >= 0) if (pf->pf_fd >= 0)
close(pf->pf_fd); close(pf->pf_fd);
free(pf->buf); free(pf->buf);
while (pf->strpush) if (g_parsefile->spfree)
freestrings(g_parsefile->spfree);
while (pf->strpush) {
popstring(); popstring();
freestrings(g_parsefile->spfree);
}
g_parsefile = pf->prev; g_parsefile = pf->prev;
free(pf); free(pf);
INT_ON; INT_ON;
@ -12390,7 +12381,7 @@ static int
readtoken1(int c, int syntax, char *eofmark, int striptabs) readtoken1(int c, int syntax, char *eofmark, int striptabs)
{ {
/* NB: syntax parameter fits into smallint */ /* NB: syntax parameter fits into smallint */
/* c parameter is an unsigned char or PEOF or PEOA */ /* c parameter is an unsigned char or PEOF */
char *out; char *out;
size_t len; size_t len;
struct nodelist *bqlist; struct nodelist *bqlist;
@ -12460,7 +12451,7 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs)
USTPUTC(c, out); USTPUTC(c, out);
break; break;
case CBACK: /* backslash */ case CBACK: /* backslash */
c = pgetc_without_PEOA(); c = pgetc();
if (c == PEOF) { if (c == PEOF) {
USTPUTC(CTLESC, out); USTPUTC(CTLESC, out);
USTPUTC('\\', out); USTPUTC('\\', out);
@ -12567,8 +12558,6 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs)
break; break;
case CENDFILE: case CENDFILE:
goto endword; /* exit outer loop */ goto endword; /* exit outer loop */
case CIGN:
break;
default: default:
if (synstack->varnest == 0) { if (synstack->varnest == 0) {
#if BASH_REDIR_OUTPUT #if BASH_REDIR_OUTPUT
@ -12590,8 +12579,7 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs)
#endif #endif
goto endword; /* exit outer loop */ goto endword; /* exit outer loop */
} }
IF_ASH_ALIAS(if (c != PEOA)) USTPUTC(c, out);
USTPUTC(c, out);
} }
c = pgetc_top(synstack); c = pgetc_top(synstack);
} /* for (;;) */ } /* for (;;) */
@ -12642,14 +12630,9 @@ checkend: {
int markloc; int markloc;
char *p; char *p;
#if ENABLE_ASH_ALIAS
if (c == PEOA)
c = pgetc_without_PEOA();
#endif
if (striptabs) { if (striptabs) {
while (c == '\t') { while (c == '\t')
c = pgetc_without_PEOA(); c = pgetc();
}
} }
markloc = out - (char *)stackblock(); markloc = out - (char *)stackblock();
@ -12663,7 +12646,7 @@ checkend: {
* F * F
* (see heredoc_bkslash_newline2.tests) * (see heredoc_bkslash_newline2.tests)
*/ */
c = pgetc_without_PEOA(); c = pgetc();
} }
if (c == '\n' || c == PEOF) { if (c == '\n' || c == PEOF) {
@ -12788,7 +12771,6 @@ parsesub: {
c = pgetc_eatbnl(); c = pgetc_eatbnl();
if ((checkkwd & CHKEOFMARK) if ((checkkwd & CHKEOFMARK)
|| c > 255 /* PEOA or PEOF */
|| (c != '(' && c != '{' && !is_name(c) && !is_special(c)) || (c != '(' && c != '{' && !is_name(c) && !is_special(c))
) { ) {
#if BASH_DOLLAR_SQUOTE #if BASH_DOLLAR_SQUOTE
@ -12811,7 +12793,7 @@ parsesub: {
PARSEBACKQNEW(); PARSEBACKQNEW();
} }
} else { } else {
/* $VAR, $<specialchar>, ${...}, or PEOA/PEOF */ /* $VAR, $<specialchar>, ${...}, or PEOF */
smalluint newsyn = synstack->syntax; smalluint newsyn = synstack->syntax;
USTPUTC(CTLVAR, out); USTPUTC(CTLVAR, out);
@ -13006,13 +12988,9 @@ parsebackq: {
) { ) {
STPUTC('\\', pout); STPUTC('\\', pout);
} }
if (pc <= 255 /* not PEOA or PEOF */) { break;
break;
}
/* fall through */
case PEOF: case PEOF:
IF_ASH_ALIAS(case PEOA:)
raise_error_syntax("EOF in backquote substitution"); raise_error_syntax("EOF in backquote substitution");
case '\n': case '\n':
@ -13147,7 +13125,7 @@ xxreadtoken(void)
setprompt_if(needprompt, 2); setprompt_if(needprompt, 2);
for (;;) { /* until token or start of word found */ for (;;) { /* until token or start of word found */
c = pgetc_eatbnl(); c = pgetc_eatbnl();
if (c == ' ' || c == '\t' IF_ASH_ALIAS( || c == PEOA)) if (c == ' ' || c == '\t')
continue; continue;
if (c == '#') { if (c == '#') {
@ -13205,7 +13183,6 @@ xxreadtoken(void)
c = pgetc_eatbnl(); c = pgetc_eatbnl();
switch (c) { switch (c) {
case ' ': case '\t': case ' ': case '\t':
IF_ASH_ALIAS(case PEOA:)
continue; continue;
case '#': case '#':
while ((c = pgetc()) != '\n' && c != PEOF) while ((c = pgetc()) != '\n' && c != PEOF)