From ab1cecb4047864afb247a6ed691e7f59ce716f2c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 9 Mar 2018 00:14:02 +0800 Subject: parser: Add syntax stack for recursive parsing Without a stack of syntaxes we cannot correctly these two cases together: "${a#'$$'}" "${a#"${b-'$$'}"}" A recursive parser also helps in some other corner cases such as nested arithmetic expansion with paratheses. This patch adds a syntax stack allocated from the stack using alloca. As a side-effect this allows us to remove the naked backslashes for patterns within double-quotes, which means that EXP_QPAT also has to go. This patch also fixes removes any backslashes that precede right braces when they are present within a parameter expansion context, and backslashes that precede double quotes within inner double quotes inside a parameter expansion in a here-document context. The idea of a recursive parser is based on a patch by Harald van Dijk. Signed-off-by: Herbert Xu --- src/expand.c | 27 +++------ src/expand.h | 1 - src/parser.c | 177 ++++++++++++++++++++++++++++++++++++++--------------------- 3 files changed, 120 insertions(+), 85 deletions(-) (limited to 'src') diff --git a/src/expand.c b/src/expand.c index adcb70e..705fef7 100644 --- a/src/expand.c +++ b/src/expand.c @@ -85,7 +85,7 @@ #define RMESCAPE_HEAP 0x10 /* Malloc strings instead of stalloc */ /* Add CTLESC when necessary. */ -#define QUOTES_ESC (EXP_FULL | EXP_CASE | EXP_QPAT) +#define QUOTES_ESC (EXP_FULL | EXP_CASE) /* Do not skip NUL characters. */ #define QUOTES_KEEPNUL EXP_TILDE @@ -335,16 +335,6 @@ addquote: case CTLESC: startloc++; length++; - - /* - * Quoted parameter expansion pattern: remove quote - * unless inside inner quotes or we have a literal - * backslash. - */ - if (((flag | inquotes) & (EXP_QPAT | EXP_QUOTED)) == - EXP_QPAT && *p != '\\') - break; - goto addquote; case CTLVAR: p = evalvar(p, flag | inquotes); @@ -653,8 +643,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc, int varfla char *(*scan)(char *, char *, char *, char *, int , int); argstr(p, EXP_TILDE | (subtype != VSASSIGN && subtype != VSQUESTION ? - (flag & (EXP_QUOTED | EXP_QPAT) ? - EXP_QPAT : EXP_CASE) : 0)); + EXP_CASE : 0)); STPUTC('\0', expdest); argbackq = saveargbackq; startp = stackblock() + startloc; @@ -1646,7 +1635,6 @@ char * _rmescapes(char *str, int flag) { char *p, *q, *r; - unsigned inquotes; int notescaped; int globbing; @@ -1676,24 +1664,23 @@ _rmescapes(char *str, int flag) q = mempcpy(q, str, len); } } - inquotes = 0; globbing = flag & RMESCAPE_GLOB; notescaped = globbing; while (*p) { if (*p == (char)CTLQUOTEMARK) { - inquotes = ~inquotes; p++; notescaped = globbing; continue; } + if (*p == '\\') { + /* naked back slash */ + notescaped = 0; + goto copy; + } if (*p == (char)CTLESC) { p++; if (notescaped) *q++ = '\\'; - } else if (*p == '\\' && !inquotes) { - /* naked back slash */ - notescaped = 0; - goto copy; } notescaped = globbing; copy: diff --git a/src/expand.h b/src/expand.h index 26dc5b4..90f5328 100644 --- a/src/expand.h +++ b/src/expand.h @@ -55,7 +55,6 @@ struct arglist { #define EXP_VARTILDE 0x4 /* expand tildes in an assignment */ #define EXP_REDIR 0x8 /* file glob for a redirection (1 match only) */ #define EXP_CASE 0x10 /* keeps quotes around for CASE pattern */ -#define EXP_QPAT 0x20 /* pattern in quoted parameter expansion */ #define EXP_VARTILDE2 0x40 /* expand tildes after colons only */ #define EXP_WORD 0x80 /* expand word in parameter expansion */ #define EXP_QUOTED 0x100 /* expand word in double quotes */ diff --git a/src/parser.c b/src/parser.c index 8b945e3..c28363c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -80,6 +80,18 @@ struct heredoc { int striptabs; /* if set, strip leading tabs */ }; +struct synstack { + const char *syntax; + struct synstack *prev; + struct synstack *next; + int innerdq; + int varpushed; + int dblquote; + int varnest; /* levels of variables expansion */ + int parenlevel; /* levels of parens in arithmetic */ + int dqvarnest; /* levels of variables expansion within double quotes */ +}; + struct heredoc *heredoclist; /* list of here documents to read */ @@ -841,6 +853,21 @@ static int pgetc_eatbnl(void) return c; } +static void synstack_push(struct synstack **stack, struct synstack *next, + const char *syntax) +{ + memset(next, 0, sizeof(*next)); + next->syntax = syntax; + next->next = *stack; + (*stack)->prev = next; + *stack = next; +} + +static void synstack_pop(struct synstack **stack) +{ + *stack = (*stack)->next; +} + /* @@ -870,24 +897,15 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) size_t len; struct nodelist *bqlist; int quotef; - int dblquote; - int varnest; /* levels of variables expansion */ - int arinest; /* levels of arithmetic expansion */ - int parenlevel; /* levels of parens in arithmetic */ - int dqvarnest; /* levels of variables expansion within double quotes */ int oldstyle; - /* syntax before arithmetic */ - char const *uninitialized_var(prevsyntax); + /* syntax stack */ + struct synstack synbase = { .syntax = syntax }; + struct synstack *synstack = &synbase; - dblquote = 0; if (syntax == DQSYNTAX) - dblquote = 1; + synstack->dblquote = 1; quotef = 0; bqlist = NULL; - varnest = 0; - arinest = 0; - parenlevel = 0; - dqvarnest = 0; STARTSTACKSTR(out); loop: { /* for each line, until end of word */ @@ -895,7 +913,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) if (c == '\034' && doprompt && attyset() && ! equal(termval(), "emacs")) { attyline(); - if (syntax == BASESYNTAX) + if (synstack->syntax == BASESYNTAX) return readtoken(); c = syntax == SQSYNTAX ? pgetc() : pgetc_eatbnl(); goto loop; @@ -904,9 +922,9 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) CHECKEND(); /* set c to PEOF if at end of here document */ for (;;) { /* until end of line or end of word */ CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */ - switch(syntax[c]) { + switch(synstack->syntax[c]) { case CNL: /* '\n' */ - if (syntax == BASESYNTAX) + if (synstack->syntax == BASESYNTAX) goto endword; /* exit outer loop */ USTPUTC(c, out); nlprompt(); @@ -916,7 +934,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) USTPUTC(c, out); break; case CCTL: - if (eofmark == NULL || dblquote) + if (eofmark == NULL || synstack->dblquote) USTPUTC(CTLESC, out); USTPUTC(c, out); break; @@ -929,13 +947,18 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) pungetc(); } else { if ( - dblquote && + synstack->dblquote && c != '\\' && c != '`' && c != '$' && ( c != '"' || - eofmark != NULL + (eofmark != NULL && + !synstack->varnest) + ) && ( + c != '}' || + !synstack->varnest ) ) { + USTPUTC(CTLESC, out); USTPUTC('\\', out); } USTPUTC(CTLESC, out); @@ -944,55 +967,64 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) } break; case CSQUOTE: - syntax = SQSYNTAX; + synstack->syntax = SQSYNTAX; quotemark: if (eofmark == NULL) { USTPUTC(CTLQUOTEMARK, out); } break; case CDQUOTE: - syntax = DQSYNTAX; - dblquote = 1; + synstack->syntax = DQSYNTAX; + synstack->dblquote = 1; +toggledq: + if (synstack->varnest) + synstack->innerdq ^= 1; goto quotemark; case CENDQUOTE: - if (eofmark && !varnest) + if (eofmark && !synstack->varnest) { USTPUTC(c, out); - else { - if (dqvarnest == 0) { - syntax = BASESYNTAX; - dblquote = 0; - } - quotef++; - goto quotemark; + break; } - break; + + if (synstack->dqvarnest == 0) { + synstack->syntax = BASESYNTAX; + synstack->dblquote = 0; + } + + quotef++; + + if (c == '"') + goto toggledq; + + goto quotemark; case CVAR: /* '$' */ PARSESUB(); /* parse substitution */ break; case CENDVAR: /* '}' */ - if (varnest > 0) { - varnest--; - if (dqvarnest > 0) { - dqvarnest--; - } + if (!synstack->innerdq && + synstack->varnest > 0) { + if (!--synstack->varnest && + synstack->varpushed) + synstack_pop(&synstack); + else if (synstack->dqvarnest > 0) + synstack->dqvarnest--; USTPUTC(CTLENDVAR, out); } else { USTPUTC(c, out); } break; case CLP: /* '(' in arithmetic */ - parenlevel++; + synstack->parenlevel++; USTPUTC(c, out); break; case CRP: /* ')' in arithmetic */ - if (parenlevel > 0) { + if (synstack->parenlevel > 0) { USTPUTC(c, out); - --parenlevel; + --synstack->parenlevel; } else { if (pgetc_eatbnl() == ')') { USTPUTC(CTLENDARI, out); - if (!--arinest) - syntax = prevsyntax; + synstack_pop(&synstack); } else { /* * unbalanced parens @@ -1011,7 +1043,7 @@ quotemark: case CIGN: break; default: - if (varnest == 0) + if (synstack->varnest == 0) goto endword; /* exit outer loop */ if (c != PEOA) { USTPUTC(c, out); @@ -1021,11 +1053,11 @@ quotemark: } } endword: - if (syntax == ARISYNTAX) + if (synstack->syntax == ARISYNTAX) synerror("Missing '))'"); - if (syntax != BASESYNTAX && eofmark == NULL) + if (synstack->syntax != BASESYNTAX && eofmark == NULL) synerror("Unterminated quoted string"); - if (varnest != 0) { + if (synstack->varnest != 0) { /* { */ synerror("Missing '}'"); } @@ -1202,6 +1234,8 @@ parsesub: { PARSEBACKQNEW(); } } else { + const char *newsyn = synstack->syntax; + USTPUTC(CTLVAR, out); typeloc = out - (char *)stackblock(); STADJUST(1, out); @@ -1252,6 +1286,8 @@ varname: } if (subtype == 0) { + int cc = c; + switch (c) { case ':': subtype = VSNUL; @@ -1265,27 +1301,40 @@ varname: break; case '%': case '#': - { - int cc = c; - subtype = c == '#' ? VSTRIMLEFT : - VSTRIMRIGHT; - c = pgetc_eatbnl(); - if (c == cc) - subtype++; - else - pungetc(); - break; - } + subtype = c == '#' ? VSTRIMLEFT : + VSTRIMRIGHT; + c = pgetc_eatbnl(); + if (c == cc) + subtype++; + else + pungetc(); + + newsyn = BASESYNTAX; + break; } } else { badsub: pungetc(); } + + if (newsyn == ARISYNTAX && subtype > VSNORMAL) + newsyn = DQSYNTAX; + + if (newsyn != synstack->syntax) { + synstack_push(&synstack, + synstack->prev ?: + alloca(sizeof(*synstack)), + newsyn); + + synstack->varpushed++; + synstack->dblquote = newsyn != BASESYNTAX; + } + *((char *)stackblock() + typeloc) = subtype; if (subtype != VSNORMAL) { - varnest++; - if (dblquote) - dqvarnest++; + synstack->varnest++; + if (synstack->dblquote) + synstack->dqvarnest++; } STPUTC('=', out); } @@ -1335,7 +1384,7 @@ parsebackq: { case '\\': pc = pgetc_eatbnl(); if (pc != '\\' && pc != '`' && pc != '$' - && (!dblquote || pc != '"')) + && (!synstack->dblquote || pc != '"')) STPUTC('\\', pout); if (pc > PEOA) { break; @@ -1411,10 +1460,10 @@ done: */ parsearith: { - if (++arinest == 1) { - prevsyntax = syntax; - syntax = ARISYNTAX; - } + synstack_push(&synstack, + synstack->prev ?: alloca(sizeof(*synstack)), + ARISYNTAX); + synstack->dblquote = 1; USTPUTC(CTLARI, out); goto parsearith_return; } -- cgit 1.4.1