From 0a320d753fe7fca03df259a4dfd8e641e51edaa8 Mon Sep 17 00:00:00 2001 From: Hugo van der Sanden Date: Tue, 18 Feb 2020 13:51:16 +0000 Subject: [PATCH] study_chunk: extract rck_elide_nothing (CVE-2020-10878) (cherry picked from commit 93dee06613d4e1428fb10905ce1c3c96f53113dc) Upstream-Status: Backport [https://github.com/perl/perl5/commit/0a320d753fe7fca03df259a4dfd8e641e51edaa8] CVE: CVE-2020-10878 Signed-off-by: Lee Chee Yang --- embed.fnc | 1 + embed.h | 1 + proto.h | 3 +++ regcomp.c | 70 ++++++++++++++++++++++++++++++++++--------------------- 4 files changed, 48 insertions(+), 27 deletions(-) diff --git a/embed.fnc b/embed.fnc index aedb4baef19..d7cd04d3fc3 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2481,6 +2481,7 @@ Es |SSize_t|study_chunk |NN RExC_state_t *pRExC_state \ |I32 stopparen|U32 recursed_depth \ |NULLOK regnode_ssc *and_withp \ |U32 flags|U32 depth +Es |void |rck_elide_nothing|NN regnode *node EsR |SV * |get_ANYOFM_contents|NN const regnode * n EsRn |U32 |add_data |NN RExC_state_t* const pRExC_state \ |NN const char* const s|const U32 n diff --git a/embed.h b/embed.h index 75c91f77f45..356a8b98d96 100644 --- a/embed.h +++ b/embed.h @@ -1208,6 +1208,7 @@ #define parse_lparen_question_flags(a) S_parse_lparen_question_flags(aTHX_ a) #define parse_uniprop_string(a,b,c,d,e,f,g,h,i) Perl_parse_uniprop_string(aTHX_ a,b,c,d,e,f,g,h,i) #define populate_ANYOF_from_invlist(a,b) S_populate_ANYOF_from_invlist(aTHX_ a,b) +#define rck_elide_nothing(a) S_rck_elide_nothing(aTHX_ a) #define reg(a,b,c,d) S_reg(aTHX_ a,b,c,d) #define reg2Lanode(a,b,c,d) S_reg2Lanode(aTHX_ a,b,c,d) #define reg_node(a,b) S_reg_node(aTHX_ a,b) diff --git a/proto.h b/proto.h index 141ddbaee6d..f316fe134e1 100644 --- a/proto.h +++ b/proto.h @@ -5543,6 +5543,9 @@ PERL_CALLCONV SV * Perl_parse_uniprop_string(pTHX_ const char * const name, cons STATIC void S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr); #define PERL_ARGS_ASSERT_POPULATE_ANYOF_FROM_INVLIST \ assert(node); assert(invlist_ptr) +STATIC void S_rck_elide_nothing(pTHX_ regnode *node); +#define PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING \ + assert(node) PERL_STATIC_NO_RET void S_re_croak2(pTHX_ bool utf8, const char* pat1, const char* pat2, ...) __attribute__noreturn__; #define PERL_ARGS_ASSERT_RE_CROAK2 \ diff --git a/regcomp.c b/regcomp.c index 5f86be8086d..4ba2980db66 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4450,6 +4450,44 @@ S_unwind_scan_frames(pTHX_ const void *p) } while (f); } +/* Follow the next-chain of the current node and optimize away + all the NOTHINGs from it. + */ +STATIC void +S_rck_elide_nothing(pTHX_ regnode *node) +{ + dVAR; + + PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING; + + if (OP(node) != CURLYX) { + const int max = (reg_off_by_arg[OP(node)] + ? I32_MAX + /* I32 may be smaller than U16 on CRAYs! */ + : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX)); + int off = (reg_off_by_arg[OP(node)] ? ARG(node) : NEXT_OFF(node)); + int noff; + regnode *n = node; + + /* Skip NOTHING and LONGJMP. */ + while ( + (n = regnext(n)) + && ( + (PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n))) + || ((OP(n) == LONGJMP) && (noff = ARG(n))) + ) + && off + noff < max + ) { + off += noff; + } + if (reg_off_by_arg[OP(node)]) + ARG(node) = off; + else + NEXT_OFF(node) = off; + } + return; +} + /* the return from this sub is the minimum length that could possibly match */ STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, @@ -4550,28 +4588,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, */ JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0); - /* Follow the next-chain of the current node and optimize - away all the NOTHINGs from it. */ - if (OP(scan) != CURLYX) { - const int max = (reg_off_by_arg[OP(scan)] - ? I32_MAX - /* I32 may be smaller than U16 on CRAYs! */ - : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX)); - int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan)); - int noff; - regnode *n = scan; - - /* Skip NOTHING and LONGJMP. */ - while ((n = regnext(n)) - && ((PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n))) - || ((OP(n) == LONGJMP) && (noff = ARG(n)))) - && off + noff < max) - off += noff; - if (reg_off_by_arg[OP(scan)]) - ARG(scan) = off; - else - NEXT_OFF(scan) = off; - } + /* Follow the next-chain of the current node and optimize + away all the NOTHINGs from it. + */ + rck_elide_nothing(scan); /* The principal pseudo-switch. Cannot be a switch, since we look into several different things. */ @@ -5745,11 +5765,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", if (data && (fl & SF_HAS_EVAL)) data->flags |= SF_HAS_EVAL; optimize_curly_tail: - if (OP(oscan) != CURLYX) { - while (PL_regkind[OP(next = regnext(oscan))] == NOTHING - && NEXT_OFF(next)) - NEXT_OFF(oscan) += NEXT_OFF(next); - } + rck_elide_nothing(oscan); continue; default: