diff options
Diffstat (limited to 'meta/recipes-core')
52 files changed, 5305 insertions, 862 deletions
diff --git a/meta/recipes-core/busybox/busybox-inittab_1.33.0.bb b/meta/recipes-core/busybox/busybox-inittab_1.33.2.bb index 3804f4f7b2..3804f4f7b2 100644 --- a/meta/recipes-core/busybox/busybox-inittab_1.33.0.bb +++ b/meta/recipes-core/busybox/busybox-inittab_1.33.2.bb diff --git a/meta/recipes-core/busybox/busybox/0001-awk-fix-CVEs.patch b/meta/recipes-core/busybox/busybox/0001-awk-fix-CVEs.patch new file mode 100644 index 0000000000..c07b53ebfd --- /dev/null +++ b/meta/recipes-core/busybox/busybox/0001-awk-fix-CVEs.patch @@ -0,0 +1,3266 @@ +From cf542caeed195af05fa6205341f829ccee53f8c2 Mon Sep 17 00:00:00 2001 +From: Chen Qi <Qi.Chen@windriver.com> +Date: Tue, 4 Jan 2022 17:48:03 -0800 +Subject: [PATCH] awk: fix CVEs + +The awk CVE fixes are hard to separate, so we use the following method +to generate the current patch: +git rev-list --reverse 1_33_2..1_34_1 -- editors/awk.c | xargs git cherry-pick +git reset HEAD~66 && git add . && git commit + +CVE: CVE-2021-42378 +CVE: CVE-2021-42379 +CVE: CVE-2021-42380 +CVE: CVE-2021-42381 +CVE: CVE-2021-42382 +CVE: CVE-2021-42383 +CVE: CVE-2021-42384 +CVE: CVE-2021-42385 +CVE: CVE-2021-42386 + +Upstream-Status: Backport + +Signed-off-by: Chen Qi <Qi.Chen@windriver.com> +--- + editors/awk.c | 2060 +++++++++++++++++++++++----------------- + testsuite/awk.tests | 62 +- + testsuite/printf.tests | 5 + + 3 files changed, 1264 insertions(+), 863 deletions(-) + +diff --git a/editors/awk.c b/editors/awk.c +index 2c15f9e4e..f6314ac72 100644 +--- a/editors/awk.c ++++ b/editors/awk.c +@@ -66,6 +66,8 @@ + #endif + #ifndef debug_printf_parse + # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__)) ++#else ++# define debug_parse_print_tc(...) 
((void)0) + #endif + + +@@ -91,7 +93,6 @@ enum { + }; + + #define MAXVARFMT 240 +-#define MINNVBLOCK 64 + + /* variable flags */ + #define VF_NUMBER 0x0001 /* 1 = primary type is number */ +@@ -101,7 +102,7 @@ enum { + #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */ + #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */ + #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */ +-#define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */ ++#define VF_FSTR 0x1000 /* 1 = don't free() var::string (not malloced, or is owned by something else) */ + #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */ + #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */ + +@@ -118,8 +119,8 @@ typedef struct walker_list { + /* Variable */ + typedef struct var_s { + unsigned type; /* flags */ +- double number; + char *string; ++ double number; + union { + int aidx; /* func arg idx (for compilation stage) */ + struct xhash_s *array; /* array ptr */ +@@ -138,6 +139,7 @@ typedef struct chain_s { + /* Function */ + typedef struct func_s { + unsigned nargs; ++ smallint defined; + struct chain_s body; + } func; + +@@ -177,7 +179,7 @@ typedef struct node_s { + struct node_s *n; + var *v; + int aidx; +- char *new_progname; ++ const char *new_progname; + regex_t *re; + } l; + union { +@@ -190,91 +192,120 @@ typedef struct node_s { + } a; + } node; + +-/* Block of temporary variables */ +-typedef struct nvblock_s { +- int size; +- var *pos; +- struct nvblock_s *prev; +- struct nvblock_s *next; +- var nv[]; +-} nvblock; +- + typedef struct tsplitter_s { + node n; + regex_t re[2]; + } tsplitter; + + /* simple token classes */ +-/* Order and hex values are very important!!! 
See next_token() */ +-#define TC_SEQSTART (1 << 0) /* ( */ +-#define TC_SEQTERM (1 << 1) /* ) */ +-#define TC_REGEXP (1 << 2) /* /.../ */ +-#define TC_OUTRDR (1 << 3) /* | > >> */ +-#define TC_UOPPOST (1 << 4) /* unary postfix operator */ +-#define TC_UOPPRE1 (1 << 5) /* unary prefix operator */ +-#define TC_BINOPX (1 << 6) /* two-opnd operator */ +-#define TC_IN (1 << 7) +-#define TC_COMMA (1 << 8) +-#define TC_PIPE (1 << 9) /* input redirection pipe */ +-#define TC_UOPPRE2 (1 << 10) /* unary prefix operator */ +-#define TC_ARRTERM (1 << 11) /* ] */ +-#define TC_GRPSTART (1 << 12) /* { */ +-#define TC_GRPTERM (1 << 13) /* } */ +-#define TC_SEMICOL (1 << 14) +-#define TC_NEWLINE (1 << 15) +-#define TC_STATX (1 << 16) /* ctl statement (for, next...) */ +-#define TC_WHILE (1 << 17) +-#define TC_ELSE (1 << 18) +-#define TC_BUILTIN (1 << 19) ++/* order and hex values are very important!!! See next_token() */ ++#define TC_LPAREN (1 << 0) /* ( */ ++#define TC_RPAREN (1 << 1) /* ) */ ++#define TC_REGEXP (1 << 2) /* /.../ */ ++#define TC_OUTRDR (1 << 3) /* | > >> */ ++#define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */ ++#define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */ ++#define TC_BINOPX (1 << 6) /* two-opnd operator */ ++#define TC_IN (1 << 7) /* 'in' */ ++#define TC_COMMA (1 << 8) /* , */ ++#define TC_PIPE (1 << 9) /* input redirection pipe | */ ++#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */ ++#define TC_ARRTERM (1 << 11) /* ] */ ++#define TC_LBRACE (1 << 12) /* { */ ++#define TC_RBRACE (1 << 13) /* } */ ++#define TC_SEMICOL (1 << 14) /* ; */ ++#define TC_NEWLINE (1 << 15) ++#define TC_STATX (1 << 16) /* ctl statement (for, next...) */ ++#define TC_WHILE (1 << 17) /* 'while' */ ++#define TC_ELSE (1 << 18) /* 'else' */ ++#define TC_BUILTIN (1 << 19) + /* This costs ~50 bytes of code. + * A separate class to support deprecated "length" form. If we don't need that + * (i.e. 
if we demand that only "length()" with () is valid), then TC_LENGTH + * can be merged with TC_BUILTIN: + */ +-#define TC_LENGTH (1 << 20) +-#define TC_GETLINE (1 << 21) +-#define TC_FUNCDECL (1 << 22) /* 'function' 'func' */ +-#define TC_BEGIN (1 << 23) +-#define TC_END (1 << 24) +-#define TC_EOF (1 << 25) +-#define TC_VARIABLE (1 << 26) +-#define TC_ARRAY (1 << 27) +-#define TC_FUNCTION (1 << 28) +-#define TC_STRING (1 << 29) +-#define TC_NUMBER (1 << 30) +- +-#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) +- +-/* combined token classes */ +-#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN) +-//#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST) +-#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ +- | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ +- | TC_SEQSTART | TC_STRING | TC_NUMBER) +- +-#define TC_STATEMNT (TC_STATX | TC_WHILE) +-#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE) ++#define TC_LENGTH (1 << 20) /* 'length' */ ++#define TC_GETLINE (1 << 21) /* 'getline' */ ++#define TC_FUNCDECL (1 << 22) /* 'function' 'func' */ ++#define TC_BEGIN (1 << 23) /* 'BEGIN' */ ++#define TC_END (1 << 24) /* 'END' */ ++#define TC_EOF (1 << 25) ++#define TC_VARIABLE (1 << 26) /* name */ ++#define TC_ARRAY (1 << 27) /* name[ */ ++#define TC_FUNCTION (1 << 28) /* name( */ ++#define TC_STRING (1 << 29) /* "..." 
*/ ++#define TC_NUMBER (1 << 30) ++ ++#ifndef debug_parse_print_tc ++static void debug_parse_print_tc(uint32_t n) ++{ ++ if (n & TC_LPAREN ) debug_printf_parse(" LPAREN" ); ++ if (n & TC_RPAREN ) debug_printf_parse(" RPAREN" ); ++ if (n & TC_REGEXP ) debug_printf_parse(" REGEXP" ); ++ if (n & TC_OUTRDR ) debug_printf_parse(" OUTRDR" ); ++ if (n & TC_UOPPOST ) debug_printf_parse(" UOPPOST" ); ++ if (n & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" ); ++ if (n & TC_BINOPX ) debug_printf_parse(" BINOPX" ); ++ if (n & TC_IN ) debug_printf_parse(" IN" ); ++ if (n & TC_COMMA ) debug_printf_parse(" COMMA" ); ++ if (n & TC_PIPE ) debug_printf_parse(" PIPE" ); ++ if (n & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" ); ++ if (n & TC_ARRTERM ) debug_printf_parse(" ARRTERM" ); ++ if (n & TC_LBRACE ) debug_printf_parse(" LBRACE" ); ++ if (n & TC_RBRACE ) debug_printf_parse(" RBRACE" ); ++ if (n & TC_SEMICOL ) debug_printf_parse(" SEMICOL" ); ++ if (n & TC_NEWLINE ) debug_printf_parse(" NEWLINE" ); ++ if (n & TC_STATX ) debug_printf_parse(" STATX" ); ++ if (n & TC_WHILE ) debug_printf_parse(" WHILE" ); ++ if (n & TC_ELSE ) debug_printf_parse(" ELSE" ); ++ if (n & TC_BUILTIN ) debug_printf_parse(" BUILTIN" ); ++ if (n & TC_LENGTH ) debug_printf_parse(" LENGTH" ); ++ if (n & TC_GETLINE ) debug_printf_parse(" GETLINE" ); ++ if (n & TC_FUNCDECL) debug_printf_parse(" FUNCDECL"); ++ if (n & TC_BEGIN ) debug_printf_parse(" BEGIN" ); ++ if (n & TC_END ) debug_printf_parse(" END" ); ++ if (n & TC_EOF ) debug_printf_parse(" EOF" ); ++ if (n & TC_VARIABLE) debug_printf_parse(" VARIABLE"); ++ if (n & TC_ARRAY ) debug_printf_parse(" ARRAY" ); ++ if (n & TC_FUNCTION) debug_printf_parse(" FUNCTION"); ++ if (n & TC_STRING ) debug_printf_parse(" STRING" ); ++ if (n & TC_NUMBER ) debug_printf_parse(" NUMBER" ); ++} ++#endif ++ ++/* combined token classes ("token [class] sets") */ ++#define TS_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) ++ ++#define TS_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN) ++//#define 
TS_UNARYOP (TS_UOPPRE | TC_UOPPOST) ++#define TS_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ ++ | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ ++ | TC_LPAREN | TC_STRING | TC_NUMBER) ++ ++#define TS_LVALUE (TC_VARIABLE | TC_ARRAY) ++#define TS_STATEMNT (TC_STATX | TC_WHILE) + + /* word tokens, cannot mean something else if not expected */ +-#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE \ +- | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ +- | TC_FUNCDECL | TC_BEGIN | TC_END) ++#define TS_WORD (TC_IN | TS_STATEMNT | TC_ELSE \ ++ | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ ++ | TC_FUNCDECL | TC_BEGIN | TC_END) + + /* discard newlines after these */ +-#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \ +- | TC_BINOP | TC_OPTERM) ++#define TS_NOTERM (TS_BINOP | TC_COMMA | TC_LBRACE | TC_RBRACE \ ++ | TC_SEMICOL | TC_NEWLINE) + + /* what can expression begin with */ +-#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP) ++#define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP) + /* what can group begin with */ +-#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART) ++#define TS_GRPSEQ (TS_OPSEQ | TS_STATEMNT \ ++ | TC_SEMICOL | TC_NEWLINE | TC_LBRACE) + +-/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */ ++/* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */ + /* operator is inserted between them */ +-#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \ ++#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_RPAREN \ + | TC_STRING | TC_NUMBER | TC_UOPPOST \ + | TC_LENGTH) +-#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE) ++#define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE) + + #define OF_RES1 0x010000 + #define OF_RES2 0x020000 +@@ -284,13 +315,12 @@ typedef struct tsplitter_s { + #define OF_CHECKED 0x200000 + #define OF_REQUIRED 0x400000 + +- + /* combined operator flags */ + #define xx 0 + #define xV OF_RES2 + #define xS (OF_RES2 | OF_STR2) + #define Vx OF_RES1 +-#define Rx (OF_RES1 | OF_NUM1 | OF_REQUIRED) ++#define 
Rx OF_REQUIRED + #define VV (OF_RES1 | OF_RES2) + #define Nx (OF_RES1 | OF_NUM1) + #define NV (OF_RES1 | OF_NUM1 | OF_RES2) +@@ -302,8 +332,7 @@ typedef struct tsplitter_s { + #define OPNMASK 0x007F + + /* operator priority is a highest byte (even: r->l, odd: l->r grouping) +- * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1, +- * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string ++ * (for builtins it has different meaning) + */ + #undef P + #undef PRIMASK +@@ -313,10 +342,8 @@ typedef struct tsplitter_s { + #define PRIMASK2 0x7E000000 + + /* Operation classes */ +- + #define SHIFT_TIL_THIS 0x0600 + #define RECUR_FROM_THIS 0x1000 +- + enum { + OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300, + OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600, +@@ -358,8 +385,8 @@ enum { + #define NTCC '\377' + + static const char tokenlist[] ALIGN1 = +- "\1(" NTC /* TC_SEQSTART */ +- "\1)" NTC /* TC_SEQTERM */ ++ "\1(" NTC /* TC_LPAREN */ ++ "\1)" NTC /* TC_RPAREN */ + "\1/" NTC /* TC_REGEXP */ + "\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */ + "\2++" "\2--" NTC /* TC_UOPPOST */ +@@ -376,8 +403,8 @@ static const char tokenlist[] ALIGN1 = + "\1|" NTC /* TC_PIPE */ + "\1+" "\1-" "\1!" 
NTC /* TC_UOPPRE2 */ + "\1]" NTC /* TC_ARRTERM */ +- "\1{" NTC /* TC_GRPSTART */ +- "\1}" NTC /* TC_GRPTERM */ ++ "\1{" NTC /* TC_LBRACE */ ++ "\1}" NTC /* TC_RBRACE */ + "\1;" NTC /* TC_SEMICOL */ + "\1\n" NTC /* TC_NEWLINE */ + "\2if" "\2do" "\3for" "\5break" /* TC_STATX */ +@@ -391,7 +418,7 @@ static const char tokenlist[] ALIGN1 = + "\5close" "\6system" "\6fflush" "\5atan2" + "\3cos" "\3exp" "\3int" "\3log" + "\4rand" "\3sin" "\4sqrt" "\5srand" +- "\6gensub" "\4gsub" "\5index" /* "\6length" was here */ ++ "\6gensub" "\4gsub" "\5index" /* "\6length" was here */ + "\5match" "\5split" "\7sprintf" "\3sub" + "\6substr" "\7systime" "\10strftime" "\6mktime" + "\7tolower" "\7toupper" NTC +@@ -403,25 +430,32 @@ static const char tokenlist[] ALIGN1 = + /* compiler adds trailing "\0" */ + ; + +-#define OC_B OC_BUILTIN +- + static const uint32_t tokeninfo[] ALIGN4 = { + 0, + 0, +- OC_REGEXP, ++#define TI_REGEXP OC_REGEXP ++ TI_REGEXP, + xS|'a', xS|'w', xS|'|', + OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', +- OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5), ++#define TI_PREINC (OC_UNARY|xV|P(9)|'P') ++#define TI_PREDEC (OC_UNARY|xV|P(9)|'M') ++ TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5), + OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-', + OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&', + OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', + OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', + OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, +- OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), +- OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':', +- OC_IN|SV|P(49), /* TC_IN */ +- OC_COMMA|SS|P(80), +- OC_PGETLINE|SV|P(37), ++#define TI_LESS (OC_COMPARE|VV|P(39)|2) ++ TI_LESS, 
OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), ++#define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?') ++#define TI_COLON (OC_COLON|xx|P(67)|':') ++ OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON, ++#define TI_IN (OC_IN|SV|P(49)) ++ TI_IN, ++#define TI_COMMA (OC_COMMA|SS|P(80)) ++ TI_COMMA, ++#define TI_PGETLINE (OC_PGETLINE|SV|P(37)) ++ TI_PGETLINE, + OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', + 0, /* ] */ + 0, +@@ -434,20 +468,45 @@ static const uint32_t tokeninfo[] ALIGN4 = { + OC_RETURN|Vx, OC_EXIT|Nx, + ST_WHILE, + 0, /* else */ +- OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83), +- OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83), +- OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83), +- OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, +- OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, +- OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */ +- OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6), +- OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b), +- OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49), +- OC_FBLTIN|Sx|F_le, /* TC_LENGTH */ +- OC_GETLINE|SV|P(0), +- 0, 0, +- 0, +- 0 /* TC_END */ ++// OC_B's are builtins with enforced minimum number of arguments (two upper bits). ++// Highest byte bit pattern: nn s3s2s1 v3v2v1 ++// nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var ++// OC_F's are builtins with zero or one argument. 
++// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt ++// Check for no args is present in builtins' code (not in this table): rand, systime ++// Have one _optional_ arg: fflush, srand, length ++#define OC_B OC_BUILTIN ++#define OC_F OC_FBLTIN ++#define A1 P(0x40) /*one arg*/ ++#define A2 P(0x80) /*two args*/ ++#define A3 P(0xc0) /*three args*/ ++#define __v P(1) ++#define _vv P(3) ++#define __s__v P(9) ++#define __s_vv P(0x0b) ++#define __svvv P(0x0f) ++#define _ss_vv P(0x1b) ++#define _s_vv_ P(0x16) ++#define ss_vv_ P(0x36) ++ OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or ++ OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor ++ OC_F|F_cl|Sx|Rx, OC_F|F_sy|Sx|Rx, OC_F|F_ff|Sx, OC_B|B_a2|_vv|A2, // close system fflush atan2 ++ OC_F|F_co|Nx|Rx, OC_F|F_ex|Nx|Rx, OC_F|F_in|Nx|Rx, OC_F|F_lg|Nx|Rx, // cos exp int log ++ OC_F|F_rn, OC_F|F_si|Nx|Rx, OC_F|F_sq|Nx|Rx, OC_F|F_sr|Nx, // rand sin sqrt srand ++ OC_B|B_ge|_s_vv_|A3,OC_B|B_gs|ss_vv_|A2,OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/ ++ OC_B|B_ma|__s__v|A2,OC_B|B_sp|__s_vv|A2,OC_SPRINTF, OC_B|B_su|ss_vv_|A2,// match split sprintf sub ++ OC_B|B_ss|__svvv|A2,OC_F|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime ++ OC_B|B_lo|__s__v|A1,OC_B|B_up|__s__v|A1, // tolower toupper ++ OC_F|F_le|Sx, // length ++ OC_GETLINE|SV, // getline ++ 0, 0, // func function ++ 0, // BEGIN ++ 0 // END ++#undef A1 ++#undef A2 ++#undef A3 ++#undef OC_B ++#undef OC_F + }; + + /* internal variable names and their initial values */ +@@ -488,21 +547,29 @@ struct globals { + chain *seq; + node *break_ptr, *continue_ptr; + rstream *iF; +- xhash *vhash, *ahash, *fdhash, *fnhash; ++ xhash *ahash; /* argument names, used only while parsing function bodies */ ++ xhash *fnhash; /* function names, used only in parsing stage */ ++ xhash *vhash; /* variables and arrays */ ++ //xhash *fdhash; /* file objects, used only in execution 
stage */ ++ //we are reusing ahash as fdhash, via define (see later) + const char *g_progname; + int g_lineno; + int nfields; + int maxfields; /* used in fsrealloc() only */ + var *Fields; +- nvblock *g_cb; + char *g_pos; +- char *g_buf; ++ char g_saved_ch; + smallint icase; + smallint exiting; + smallint nextrec; + smallint nextfile; + smallint is_f0_split; + smallint t_rollback; ++ ++ /* former statics from various functions */ ++ smallint next_token__concat_inserted; ++ uint32_t next_token__save_tclass; ++ uint32_t next_token__save_info; + }; + struct globals2 { + uint32_t t_info; /* often used */ +@@ -515,32 +582,35 @@ struct globals2 { + /* former statics from various functions */ + char *split_f0__fstrings; + +- uint32_t next_token__save_tclass; +- uint32_t next_token__save_info; +- uint32_t next_token__ltclass; +- smallint next_token__concat_inserted; +- +- smallint next_input_file__files_happen; + rstream next_input_file__rsm; ++ smallint next_input_file__files_happen; ++ ++ smalluint exitcode; + +- var *evaluate__fnargs; + unsigned evaluate__seed; ++ var *evaluate__fnargs; + regex_t evaluate__sreg; + +- var ptest__v; ++ var ptest__tmpvar; ++ var awk_printf__tmpvar; ++ var as_regex__tmpvar; ++ var exit__tmpvar; ++ var main__tmpvar; + + tsplitter exec_builtin__tspl; + + /* biggest and least used members go last */ + tsplitter fsplitter, rsplitter; ++ ++ char g_buf[MAXVARFMT + 1]; + }; + #define G1 (ptr_to_globals[-1]) + #define G (*(struct globals2 *)ptr_to_globals) + /* For debug. 
nm --size-sort awk.o | grep -vi ' [tr] ' */ +-/*char G1size[sizeof(G1)]; - 0x74 */ +-/*char Gsize[sizeof(G)]; - 0x1c4 */ ++//char G1size[sizeof(G1)]; // 0x70 ++//char Gsize[sizeof(G)]; // 0x2f8 + /* Trying to keep most of members accessible with short offsets: */ +-/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */ ++//char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; // 0x7c + #define t_double (G1.t_double ) + #define beginseq (G1.beginseq ) + #define mainseq (G1.mainseq ) +@@ -549,18 +619,20 @@ struct globals2 { + #define break_ptr (G1.break_ptr ) + #define continue_ptr (G1.continue_ptr) + #define iF (G1.iF ) +-#define vhash (G1.vhash ) + #define ahash (G1.ahash ) +-#define fdhash (G1.fdhash ) + #define fnhash (G1.fnhash ) ++#define vhash (G1.vhash ) ++#define fdhash ahash ++//^^^^^^^^^^^^^^^^^^ ahash is cleared after every function parsing, ++// and ends up empty after parsing phase. Thus, we can simply reuse it ++// for fdhash in execution stage. + #define g_progname (G1.g_progname ) + #define g_lineno (G1.g_lineno ) + #define nfields (G1.nfields ) + #define maxfields (G1.maxfields ) + #define Fields (G1.Fields ) +-#define g_cb (G1.g_cb ) + #define g_pos (G1.g_pos ) +-#define g_buf (G1.g_buf ) ++#define g_saved_ch (G1.g_saved_ch ) + #define icase (G1.icase ) + #define exiting (G1.exiting ) + #define nextrec (G1.nextrec ) +@@ -574,25 +646,13 @@ struct globals2 { + #define intvar (G.intvar ) + #define fsplitter (G.fsplitter ) + #define rsplitter (G.rsplitter ) ++#define g_buf (G.g_buf ) + #define INIT_G() do { \ + SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \ +- G.next_token__ltclass = TC_OPTERM; \ ++ t_tclass = TC_NEWLINE; \ + G.evaluate__seed = 1; \ + } while (0) + +- +-/* function prototypes */ +-static void handle_special(var *); +-static node *parse_expr(uint32_t); +-static void chain_group(void); +-static var *evaluate(node *, var *); +-static rstream *next_input_file(void); +-static int fmt_num(char 
*, int, const char *, double, int); +-static int awk_exit(int) NORETURN; +- +-/* ---- error handling ---- */ +- +-static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error"; + static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string"; + static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token"; + static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero"; +@@ -604,10 +664,7 @@ static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function"; + static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in"; + static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field"; + +-static void zero_out_var(var *vp) +-{ +- memset(vp, 0, sizeof(*vp)); +-} ++static int awk_exit(void) NORETURN; + + static void syntax_error(const char *message) NORETURN; + static void syntax_error(const char *message) +@@ -638,12 +695,40 @@ static xhash *hash_init(void) + return newhash; + } + ++static void hash_clear(xhash *hash) ++{ ++ unsigned i; ++ hash_item *hi, *thi; ++ ++ for (i = 0; i < hash->csize; i++) { ++ hi = hash->items[i]; ++ while (hi) { ++ thi = hi; ++ hi = hi->next; ++//FIXME: this assumes that it's a hash of *variables*: ++ free(thi->data.v.string); ++ free(thi); ++ } ++ hash->items[i] = NULL; ++ } ++ hash->glen = hash->nel = 0; ++} ++ ++#if 0 //UNUSED ++static void hash_free(xhash *hash) ++{ ++ hash_clear(hash); ++ free(hash->items); ++ free(hash); ++} ++#endif ++ + /* find item in hash, return ptr to data, NULL if not found */ +-static void *hash_search(xhash *hash, const char *name) ++static NOINLINE void *hash_search3(xhash *hash, const char *name, unsigned idx) + { + hash_item *hi; + +- hi = hash->items[hashidx(name) % hash->csize]; ++ hi = hash->items[idx % hash->csize]; + while (hi) { + if (strcmp(hi->name, name) == 0) + return &hi->data; +@@ -652,6 +737,11 @@ static void *hash_search(xhash *hash, const char *name) + return NULL; + } + ++static void *hash_search(xhash *hash, const char *name) ++{ ++ 
return hash_search3(hash, name, hashidx(name)); ++} ++ + /* grow hash if it becomes too big */ + static void hash_rebuild(xhash *hash) + { +@@ -687,16 +777,17 @@ static void *hash_find(xhash *hash, const char *name) + unsigned idx; + int l; + +- hi = hash_search(hash, name); ++ idx = hashidx(name); ++ hi = hash_search3(hash, name, idx); + if (!hi) { +- if (++hash->nel / hash->csize > 10) ++ if (++hash->nel > hash->csize * 8) + hash_rebuild(hash); + + l = strlen(name) + 1; + hi = xzalloc(sizeof(*hi) + l); + strcpy(hi->name, name); + +- idx = hashidx(name) % hash->csize; ++ idx = idx % hash->csize; + hi->next = hash->items[idx]; + hash->items[idx] = hi; + hash->glen += l; +@@ -731,7 +822,7 @@ static void hash_remove(xhash *hash, const char *name) + + static char *skip_spaces(char *p) + { +- while (1) { ++ for (;;) { + if (*p == '\\' && p[1] == '\n') { + p++; + t_lineno++; +@@ -747,8 +838,10 @@ static char *skip_spaces(char *p) + static char *nextword(char **s) + { + char *p = *s; +- while (*(*s)++ != '\0') ++ char *q = p; ++ while (*q++ != '\0') + continue; ++ *s = q; + return p; + } + +@@ -811,10 +904,27 @@ static double my_strtod(char **pp) + + /* -------- working with variables (set/get/copy/etc) -------- */ + +-static xhash *iamarray(var *v) ++static void fmt_num(const char *format, double n) + { +- var *a = v; ++ if (n == (long long)n) { ++ snprintf(g_buf, MAXVARFMT, "%lld", (long long)n); ++ } else { ++ const char *s = format; ++ char c; ++ ++ do { c = *s; } while (c && *++s); ++ if (strchr("diouxX", c)) { ++ snprintf(g_buf, MAXVARFMT, format, (int)n); ++ } else if (strchr("eEfFgGaA", c)) { ++ snprintf(g_buf, MAXVARFMT, format, n); ++ } else { ++ syntax_error(EMSG_INV_FMT); ++ } ++ } ++} + ++static xhash *iamarray(var *a) ++{ + while (a->type & VF_CHILD) + a = a->x.parent; + +@@ -825,23 +935,7 @@ static xhash *iamarray(var *v) + return a->x.array; + } + +-static void clear_array(xhash *array) +-{ +- unsigned i; +- hash_item *hi, *thi; +- +- for (i = 0; i < 
array->csize; i++) { +- hi = array->items[i]; +- while (hi) { +- thi = hi; +- hi = hi->next; +- free(thi->data.v.string); +- free(thi); +- } +- array->items[i] = NULL; +- } +- array->glen = array->nel = 0; +-} ++#define clear_array(array) hash_clear(array) + + /* clear a variable */ + static var *clrvar(var *v) +@@ -855,6 +949,8 @@ static var *clrvar(var *v) + return v; + } + ++static void handle_special(var *); ++ + /* assign string value to variable */ + static var *setvar_p(var *v, char *value) + { +@@ -901,7 +997,7 @@ static const char *getvar_s(var *v) + { + /* if v is numeric and has no cached string, convert it to string */ + if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) { +- fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE); ++ fmt_num(getvar_s(intvar[CONVFMT]), v->number); + v->string = xstrdup(g_buf); + v->type |= VF_CACHED; + } +@@ -920,6 +1016,7 @@ static double getvar_i(var *v) + v->number = my_strtod(&s); + debug_printf_eval("%f (s:'%s')\n", v->number, s); + if (v->type & VF_USER) { ++//TODO: skip_spaces() also skips backslash+newline, is it intended here? + s = skip_spaces(s); + if (*s != '\0') + v->type &= ~VF_USER; +@@ -981,94 +1078,28 @@ static int istrue(var *v) + return (v->string && v->string[0]); + } + +-/* temporary variables allocator. Last allocated should be first freed */ +-static var *nvalloc(int n) +-{ +- nvblock *pb = NULL; +- var *v, *r; +- int size; +- +- while (g_cb) { +- pb = g_cb; +- if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) +- break; +- g_cb = g_cb->next; +- } +- +- if (!g_cb) { +- size = (n <= MINNVBLOCK) ? 
MINNVBLOCK : n; +- g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var)); +- g_cb->size = size; +- g_cb->pos = g_cb->nv; +- g_cb->prev = pb; +- /*g_cb->next = NULL; - xzalloc did it */ +- if (pb) +- pb->next = g_cb; +- } +- +- v = r = g_cb->pos; +- g_cb->pos += n; +- +- while (v < g_cb->pos) { +- v->type = 0; +- v->string = NULL; +- v++; +- } +- +- return r; +-} +- +-static void nvfree(var *v) +-{ +- var *p; +- +- if (v < g_cb->nv || v >= g_cb->pos) +- syntax_error(EMSG_INTERNAL_ERROR); +- +- for (p = v; p < g_cb->pos; p++) { +- if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) { +- clear_array(iamarray(p)); +- free(p->x.array->items); +- free(p->x.array); +- } +- if (p->type & VF_WALK) { +- walker_list *n; +- walker_list *w = p->x.walker; +- debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker); +- p->x.walker = NULL; +- while (w) { +- n = w->prev; +- debug_printf_walker(" free(%p)\n", w); +- free(w); +- w = n; +- } +- } +- clrvar(p); +- } +- +- g_cb->pos = v; +- while (g_cb->prev && g_cb->pos == g_cb->nv) { +- g_cb = g_cb->prev; +- } +-} +- + /* ------- awk program text parsing ------- */ + +-/* Parse next token pointed by global pos, place results into global ttt. +- * If token isn't expected, give away. Return token class ++/* Parse next token pointed by global pos, place results into global t_XYZ variables. ++ * If token isn't expected, print error message and die. ++ * Return token class (also store it in t_tclass). 
+ */ + static uint32_t next_token(uint32_t expected) + { +-#define concat_inserted (G.next_token__concat_inserted) +-#define save_tclass (G.next_token__save_tclass) +-#define save_info (G.next_token__save_info) +-/* Initialized to TC_OPTERM: */ +-#define ltclass (G.next_token__ltclass) ++#define concat_inserted (G1.next_token__concat_inserted) ++#define save_tclass (G1.next_token__save_tclass) ++#define save_info (G1.next_token__save_info) + +- char *p, *s; ++ char *p; + const char *tl; +- uint32_t tc; + const uint32_t *ti; ++ uint32_t tc, last_token_class; ++ ++ last_token_class = t_tclass; /* t_tclass is initialized to TC_NEWLINE */ ++ ++ debug_printf_parse("%s() expected(%x):", __func__, expected); ++ debug_parse_print_tc(expected); ++ debug_printf_parse("\n"); + + if (t_rollback) { + debug_printf_parse("%s: using rolled-back token\n", __func__); +@@ -1080,6 +1111,10 @@ static uint32_t next_token(uint32_t expected) + t_info = save_info; + } else { + p = g_pos; ++ if (g_saved_ch != '\0') { ++ *p = g_saved_ch; ++ g_saved_ch = '\0'; ++ } + readnext: + p = skip_spaces(p); + g_lineno = t_lineno; +@@ -1087,15 +1122,12 @@ static uint32_t next_token(uint32_t expected) + while (*p != '\n' && *p != '\0') + p++; + +- if (*p == '\n') +- t_lineno++; +- + if (*p == '\0') { + tc = TC_EOF; + debug_printf_parse("%s: token found: TC_EOF\n", __func__); + } else if (*p == '\"') { + /* it's a string */ +- t_string = s = ++p; ++ char *s = t_string = ++p; + while (*p != '\"') { + char *pp; + if (*p == '\0' || *p == '\n') +@@ -1110,7 +1142,7 @@ static uint32_t next_token(uint32_t expected) + debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string); + } else if ((expected & TC_REGEXP) && *p == '/') { + /* it's regexp */ +- t_string = s = ++p; ++ char *s = t_string = ++p; + while (*p != '/') { + if (*p == '\0' || *p == '\n') + syntax_error(EMSG_UNEXP_EOS); +@@ -1141,6 +1173,11 @@ static uint32_t next_token(uint32_t expected) + tc = TC_NUMBER; + debug_printf_parse("%s: 
token found:%f TC_NUMBER\n", __func__, t_double); + } else { ++ char *end_of_name; ++ ++ if (*p == '\n') ++ t_lineno++; ++ + /* search for something known */ + tl = tokenlist; + tc = 0x00000001; +@@ -1155,9 +1192,9 @@ static uint32_t next_token(uint32_t expected) + * token matches, + * and it's not a longer word, + */ +- if ((tc & (expected | TC_WORD | TC_NEWLINE)) ++ if ((tc & (expected | TS_WORD | TC_NEWLINE)) + && strncmp(p, tl, l) == 0 +- && !((tc & TC_WORD) && isalnum_(p[l])) ++ && !((tc & TS_WORD) && isalnum_(p[l])) + ) { + /* then this is what we are looking for */ + t_info = *ti; +@@ -1174,67 +1211,94 @@ static uint32_t next_token(uint32_t expected) + if (!isalnum_(*p)) + syntax_error(EMSG_UNEXP_TOKEN); /* no */ + /* yes */ +- t_string = --p; +- while (isalnum_(*++p)) { +- p[-1] = *p; +- } +- p[-1] = '\0'; +- tc = TC_VARIABLE; +- /* also consume whitespace between functionname and bracket */ +- if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY)) ++ t_string = p; ++ while (isalnum_(*p)) ++ p++; ++ end_of_name = p; ++ ++ if (last_token_class == TC_FUNCDECL) ++ /* eat space in "function FUNC (...) {...}" declaration */ + p = skip_spaces(p); ++ else if (expected & TC_ARRAY) { ++ /* eat space between array name and [ */ ++ char *s = skip_spaces(p); ++ if (*s == '[') /* array ref, not just a name? */ ++ p = s; ++ } ++ /* else: do NOT consume whitespace after variable name! ++ * gawk allows definition "function FUNC (p) {...}" - note space, ++ * but disallows the call "FUNC (p)" because it isn't one - ++ * expression "v (a)" should NOT be parsed as TC_FUNCTION: ++ * it is a valid concatenation if "v" is a variable, ++ * not a function name (and type of name is not known at parse time). 
++ */ ++ + if (*p == '(') { ++ p++; + tc = TC_FUNCTION; + debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string); ++ } else if (*p == '[') { ++ p++; ++ tc = TC_ARRAY; ++ debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); + } else { +- if (*p == '[') { +- p++; +- tc = TC_ARRAY; +- debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); +- } else +- debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); ++ tc = TC_VARIABLE; ++ debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); ++ if (end_of_name == p) { ++ /* there is no space for trailing NUL in t_string! ++ * We need to save the char we are going to NUL. ++ * (we'll use it in future call to next_token()) ++ */ ++ g_saved_ch = *end_of_name; ++// especially pathological example is V="abc"; V.2 - it's V concatenated to .2 ++// (it evaluates to "abc0.2"). Because of this case, we can't simply cache ++// '.' and analyze it later: we also have to *store it back* in next ++// next_token(), in order to give my_strtod() the undamaged ".2" string. ++ } + } ++ *end_of_name = '\0'; /* terminate t_string */ + } + token_found: + g_pos = p; + + /* skipping newlines in some cases */ +- if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE)) ++ if ((last_token_class & TS_NOTERM) && (tc & TC_NEWLINE)) + goto readnext; + + /* insert concatenation operator when needed */ +- debug_printf_parse("%s: %x %x %x concat_inserted?\n", __func__, +- (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP)); +- if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP) +- && !(ltclass == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." 
*/ ++ debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__, ++ (last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP), ++ !(last_token_class == TC_LENGTH && tc == TC_LPAREN)); ++ if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP) ++ && !(last_token_class == TC_LENGTH && tc == TC_LPAREN) /* but not for "length(..." */ + ) { + concat_inserted = TRUE; + save_tclass = tc; + save_info = t_info; +- tc = TC_BINOP; ++ tc = TC_BINOPX; + t_info = OC_CONCAT | SS | P(35); + } + +- debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, t_tclass); + t_tclass = tc; ++ debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, tc); + } +- ltclass = t_tclass; +- + /* Are we ready for this? */ +- if (!(ltclass & expected)) { +- syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ? ++ if (!(t_tclass & expected)) { ++ syntax_error((last_token_class & (TC_NEWLINE | TC_EOF)) ? + EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN); + } + +- debug_printf_parse("%s: returning, ltclass:%x t_double:%f\n", __func__, ltclass, t_double); +- return ltclass; ++ debug_printf_parse("%s: returning, t_double:%f t_tclass:", __func__, t_double); ++ debug_parse_print_tc(t_tclass); ++ debug_printf_parse("\n"); ++ ++ return t_tclass; + #undef concat_inserted + #undef save_tclass + #undef save_info +-#undef ltclass + } + +-static void rollback_token(void) ++static ALWAYS_INLINE void rollback_token(void) + { + t_rollback = TRUE; + } +@@ -1251,169 +1315,188 @@ static node *new_node(uint32_t info) + + static void mk_re_node(const char *s, node *n, regex_t *re) + { +- n->info = OC_REGEXP; ++ n->info = TI_REGEXP; + n->l.re = re; + n->r.ire = re + 1; + xregcomp(re, s, REG_EXTENDED); + xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); + } + +-static node *condition(void) ++static node *parse_expr(uint32_t); ++ ++static node *parse_lrparen_list(void) + { +- next_token(TC_SEQSTART); +- return parse_expr(TC_SEQTERM); ++ next_token(TC_LPAREN); ++ return 
parse_expr(TC_RPAREN); + } + + /* parse expression terminated by given argument, return ptr + * to built subtree. Terminator is eaten by parse_expr */ +-static node *parse_expr(uint32_t iexp) ++static node *parse_expr(uint32_t term_tc) + { + node sn; + node *cn = &sn; + node *vn, *glptr; +- uint32_t tc, xtc; ++ uint32_t tc, expected_tc; + var *v; + +- debug_printf_parse("%s(%x)\n", __func__, iexp); ++ debug_printf_parse("%s() term_tc(%x):", __func__, term_tc); ++ debug_parse_print_tc(term_tc); ++ debug_printf_parse("\n"); + + sn.info = PRIMASK; + sn.r.n = sn.a.n = glptr = NULL; +- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp; ++ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc; + +- while (!((tc = next_token(xtc)) & iexp)) { ++ while (!((tc = next_token(expected_tc)) & term_tc)) { + +- if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) { ++ if (glptr && (t_info == TI_LESS)) { + /* input redirection (<) attached to glptr node */ + debug_printf_parse("%s: input redir\n", __func__); + cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37)); + cn->a.n = glptr; +- xtc = TC_OPERAND | TC_UOPPRE; ++ expected_tc = TS_OPERAND | TS_UOPPRE; + glptr = NULL; +- +- } else if (tc & (TC_BINOP | TC_UOPPOST)) { +- debug_printf_parse("%s: TC_BINOP | TC_UOPPOST tc:%x\n", __func__, tc); ++ continue; ++ } ++ if (tc & (TS_BINOP | TC_UOPPOST)) { ++ debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc); + /* for binary and postfix-unary operators, jump back over + * previous operators with higher priority */ + vn = cn; + while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2)) +- || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) ++ || ((t_info == vn->info) && t_info == TI_COLON) + ) { + vn = vn->a.n; + if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN); + } +- if ((t_info & OPCLSMASK) == OC_TERNARY) ++ if (t_info == TI_TERNARY) ++//TODO: why? 
+ t_info += P(6); + cn = vn->a.n->r.n = new_node(t_info); + cn->a.n = vn->a.n; +- if (tc & TC_BINOP) { ++ if (tc & TS_BINOP) { + cn->l.n = vn; +- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; +- if ((t_info & OPCLSMASK) == OC_PGETLINE) { ++//FIXME: this is the place to detect and reject assignments to non-lvalues. ++//Currently we allow "assignments" to consts and temporaries, nonsense like this: ++// awk 'BEGIN { "qwe" = 1 }' ++// awk 'BEGIN { 7 *= 7 }' ++// awk 'BEGIN { length("qwe") = 1 }' ++// awk 'BEGIN { (1+1) += 3 }' ++ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; ++ if (t_info == TI_PGETLINE) { + /* it's a pipe */ + next_token(TC_GETLINE); + /* give maximum priority to this pipe */ + cn->info &= ~PRIMASK; +- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp; ++ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; + } + } else { + cn->r.n = vn; +- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp; ++ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; + } + vn->a.n = cn; ++ continue; ++ } + +- } else { +- debug_printf_parse("%s: other\n", __func__); +- /* for operands and prefix-unary operators, attach them +- * to last node */ +- vn = cn; +- cn = vn->r.n = new_node(t_info); +- cn->a.n = vn; +- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; +- if (tc & (TC_OPERAND | TC_REGEXP)) { +- debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__); +- xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp; +- /* one should be very careful with switch on tclass - +- * only simple tclasses should be used! 
*/ +- switch (tc) { +- case TC_VARIABLE: +- case TC_ARRAY: +- debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__); +- cn->info = OC_VAR; +- v = hash_search(ahash, t_string); +- if (v != NULL) { +- cn->info = OC_FNARG; +- cn->l.aidx = v->x.aidx; +- } else { +- cn->l.v = newvar(t_string); +- } +- if (tc & TC_ARRAY) { +- cn->info |= xS; +- cn->r.n = parse_expr(TC_ARRTERM); +- } +- break; ++ debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info); ++ /* for operands and prefix-unary operators, attach them ++ * to last node */ ++ vn = cn; ++ cn = vn->r.n = new_node(t_info); ++ cn->a.n = vn; + +- case TC_NUMBER: +- case TC_STRING: +- debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__); +- cn->info = OC_VAR; +- v = cn->l.v = xzalloc(sizeof(var)); +- if (tc & TC_NUMBER) +- setvar_i(v, t_double); +- else { +- setvar_s(v, t_string); +- xtc &= ~TC_UOPPOST; /* "str"++ is not allowed */ +- } +- break; ++ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; ++ if (t_info == TI_PREINC || t_info == TI_PREDEC) ++ expected_tc = TS_LVALUE | TC_UOPPRE1; + +- case TC_REGEXP: +- debug_printf_parse("%s: TC_REGEXP\n", __func__); +- mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2)); +- break; ++ if (!(tc & (TS_OPERAND | TC_REGEXP))) ++ continue; + +- case TC_FUNCTION: +- debug_printf_parse("%s: TC_FUNCTION\n", __func__); +- cn->info = OC_FUNC; +- cn->r.f = newfunc(t_string); +- cn->l.n = condition(); +- break; ++ debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__); ++ expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc; ++ /* one should be very careful with switch on tclass - ++ * only simple tclasses should be used (TC_xyz, not TS_xyz) */ ++ switch (tc) { ++ case TC_VARIABLE: ++ case TC_ARRAY: ++ debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__); ++ cn->info = OC_VAR; ++ v = hash_search(ahash, t_string); ++ if (v != NULL) { ++ cn->info = OC_FNARG; ++ cn->l.aidx = v->x.aidx; ++ } else { ++ cn->l.v = newvar(t_string); ++ } ++ if 
(tc & TC_ARRAY) { ++ cn->info |= xS; ++ cn->r.n = parse_expr(TC_ARRTERM); ++ } ++ break; + +- case TC_SEQSTART: +- debug_printf_parse("%s: TC_SEQSTART\n", __func__); +- cn = vn->r.n = parse_expr(TC_SEQTERM); +- if (!cn) +- syntax_error("Empty sequence"); +- cn->a.n = vn; +- break; ++ case TC_NUMBER: ++ case TC_STRING: ++ debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__); ++ cn->info = OC_VAR; ++ v = cn->l.v = xzalloc(sizeof(var)); ++ if (tc & TC_NUMBER) ++ setvar_i(v, t_double); ++ else { ++ setvar_s(v, t_string); ++ expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */ ++ } ++ break; + +- case TC_GETLINE: +- debug_printf_parse("%s: TC_GETLINE\n", __func__); +- glptr = cn; +- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp; +- break; ++ case TC_REGEXP: ++ debug_printf_parse("%s: TC_REGEXP\n", __func__); ++ mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2)); ++ break; + +- case TC_BUILTIN: +- debug_printf_parse("%s: TC_BUILTIN\n", __func__); +- cn->l.n = condition(); +- break; ++ case TC_FUNCTION: ++ debug_printf_parse("%s: TC_FUNCTION\n", __func__); ++ cn->info = OC_FUNC; ++ cn->r.f = newfunc(t_string); ++ cn->l.n = parse_expr(TC_RPAREN); ++ break; + +- case TC_LENGTH: +- debug_printf_parse("%s: TC_LENGTH\n", __func__); +- next_token(TC_SEQSTART /* length(...) */ +- | TC_OPTERM /* length; (or newline)*/ +- | TC_GRPTERM /* length } */ +- | TC_BINOPX /* length <op> NUM */ +- | TC_COMMA /* print length, 1 */ +- ); +- rollback_token(); +- if (t_tclass & TC_SEQSTART) { +- /* It was a "(" token. 
Handle just like TC_BUILTIN */ +- cn->l.n = condition(); +- } +- break; +- } ++ case TC_LPAREN: ++ debug_printf_parse("%s: TC_LPAREN\n", __func__); ++ cn = vn->r.n = parse_expr(TC_RPAREN); ++ if (!cn) ++ syntax_error("Empty sequence"); ++ cn->a.n = vn; ++ break; ++ ++ case TC_GETLINE: ++ debug_printf_parse("%s: TC_GETLINE\n", __func__); ++ glptr = cn; ++ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; ++ break; ++ ++ case TC_BUILTIN: ++ debug_printf_parse("%s: TC_BUILTIN\n", __func__); ++ cn->l.n = parse_lrparen_list(); ++ break; ++ ++ case TC_LENGTH: ++ debug_printf_parse("%s: TC_LENGTH\n", __func__); ++ tc = next_token(TC_LPAREN /* length(...) */ ++ | TC_SEMICOL /* length; */ ++ | TC_NEWLINE /* length<newline> */ ++ | TC_RBRACE /* length } */ ++ | TC_BINOPX /* length <op> NUM */ ++ | TC_COMMA /* print length, 1 */ ++ ); ++ if (tc != TC_LPAREN) ++ rollback_token(); ++ else { ++ /* It was a "(" token. Handle just like TC_BUILTIN */ ++ cn->l.n = parse_expr(TC_RPAREN); + } ++ break; + } +- } ++ } /* while() */ + + debug_printf_parse("%s() returns %p\n", __func__, sn.r.n); + return sn.r.n; +@@ -1430,7 +1513,7 @@ static node *chain_node(uint32_t info) + if (seq->programname != g_progname) { + seq->programname = g_progname; + n = chain_node(OC_NEWSOURCE); +- n->l.new_progname = xstrdup(g_progname); ++ n->l.new_progname = g_progname; + } + + n = seq->last; +@@ -1446,14 +1529,16 @@ static void chain_expr(uint32_t info) + + n = chain_node(info); + +- n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM); ++ n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE); + if ((info & OF_REQUIRED) && !n->l.n) + syntax_error(EMSG_TOO_FEW_ARGS); + +- if (t_tclass & TC_GRPTERM) ++ if (t_tclass & TC_RBRACE) + rollback_token(); + } + ++static void chain_group(void); ++ + static node *chain_loop(node *nn) + { + node *n, *n2, *save_brk, *save_cont; +@@ -1477,207 +1562,284 @@ static node *chain_loop(node *nn) + return n; + } + ++static void chain_until_rbrace(void) ++{ ++ uint32_t tc; 
++ while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) { ++ debug_printf_parse("%s: !TC_RBRACE\n", __func__); ++ if (tc == TC_NEWLINE) ++ continue; ++ rollback_token(); ++ chain_group(); ++ } ++ debug_printf_parse("%s: TC_RBRACE\n", __func__); ++} ++ + /* parse group and attach it to chain */ + static void chain_group(void) + { +- uint32_t c; ++ uint32_t tc; + node *n, *n2, *n3; + + do { +- c = next_token(TC_GRPSEQ); +- } while (c & TC_NEWLINE); +- +- if (c & TC_GRPSTART) { +- debug_printf_parse("%s: TC_GRPSTART\n", __func__); +- while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) { +- debug_printf_parse("%s: !TC_GRPTERM\n", __func__); +- if (t_tclass & TC_NEWLINE) +- continue; +- rollback_token(); +- chain_group(); +- } +- debug_printf_parse("%s: TC_GRPTERM\n", __func__); +- } else if (c & (TC_OPSEQ | TC_OPTERM)) { +- debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__); ++ tc = next_token(TS_GRPSEQ); ++ } while (tc == TC_NEWLINE); ++ ++ if (tc == TC_LBRACE) { ++ debug_printf_parse("%s: TC_LBRACE\n", __func__); ++ chain_until_rbrace(); ++ return; ++ } ++ if (tc & (TS_OPSEQ | TC_SEMICOL)) { ++ debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL\n", __func__); + rollback_token(); + chain_expr(OC_EXEC | Vx); +- } else { +- /* TC_STATEMNT */ +- debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__); +- switch (t_info & OPCLSMASK) { +- case ST_IF: +- debug_printf_parse("%s: ST_IF\n", __func__); +- n = chain_node(OC_BR | Vx); +- n->l.n = condition(); ++ return; ++ } ++ ++ /* TS_STATEMNT */ ++ debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__); ++ switch (t_info & OPCLSMASK) { ++ case ST_IF: ++ debug_printf_parse("%s: ST_IF\n", __func__); ++ n = chain_node(OC_BR | Vx); ++ n->l.n = parse_lrparen_list(); ++ chain_group(); ++ n2 = chain_node(OC_EXEC); ++ n->r.n = seq->last; ++ if (next_token(TS_GRPSEQ | TC_RBRACE | TC_ELSE) == TC_ELSE) { + chain_group(); +- n2 = chain_node(OC_EXEC); +- n->r.n = seq->last; +- if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == 
TC_ELSE) { +- chain_group(); +- n2->a.n = seq->last; +- } else { +- rollback_token(); +- } +- break; ++ n2->a.n = seq->last; ++ } else { ++ rollback_token(); ++ } ++ break; + +- case ST_WHILE: +- debug_printf_parse("%s: ST_WHILE\n", __func__); +- n2 = condition(); +- n = chain_loop(NULL); +- n->l.n = n2; +- break; ++ case ST_WHILE: ++ debug_printf_parse("%s: ST_WHILE\n", __func__); ++ n2 = parse_lrparen_list(); ++ n = chain_loop(NULL); ++ n->l.n = n2; ++ break; + +- case ST_DO: +- debug_printf_parse("%s: ST_DO\n", __func__); +- n2 = chain_node(OC_EXEC); +- n = chain_loop(NULL); +- n2->a.n = n->a.n; +- next_token(TC_WHILE); +- n->l.n = condition(); +- break; ++ case ST_DO: ++ debug_printf_parse("%s: ST_DO\n", __func__); ++ n2 = chain_node(OC_EXEC); ++ n = chain_loop(NULL); ++ n2->a.n = n->a.n; ++ next_token(TC_WHILE); ++ n->l.n = parse_lrparen_list(); ++ break; + +- case ST_FOR: +- debug_printf_parse("%s: ST_FOR\n", __func__); +- next_token(TC_SEQSTART); +- n2 = parse_expr(TC_SEMICOL | TC_SEQTERM); +- if (t_tclass & TC_SEQTERM) { /* for-in */ +- if (!n2 || (n2->info & OPCLSMASK) != OC_IN) +- syntax_error(EMSG_UNEXP_TOKEN); +- n = chain_node(OC_WALKINIT | VV); +- n->l.n = n2->l.n; +- n->r.n = n2->r.n; +- n = chain_loop(NULL); +- n->info = OC_WALKNEXT | Vx; +- n->l.n = n2->l.n; +- } else { /* for (;;) */ +- n = chain_node(OC_EXEC | Vx); +- n->l.n = n2; +- n2 = parse_expr(TC_SEMICOL); +- n3 = parse_expr(TC_SEQTERM); +- n = chain_loop(n3); +- n->l.n = n2; +- if (!n2) +- n->info = OC_EXEC; +- } +- break; ++ case ST_FOR: ++ debug_printf_parse("%s: ST_FOR\n", __func__); ++ next_token(TC_LPAREN); ++ n2 = parse_expr(TC_SEMICOL | TC_RPAREN); ++ if (t_tclass & TC_RPAREN) { /* for (I in ARRAY) */ ++ if (!n2 || n2->info != TI_IN) ++ syntax_error(EMSG_UNEXP_TOKEN); ++ n = chain_node(OC_WALKINIT | VV); ++ n->l.n = n2->l.n; ++ n->r.n = n2->r.n; ++ n = chain_loop(NULL); ++ n->info = OC_WALKNEXT | Vx; ++ n->l.n = n2->l.n; ++ } else { /* for (;;) */ ++ n = chain_node(OC_EXEC | Vx); ++ 
n->l.n = n2; ++ n2 = parse_expr(TC_SEMICOL); ++ n3 = parse_expr(TC_RPAREN); ++ n = chain_loop(n3); ++ n->l.n = n2; ++ if (!n2) ++ n->info = OC_EXEC; ++ } ++ break; + +- case OC_PRINT: +- case OC_PRINTF: +- debug_printf_parse("%s: OC_PRINT[F]\n", __func__); +- n = chain_node(t_info); +- n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM); +- if (t_tclass & TC_OUTRDR) { +- n->info |= t_info; +- n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM); +- } +- if (t_tclass & TC_GRPTERM) +- rollback_token(); +- break; ++ case OC_PRINT: ++ case OC_PRINTF: ++ debug_printf_parse("%s: OC_PRINT[F]\n", __func__); ++ n = chain_node(t_info); ++ n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_OUTRDR | TC_RBRACE); ++ if (t_tclass & TC_OUTRDR) { ++ n->info |= t_info; ++ n->r.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE); ++ } ++ if (t_tclass & TC_RBRACE) ++ rollback_token(); ++ break; + +- case OC_BREAK: +- debug_printf_parse("%s: OC_BREAK\n", __func__); +- n = chain_node(OC_EXEC); +- n->a.n = break_ptr; +- chain_expr(t_info); +- break; ++ case OC_BREAK: ++ debug_printf_parse("%s: OC_BREAK\n", __func__); ++ n = chain_node(OC_EXEC); ++ if (!break_ptr) ++ syntax_error("'break' not in a loop"); ++ n->a.n = break_ptr; ++ chain_expr(t_info); ++ break; + +- case OC_CONTINUE: +- debug_printf_parse("%s: OC_CONTINUE\n", __func__); +- n = chain_node(OC_EXEC); +- n->a.n = continue_ptr; +- chain_expr(t_info); +- break; ++ case OC_CONTINUE: ++ debug_printf_parse("%s: OC_CONTINUE\n", __func__); ++ n = chain_node(OC_EXEC); ++ if (!continue_ptr) ++ syntax_error("'continue' not in a loop"); ++ n->a.n = continue_ptr; ++ chain_expr(t_info); ++ break; + +- /* delete, next, nextfile, return, exit */ +- default: +- debug_printf_parse("%s: default\n", __func__); +- chain_expr(t_info); +- } ++ /* delete, next, nextfile, return, exit */ ++ default: ++ debug_printf_parse("%s: default\n", __func__); ++ chain_expr(t_info); + } + } + + static void parse_program(char *p) + { +- uint32_t tclass; +- node *cn; +- 
func *f; +- var *v; ++ debug_printf_parse("%s()\n", __func__); + + g_pos = p; + t_lineno = 1; +- while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART | +- TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { ++ for (;;) { ++ uint32_t tclass; + +- if (tclass & TC_OPTERM) { +- debug_printf_parse("%s: TC_OPTERM\n", __func__); ++ tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL ++ | TC_EOF | TC_NEWLINE /* but not TC_SEMICOL */); ++ got_tok: ++ if (tclass == TC_EOF) { ++ debug_printf_parse("%s: TC_EOF\n", __func__); ++ break; ++ } ++ if (tclass == TC_NEWLINE) { ++ debug_printf_parse("%s: TC_NEWLINE\n", __func__); + continue; + } +- +- seq = &mainseq; +- if (tclass & TC_BEGIN) { ++ if (tclass == TC_BEGIN) { + debug_printf_parse("%s: TC_BEGIN\n", __func__); + seq = &beginseq; +- chain_group(); +- } else if (tclass & TC_END) { ++ /* ensure there is no newline between BEGIN and { */ ++ next_token(TC_LBRACE); ++ chain_until_rbrace(); ++ goto next_tok; ++ } ++ if (tclass == TC_END) { + debug_printf_parse("%s: TC_END\n", __func__); + seq = &endseq; +- chain_group(); +- } else if (tclass & TC_FUNCDECL) { ++ /* ensure there is no newline between END and { */ ++ next_token(TC_LBRACE); ++ chain_until_rbrace(); ++ goto next_tok; ++ } ++ if (tclass == TC_FUNCDECL) { ++ func *f; ++ + debug_printf_parse("%s: TC_FUNCDECL\n", __func__); + next_token(TC_FUNCTION); +- g_pos++; + f = newfunc(t_string); +- f->body.first = NULL; +- f->nargs = 0; +- /* Match func arg list: a comma sep list of >= 0 args, and a close paren */ +- while (next_token(TC_VARIABLE | TC_SEQTERM | TC_COMMA)) { +- /* Either an empty arg list, or trailing comma from prev iter +- * must be followed by an arg */ +- if (f->nargs == 0 && t_tclass == TC_SEQTERM) +- break; +- +- /* TC_SEQSTART/TC_COMMA must be followed by TC_VARIABLE */ +- if (t_tclass != TC_VARIABLE) ++ if (f->defined) ++ syntax_error("Duplicate function"); ++ f->defined = 1; ++ //f->body.first = NULL; - already is ++ 
//f->nargs = 0; - already is ++ /* func arg list: comma sep list of args, and a close paren */ ++ for (;;) { ++ var *v; ++ if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) { ++ if (f->nargs == 0) ++ break; /* func() is ok */ ++ /* func(a,) is not ok */ + syntax_error(EMSG_UNEXP_TOKEN); +- ++ } + v = findvar(ahash, t_string); + v->x.aidx = f->nargs++; +- + /* Arg followed either by end of arg list or 1 comma */ +- if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM) ++ if (next_token(TC_COMMA | TC_RPAREN) == TC_RPAREN) + break; +- if (t_tclass != TC_COMMA) +- syntax_error(EMSG_UNEXP_TOKEN); ++ /* it was a comma, we ate it */ + } + seq = &f->body; +- chain_group(); +- clear_array(ahash); +- } else if (tclass & TC_OPSEQ) { +- debug_printf_parse("%s: TC_OPSEQ\n", __func__); ++ /* ensure there is { after "func F(...)" - but newlines are allowed */ ++ while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) ++ continue; ++ chain_until_rbrace(); ++ hash_clear(ahash); ++ goto next_tok; ++ } ++ seq = &mainseq; ++ if (tclass & TS_OPSEQ) { ++ node *cn; ++ ++ debug_printf_parse("%s: TS_OPSEQ\n", __func__); + rollback_token(); + cn = chain_node(OC_TEST); +- cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART); +- if (t_tclass & TC_GRPSTART) { +- debug_printf_parse("%s: TC_GRPSTART\n", __func__); +- rollback_token(); +- chain_group(); ++ cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE); ++ if (t_tclass == TC_LBRACE) { ++ debug_printf_parse("%s: TC_LBRACE\n", __func__); ++ chain_until_rbrace(); + } else { +- debug_printf_parse("%s: !TC_GRPSTART\n", __func__); ++ /* no action, assume default "{ print }" */ ++ debug_printf_parse("%s: !TC_LBRACE\n", __func__); + chain_node(OC_PRINT); + } + cn->r.n = mainseq.last; +- } else /* if (tclass & TC_GRPSTART) */ { +- debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__); +- rollback_token(); +- chain_group(); ++ goto next_tok; + } +- } +- debug_printf_parse("%s: TC_EOF\n", __func__); ++ /* tclass == TC_LBRACE */ ++ 
debug_printf_parse("%s: TC_LBRACE(?)\n", __func__); ++ chain_until_rbrace(); ++ next_tok: ++ /* Same as next_token() at the top of the loop, + TC_SEMICOL */ ++ tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL ++ | TC_EOF | TC_NEWLINE | TC_SEMICOL); ++ /* gawk allows many newlines, but does not allow more than one semicolon: ++ * BEGIN {...}<newline>;<newline>; ++ * would complain "each rule must have a pattern or an action part". ++ * Same message for ++ * ; BEGIN {...} ++ */ ++ if (tclass != TC_SEMICOL) ++ goto got_tok; /* use this token */ ++ /* else: loop back - ate the semicolon, get and use _next_ token */ ++ } /* for (;;) */ + } + +- + /* -------- program execution part -------- */ + ++/* temporary variables allocator */ ++static var *nvalloc(int sz) ++{ ++ return xzalloc(sz * sizeof(var)); ++} ++ ++static void nvfree(var *v, int sz) ++{ ++ var *p = v; ++ ++ while (--sz >= 0) { ++ if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) { ++ clear_array(iamarray(p)); ++ free(p->x.array->items); ++ free(p->x.array); ++ } ++ if (p->type & VF_WALK) { ++ walker_list *n; ++ walker_list *w = p->x.walker; ++ debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker); ++ p->x.walker = NULL; ++ while (w) { ++ n = w->prev; ++ debug_printf_walker(" free(%p)\n", w); ++ free(w); ++ w = n; ++ } ++ } ++ clrvar(p); ++ p++; ++ } ++ ++ free(v); ++} ++ + static node *mk_splitter(const char *s, tsplitter *spl) + { + regex_t *re, *ire; +@@ -1686,7 +1848,7 @@ static node *mk_splitter(const char *s, tsplitter *spl) + re = &spl->re[0]; + ire = &spl->re[1]; + n = &spl->n; +- if ((n->info & OPCLSMASK) == OC_REGEXP) { ++ if (n->info == TI_REGEXP) { + regfree(re); + regfree(ire); // TODO: nuke ire, use re+1? + } +@@ -1699,21 +1861,28 @@ static node *mk_splitter(const char *s, tsplitter *spl) + return n; + } + +-/* use node as a regular expression. 
Supplied with node ptr and regex_t ++static var *evaluate(node *, var *); ++ ++/* Use node as a regular expression. Supplied with node ptr and regex_t + * storage space. Return ptr to regex (if result points to preg, it should +- * be later regfree'd manually ++ * be later regfree'd manually). + */ + static regex_t *as_regex(node *op, regex_t *preg) + { + int cflags; +- var *v; + const char *s; + +- if ((op->info & OPCLSMASK) == OC_REGEXP) { ++ if (op->info == TI_REGEXP) { + return icase ? op->r.ire : op->l.re; + } +- v = nvalloc(1); +- s = getvar_s(evaluate(op, v)); ++ ++ //tmpvar = nvalloc(1); ++#define TMPVAR (&G.as_regex__tmpvar) ++ // We use a single "static" tmpvar (instead of on-stack or malloced one) ++ // to decrease memory consumption in deeply-recursive awk programs. ++ // The rule to work safely is to never call evaluate() while our static ++ // TMPVAR's value is still needed. ++ s = getvar_s(evaluate(op, TMPVAR)); + + cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED; + /* Testcase where REG_EXTENDED fails (unpaired '{'): +@@ -1725,7 +1894,8 @@ static regex_t *as_regex(node *op, regex_t *preg) + cflags &= ~REG_EXTENDED; + xregcomp(preg, s, cflags); + } +- nvfree(v); ++ //nvfree(tmpvar, 1); ++#undef TMPVAR + return preg; + } + +@@ -1745,12 +1915,22 @@ static char* qrealloc(char *b, int n, int *size) + /* resize field storage space */ + static void fsrealloc(int size) + { +- int i; ++ int i, newsize; + + if (size >= maxfields) { ++ /* Sanity cap, easier than catering for overflows */ ++ if (size > 0xffffff) ++ bb_die_memory_exhausted(); ++ + i = maxfields; + maxfields = size + 16; +- Fields = xrealloc(Fields, maxfields * sizeof(Fields[0])); ++ ++ newsize = maxfields * sizeof(Fields[0]); ++ debug_printf_eval("fsrealloc: xrealloc(%p, %u)\n", Fields, newsize); ++ Fields = xrealloc(Fields, newsize); ++ debug_printf_eval("fsrealloc: Fields=%p..%p\n", Fields, (char*)Fields + newsize - 1); ++ /* ^^^ did Fields[] move? 
debug aid for L.v getting "upstaged" by R.v in evaluate() */ ++ + for (; i < maxfields; i++) { + Fields[i].type = VF_SPECIAL; + Fields[i].string = NULL; +@@ -1802,13 +1982,13 @@ static int awk_split(const char *s, node *spl, char **slist) + c[2] = '\n'; + + n = 0; +- if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */ ++ if (spl->info == TI_REGEXP) { /* regex split */ + if (!*s) + return n; /* "": zero fields */ + n++; /* at least one field will be there */ + do { + int l; +- regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... ++ regmatch_t pmatch[1]; + + l = strcspn(s, c+2); /* len till next NUL or \n */ + if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0 +@@ -1969,7 +2149,7 @@ static node *nextarg(node **pn) + node *n; + + n = *pn; +- if (n && (n->info & OPCLSMASK) == OC_COMMA) { ++ if (n && n->info == TI_COMMA) { + *pn = n->r.n; + n = n->l.n; + } else { +@@ -2000,8 +2180,7 @@ static void hashwalk_init(var *v, xhash *array) + for (i = 0; i < array->csize; i++) { + hi = array->items[i]; + while (hi) { +- strcpy(w->end, hi->name); +- nextword(&w->end); ++ w->end = stpcpy(w->end, hi->name) + 1; + hi = hi->next; + } + } +@@ -2027,15 +2206,18 @@ static int hashwalk_next(var *v) + /* evaluate node, return 1 when result is true, 0 otherwise */ + static int ptest(node *pattern) + { +- /* ptest__v is "static": to save stack space? */ +- return istrue(evaluate(pattern, &G.ptest__v)); ++ // We use a single "static" tmpvar (instead of on-stack or malloced one) ++ // to decrease memory consumption in deeply-recursive awk programs. ++ // The rule to work safely is to never call evaluate() while our static ++ // TMPVAR's value is still needed. ++ return istrue(evaluate(pattern, &G.ptest__tmpvar)); + } + + /* read next record from stream rsm into a variable v */ + static int awk_getline(rstream *rsm, var *v) + { + char *b; +- regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... 
++ regmatch_t pmatch[1]; + int size, a, p, pp = 0; + int fd, so, eo, r, rp; + char c, *m, *s; +@@ -2061,7 +2243,7 @@ static int awk_getline(rstream *rsm, var *v) + so = eo = p; + r = 1; + if (p > 0) { +- if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) { ++ if (rsplitter.n.info == TI_REGEXP) { + if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re, + b, 1, pmatch, 0) == 0) { + so = pmatch[0].rm_so; +@@ -2133,82 +2315,126 @@ static int awk_getline(rstream *rsm, var *v) + return r; + } + +-static int fmt_num(char *b, int size, const char *format, double n, int int_as_int) +-{ +- int r = 0; +- char c; +- const char *s = format; +- +- if (int_as_int && n == (long long)n) { +- r = snprintf(b, size, "%lld", (long long)n); +- } else { +- do { c = *s; } while (c && *++s); +- if (strchr("diouxX", c)) { +- r = snprintf(b, size, format, (int)n); +- } else if (strchr("eEfgG", c)) { +- r = snprintf(b, size, format, n); +- } else { +- syntax_error(EMSG_INV_FMT); +- } +- } +- return r; +-} +- + /* formatted output into an allocated buffer, return ptr to buffer */ +-static char *awk_printf(node *n) ++#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS ++# define awk_printf(a, b) awk_printf(a) ++#endif ++static char *awk_printf(node *n, size_t *len) + { +- char *b = NULL; +- char *fmt, *s, *f; +- const char *s1; +- int i, j, incr, bsize; +- char c, c1; +- var *v, *arg; +- +- v = nvalloc(1); +- fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v))); +- ++ char *b; ++ char *fmt, *f; ++ size_t i; ++ ++ //tmpvar = nvalloc(1); ++#define TMPVAR (&G.awk_printf__tmpvar) ++ // We use a single "static" tmpvar (instead of on-stack or malloced one) ++ // to decrease memory consumption in deeply-recursive awk programs. ++ // The rule to work safely is to never call evaluate() while our static ++ // TMPVAR's value is still needed. 
++ fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), TMPVAR))); ++ // ^^^^^^^^^ here we immediately strdup() the value, so the later call ++ // to evaluate() potentially recursing into another awk_printf() can't ++ // mangle the value. ++ ++ b = NULL; + i = 0; +- while (*f) { ++ while (1) { /* "print one format spec" loop */ ++ char *s; ++ char c; ++ char sv; ++ var *arg; ++ size_t slen; ++ ++ /* Find end of the next format spec, or end of line */ + s = f; +- while (*f && (*f != '%' || *++f == '%')) +- f++; +- while (*f && !isalpha(*f)) { +- if (*f == '*') +- syntax_error("%*x formats are not supported"); ++ while (1) { ++ c = *f; ++ if (!c) /* no percent chars found at all */ ++ goto nul; + f++; ++ if (c == '%') ++ break; + } +- +- incr = (f - s) + MAXVARFMT; +- b = qrealloc(b, incr + i, &bsize); ++ /* we are past % in "....%..." */ + c = *f; +- if (c != '\0') ++ if (!c) /* "....%" */ ++ goto nul; ++ if (c == '%') { /* "....%%...." */ ++ slen = f - s; ++ s = xstrndup(s, slen); + f++; +- c1 = *f; ++ goto append; /* print "....%" part verbatim */ ++ } ++ while (1) { ++ if (isalpha(c)) ++ break; ++ if (c == '*') ++ syntax_error("%*x formats are not supported"); ++ c = *++f; ++ if (!c) { /* "....%...." and no letter found after % */ ++ /* Example: awk 'BEGIN { printf "^^^%^^^\n"; }' */ ++ nul: ++ slen = f - s; ++ goto tail; /* print remaining string, exit loop */ ++ } ++ } ++ /* we are at A in "....%...A..." */ ++ ++ arg = evaluate(nextarg(&n), TMPVAR); ++ ++ /* Result can be arbitrarily long. Example: ++ * printf "%99999s", "BOOM" ++ */ ++ sv = *++f; + *f = '\0'; +- arg = evaluate(nextarg(&n), v); +- +- j = i; +- if (c == 'c' || !c) { +- i += sprintf(b+i, s, is_numeric(arg) ? +- (char)getvar_i(arg) : *getvar_s(arg)); +- } else if (c == 's') { +- s1 = getvar_s(arg); +- b = qrealloc(b, incr+i+strlen(s1), &bsize); +- i += sprintf(b+i, s, s1); ++ if (c == 'c') { ++ char cc = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg); ++ char *r = xasprintf(s, cc ? 
cc : '^' /* else strlen will be wrong */); ++ slen = strlen(r); ++ if (cc == '\0') /* if cc is NUL, re-format the string with it */ ++ sprintf(r, s, cc); ++ s = r; + } else { +- i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE); ++ if (c == 's') { ++ s = xasprintf(s, getvar_s(arg)); ++ } else { ++ double d = getvar_i(arg); ++ if (strchr("diouxX", c)) { ++//TODO: make it wider here (%x -> %llx etc)? ++ s = xasprintf(s, (int)d); ++ } else if (strchr("eEfFgGaA", c)) { ++ s = xasprintf(s, d); ++ } else { ++//TODO: GNU Awk 5.0.1: printf "%W" prints "%W", does not error out ++ syntax_error(EMSG_INV_FMT); ++ } ++ } ++ slen = strlen(s); + } +- *f = c1; +- +- /* if there was an error while sprintf, return value is negative */ +- if (i < j) +- i = j; ++ *f = sv; ++ append: ++ if (i == 0) { ++ b = s; ++ i = slen; ++ continue; ++ } ++ tail: ++ b = xrealloc(b, i + slen + 1); ++ strcpy(b + i, s); ++ i += slen; ++ if (!c) /* s is NOT allocated and this is the last part of string? */ ++ break; ++ free(s); + } + + free(fmt); +- nvfree(v); +- b = xrealloc(b, i + 1); +- b[i] = '\0'; ++ //nvfree(tmpvar, 1); ++#undef TMPVAR ++ ++#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS ++ if (len) ++ *len = i; ++#endif + return b; + } + +@@ -2338,33 +2564,59 @@ static NOINLINE int do_mktime(const char *ds) + return mktime(&then); + } + ++/* Reduce stack usage in exec_builtin() by keeping match() code separate */ ++static NOINLINE var *do_match(node *an1, const char *as0) ++{ ++ regmatch_t pmatch[1]; ++ regex_t sreg, *re; ++ int n, start, len; ++ ++ re = as_regex(an1, &sreg); ++ n = regexec(re, as0, 1, pmatch, 0); ++ if (re == &sreg) ++ regfree(re); ++ start = 0; ++ len = -1; ++ if (n == 0) { ++ start = pmatch[0].rm_so + 1; ++ len = pmatch[0].rm_eo - pmatch[0].rm_so; ++ } ++ setvar_i(newvar("RLENGTH"), len); ++ return setvar_i(newvar("RSTART"), start); ++} ++ ++/* Reduce stack usage in evaluate() by keeping builtins' code separate */ + static NOINLINE var *exec_builtin(node *op, var *res) + { + #define tspl 
(G.exec_builtin__tspl) + +- var *tv; ++ var *tmpvars; + node *an[4]; + var *av[4]; + const char *as[4]; +- regmatch_t pmatch[2]; +- regex_t sreg, *re; + node *spl; + uint32_t isr, info; + int nargs; + time_t tt; + int i, l, ll, n; + +- tv = nvalloc(4); ++ tmpvars = nvalloc(4); ++#define TMPVAR0 (tmpvars) ++#define TMPVAR1 (tmpvars + 1) ++#define TMPVAR2 (tmpvars + 2) ++#define TMPVAR3 (tmpvars + 3) ++#define TMPVAR(i) (tmpvars + (i)) + isr = info = op->info; + op = op->l.n; + + av[2] = av[3] = NULL; + for (i = 0; i < 4 && op; i++) { + an[i] = nextarg(&op); +- if (isr & 0x09000000) +- av[i] = evaluate(an[i], &tv[i]); +- if (isr & 0x08000000) +- as[i] = getvar_s(av[i]); ++ if (isr & 0x09000000) { ++ av[i] = evaluate(an[i], TMPVAR(i)); ++ if (isr & 0x08000000) ++ as[i] = getvar_s(av[i]); ++ } + isr >>= 1; + } + +@@ -2386,8 +2638,8 @@ static NOINLINE var *exec_builtin(node *op, var *res) + char *s, *s1; + + if (nargs > 2) { +- spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ? +- an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl); ++ spl = (an[2]->info == TI_REGEXP) ? 
an[2] ++ : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl); + } else { + spl = &fsplitter.n; + } +@@ -2501,20 +2753,7 @@ static NOINLINE var *exec_builtin(node *op, var *res) + break; + + case B_ma: +- re = as_regex(an[1], &sreg); +- n = regexec(re, as[0], 1, pmatch, 0); +- if (n == 0) { +- pmatch[0].rm_so++; +- pmatch[0].rm_eo++; +- } else { +- pmatch[0].rm_so = 0; +- pmatch[0].rm_eo = -1; +- } +- setvar_i(newvar("RSTART"), pmatch[0].rm_so); +- setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so); +- setvar_i(res, pmatch[0].rm_so); +- if (re == &sreg) +- regfree(re); ++ res = do_match(an[1], as[0]); + break; + + case B_ge: +@@ -2530,14 +2769,79 @@ static NOINLINE var *exec_builtin(node *op, var *res) + break; + } + +- nvfree(tv); ++ nvfree(tmpvars, 4); ++#undef TMPVAR0 ++#undef TMPVAR1 ++#undef TMPVAR2 ++#undef TMPVAR3 ++#undef TMPVAR ++ + return res; + #undef tspl + } + ++/* if expr looks like "var=value", perform assignment and return 1, ++ * otherwise return 0 */ ++static int is_assignment(const char *expr) ++{ ++ char *exprc, *val; ++ ++ val = (char*)endofname(expr); ++ if (val == (char*)expr || *val != '=') { ++ return FALSE; ++ } ++ ++ exprc = xstrdup(expr); ++ val = exprc + (val - expr); ++ *val++ = '\0'; ++ ++ unescape_string_in_place(val); ++ setvar_u(newvar(exprc), val); ++ free(exprc); ++ return TRUE; ++} ++ ++/* switch to next input file */ ++static rstream *next_input_file(void) ++{ ++#define rsm (G.next_input_file__rsm) ++#define files_happen (G.next_input_file__files_happen) ++ ++ const char *fname, *ind; ++ ++ if (rsm.F) ++ fclose(rsm.F); ++ rsm.F = NULL; ++ rsm.pos = rsm.adv = 0; ++ ++ for (;;) { ++ if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) { ++ if (files_happen) ++ return NULL; ++ fname = "-"; ++ rsm.F = stdin; ++ break; ++ } ++ ind = getvar_s(incvar(intvar[ARGIND])); ++ fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); ++ if (fname && *fname && !is_assignment(fname)) { ++ rsm.F = xfopen_stdin(fname); ++ 
break; ++ } ++ } ++ ++ files_happen = TRUE; ++ setvar_s(intvar[FILENAME], fname); ++ return &rsm; ++#undef rsm ++#undef files_happen ++} ++ + /* + * Evaluate node - the heart of the program. Supplied with subtree +- * and place where to store result. returns ptr to result. ++ * and "res" variable to assign the result to if we evaluate an expression. ++ * If node refers to e.g. a variable or a field, no assignment happens. ++ * Return ptr to the result (which may or may not be the "res" variable!) + */ + #define XC(n) ((n) >> 8) + +@@ -2549,14 +2853,16 @@ static var *evaluate(node *op, var *res) + #define seed (G.evaluate__seed) + #define sreg (G.evaluate__sreg) + +- var *v1; ++ var *tmpvars; + + if (!op) + return setvar_s(res, NULL); + + debug_printf_eval("entered %s()\n", __func__); + +- v1 = nvalloc(2); ++ tmpvars = nvalloc(2); ++#define TMPVAR0 (tmpvars) ++#define TMPVAR1 (tmpvars + 1) + + while (op) { + struct { +@@ -2578,48 +2884,35 @@ static var *evaluate(node *op, var *res) + op1 = op->l.n; + debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn); + +- /* "delete" is special: +- * "delete array[var--]" must evaluate index expr only once, +- * must not evaluate it in "execute inevitable things" part. +- */ +- if (XC(opinfo & OPCLSMASK) == XC(OC_DELETE)) { +- uint32_t info = op1->info & OPCLSMASK; +- var *v; +- +- debug_printf_eval("DELETE\n"); +- if (info == OC_VAR) { +- v = op1->l.v; +- } else if (info == OC_FNARG) { +- v = &fnargs[op1->l.aidx]; +- } else { +- syntax_error(EMSG_NOT_ARRAY); ++ /* execute inevitable things */ ++ if (opinfo & OF_RES1) { ++ if ((opinfo & OF_REQUIRED) && !op1) ++ syntax_error(EMSG_TOO_FEW_ARGS); ++ L.v = evaluate(op1, TMPVAR0); ++ if (opinfo & OF_STR1) { ++ L.s = getvar_s(L.v); ++ debug_printf_eval("L.s:'%s'\n", L.s); + } +- if (op1->r.n) { /* array ref? 
*/ +- const char *s; +- s = getvar_s(evaluate(op1->r.n, v1)); +- hash_remove(iamarray(v), s); +- } else { +- clear_array(iamarray(v)); ++ if (opinfo & OF_NUM1) { ++ L_d = getvar_i(L.v); ++ debug_printf_eval("L_d:%f\n", L_d); + } +- goto next; + } +- +- /* execute inevitable things */ +- if (opinfo & OF_RES1) +- L.v = evaluate(op1, v1); +- if (opinfo & OF_RES2) +- R.v = evaluate(op->r.n, v1+1); +- if (opinfo & OF_STR1) { +- L.s = getvar_s(L.v); +- debug_printf_eval("L.s:'%s'\n", L.s); +- } +- if (opinfo & OF_STR2) { +- R.s = getvar_s(R.v); +- debug_printf_eval("R.s:'%s'\n", R.s); +- } +- if (opinfo & OF_NUM1) { +- L_d = getvar_i(L.v); +- debug_printf_eval("L_d:%f\n", L_d); ++ /* NB: Must get string/numeric values of L (done above) ++ * _before_ evaluate()'ing R.v: if both L and R are $NNNs, ++ * and right one is large, then L.v points to Fields[NNN1], ++ * second evaluate() reallocates and moves (!) Fields[], ++ * R.v points to Fields[NNN2] but L.v now points to freed mem! ++ * (Seen trying to evaluate "$444 $44444") ++ */ ++ if (opinfo & OF_RES2) { ++ R.v = evaluate(op->r.n, TMPVAR1); ++ //TODO: L.v may be invalid now, set L.v to NULL to catch bugs? ++ //L.v = NULL; ++ if (opinfo & OF_STR2) { ++ R.s = getvar_s(R.v); ++ debug_printf_eval("R.s:'%s'\n", R.s); ++ } + } + + debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK)); +@@ -2629,7 +2922,8 @@ static var *evaluate(node *op, var *res) + + /* test pattern */ + case XC( OC_TEST ): +- if ((op1->info & OPCLSMASK) == OC_COMMA) { ++ debug_printf_eval("TEST\n"); ++ if (op1->info == TI_COMMA) { + /* it's range pattern */ + if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) { + op->info |= OF_CHECKED; +@@ -2646,25 +2940,32 @@ static var *evaluate(node *op, var *res) + + /* just evaluate an expression, also used as unconditional jump */ + case XC( OC_EXEC ): ++ debug_printf_eval("EXEC\n"); + break; + + /* branch, used in if-else and various loops */ + case XC( OC_BR ): ++ debug_printf_eval("BR\n"); + op = istrue(L.v) ? 
op->a.n : op->r.n; + break; + + /* initialize for-in loop */ + case XC( OC_WALKINIT ): ++ debug_printf_eval("WALKINIT\n"); + hashwalk_init(L.v, iamarray(R.v)); + break; + + /* get next array item */ + case XC( OC_WALKNEXT ): ++ debug_printf_eval("WALKNEXT\n"); + op = hashwalk_next(L.v) ? op->a.n : op->r.n; + break; + + case XC( OC_PRINT ): +- case XC( OC_PRINTF ): { ++ debug_printf_eval("PRINT /\n"); ++ case XC( OC_PRINTF ): ++ debug_printf_eval("PRINTF\n"); ++ { + FILE *F = stdout; + + if (op->r.n) { +@@ -2682,55 +2983,94 @@ static var *evaluate(node *op, var *res) + F = rsm->F; + } + ++ /* Can't just check 'opinfo == OC_PRINT' here, parser ORs ++ * additional bits to opinfos of print/printf with redirects ++ */ + if ((opinfo & OPCLSMASK) == OC_PRINT) { + if (!op1) { + fputs(getvar_s(intvar[F0]), F); + } else { +- while (op1) { +- var *v = evaluate(nextarg(&op1), v1); ++ for (;;) { ++ var *v = evaluate(nextarg(&op1), TMPVAR0); + if (v->type & VF_NUMBER) { +- fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]), +- getvar_i(v), TRUE); ++ fmt_num(getvar_s(intvar[OFMT]), ++ getvar_i(v)); + fputs(g_buf, F); + } else { + fputs(getvar_s(v), F); + } +- +- if (op1) +- fputs(getvar_s(intvar[OFS]), F); ++ if (!op1) ++ break; ++ fputs(getvar_s(intvar[OFS]), F); + } + } + fputs(getvar_s(intvar[ORS]), F); +- +- } else { /* OC_PRINTF */ +- char *s = awk_printf(op1); ++ } else { /* PRINTF */ ++ IF_FEATURE_AWK_GNU_EXTENSIONS(size_t len;) ++ char *s = awk_printf(op1, &len); ++#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS ++ fwrite(s, len, 1, F); ++#else + fputs(s, F); ++#endif + free(s); + } + fflush(F); + break; + } + +- /* case XC( OC_DELETE ): - moved to happen before arg evaluation */ ++ case XC( OC_DELETE ): ++ debug_printf_eval("DELETE\n"); ++ { ++ /* "delete" is special: ++ * "delete array[var--]" must evaluate index expr only once. 
++ */ ++ uint32_t info = op1->info & OPCLSMASK; ++ var *v; ++ ++ if (info == OC_VAR) { ++ v = op1->l.v; ++ } else if (info == OC_FNARG) { ++ v = &fnargs[op1->l.aidx]; ++ } else { ++ syntax_error(EMSG_NOT_ARRAY); ++ } ++ if (op1->r.n) { /* array ref? */ ++ const char *s; ++ s = getvar_s(evaluate(op1->r.n, TMPVAR0)); ++ hash_remove(iamarray(v), s); ++ } else { ++ clear_array(iamarray(v)); ++ } ++ break; ++ } + + case XC( OC_NEWSOURCE ): ++ debug_printf_eval("NEWSOURCE\n"); + g_progname = op->l.new_progname; + break; + + case XC( OC_RETURN ): ++ debug_printf_eval("RETURN\n"); + copyvar(res, L.v); + break; + + case XC( OC_NEXTFILE ): ++ debug_printf_eval("NEXTFILE\n"); + nextfile = TRUE; + case XC( OC_NEXT ): ++ debug_printf_eval("NEXT\n"); + nextrec = TRUE; + case XC( OC_DONE ): ++ debug_printf_eval("DONE\n"); + clrvar(res); + break; + + case XC( OC_EXIT ): +- awk_exit(L_d); ++ debug_printf_eval("EXIT\n"); ++ if (op1) ++ G.exitcode = (int)L_d; ++ awk_exit(); + + /* -- recursive node type -- */ + +@@ -2749,15 +3089,18 @@ static var *evaluate(node *op, var *res) + break; + + case XC( OC_IN ): ++ debug_printf_eval("IN\n"); + setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0); + break; + + case XC( OC_REGEXP ): ++ debug_printf_eval("REGEXP\n"); + op1 = op; + L.s = getvar_s(intvar[F0]); + goto re_cont; + + case XC( OC_MATCH ): ++ debug_printf_eval("MATCH\n"); + op1 = op->r.n; + re_cont: + { +@@ -2772,61 +3115,80 @@ static var *evaluate(node *op, var *res) + case XC( OC_MOVE ): + debug_printf_eval("MOVE\n"); + /* if source is a temporary string, jusk relink it to dest */ +-//Disabled: if R.v is numeric but happens to have cached R.v->string, +-//then L.v ends up being a string, which is wrong +-// if (R.v == v1+1 && R.v->string) { +-// res = setvar_p(L.v, R.v->string); +-// R.v->string = NULL; +-// } else { ++ if (R.v == TMPVAR1 ++ && !(R.v->type & VF_NUMBER) ++ /* Why check !NUMBER? 
if R.v is a number but has cached R.v->string, ++ * L.v ends up a string, which is wrong */ ++ /*&& R.v->string - always not NULL (right?) */ ++ ) { ++ res = setvar_p(L.v, R.v->string); /* avoids strdup */ ++ R.v->string = NULL; ++ } else { + res = copyvar(L.v, R.v); +-// } ++ } + break; + + case XC( OC_TERNARY ): +- if ((op->r.n->info & OPCLSMASK) != OC_COLON) ++ debug_printf_eval("TERNARY\n"); ++ if (op->r.n->info != TI_COLON) + syntax_error(EMSG_POSSIBLE_ERROR); + res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res); + break; + + case XC( OC_FUNC ): { +- var *vbeg, *v; ++ var *argvars, *sv_fnargs; + const char *sv_progname; ++ int nargs, i; + +- /* The body might be empty, still has to eval the args */ +- if (!op->r.n->info && !op->r.f->body.first) ++ debug_printf_eval("FUNC\n"); ++ ++ if (!op->r.f->defined) + syntax_error(EMSG_UNDEF_FUNC); + +- vbeg = v = nvalloc(op->r.f->nargs + 1); ++ /* The body might be empty, still has to eval the args */ ++ nargs = op->r.f->nargs; ++ argvars = nvalloc(nargs); ++ i = 0; + while (op1) { +- var *arg = evaluate(nextarg(&op1), v1); +- copyvar(v, arg); +- v->type |= VF_CHILD; +- v->x.parent = arg; +- if (++v - vbeg >= op->r.f->nargs) +- break; ++ var *arg = evaluate(nextarg(&op1), TMPVAR0); ++ if (i == nargs) { ++ /* call with more arguments than function takes. ++ * (gawk warns: "warning: function 'f' called with more arguments than declared"). 
++ * They are still evaluated, but discarded: */ ++ clrvar(arg); ++ continue; ++ } ++ copyvar(&argvars[i], arg); ++ argvars[i].type |= VF_CHILD; ++ argvars[i].x.parent = arg; ++ i++; + } + +- v = fnargs; +- fnargs = vbeg; ++ sv_fnargs = fnargs; + sv_progname = g_progname; + ++ fnargs = argvars; + res = evaluate(op->r.f->body.first, res); ++ nvfree(argvars, nargs); + + g_progname = sv_progname; +- nvfree(fnargs); +- fnargs = v; ++ fnargs = sv_fnargs; + + break; + } + + case XC( OC_GETLINE ): +- case XC( OC_PGETLINE ): { ++ debug_printf_eval("GETLINE /\n"); ++ case XC( OC_PGETLINE ): ++ debug_printf_eval("PGETLINE\n"); ++ { + rstream *rsm; + int i; + + if (op1) { + rsm = newfile(L.s); + if (!rsm->F) { ++ /* NB: can't use "opinfo == TI_PGETLINE", would break "cmd" | getline */ + if ((opinfo & OPCLSMASK) == OC_PGETLINE) { + rsm->F = popen(L.s, "r"); + rsm->is_pipe = TRUE; +@@ -2861,16 +3223,34 @@ static var *evaluate(node *op, var *res) + /* simple builtins */ + case XC( OC_FBLTIN ): { + double R_d = R_d; /* for compiler */ ++ debug_printf_eval("FBLTIN\n"); ++ ++ if (op1 && op1->info == TI_COMMA) ++ /* Simple builtins take one arg maximum */ ++ syntax_error("Too many arguments"); + + switch (opn) { + case F_in: + R_d = (long long)L_d; + break; + +- case F_rn: +- R_d = (double)rand() / (double)RAND_MAX; ++ case F_rn: /*rand*/ ++ if (op1) ++ syntax_error("Too many arguments"); ++ { ++#if RAND_MAX >= 0x7fffffff ++ uint32_t u = ((uint32_t)rand() << 16) ^ rand(); ++ uint64_t v = ((uint64_t)rand() << 32) | u; ++ /* the above shift+or is optimized out on 32-bit arches */ ++# if RAND_MAX > 0x7fffffff ++ v &= 0x7fffffffffffffffULL; ++# endif ++ R_d = (double)v / 0x8000000000000000ULL; ++#else ++# error Not implemented for this value of RAND_MAX ++#endif + break; +- ++ } + case F_co: + if (ENABLE_FEATURE_AWK_LIBM) { + R_d = cos(L_d); +@@ -2910,7 +3290,9 @@ static var *evaluate(node *op, var *res) + srand(seed); + break; + +- case F_ti: ++ case F_ti: /*systime*/ ++ if (op1) ++ 
syntax_error("Too many arguments"); + R_d = time(NULL); + break; + +@@ -2949,7 +3331,7 @@ static var *evaluate(node *op, var *res) + rstream *rsm; + int err = 0; + rsm = (rstream *)hash_search(fdhash, L.s); +- debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm); ++ debug_printf_eval("OC_FBLTIN close: op1:%p s:'%s' rsm:%p\n", op1, L.s, rsm); + if (rsm) { + debug_printf_eval("OC_FBLTIN F_cl " + "rsm->is_pipe:%d, ->F:%p\n", +@@ -2960,6 +3342,11 @@ static var *evaluate(node *op, var *res) + */ + if (rsm->F) + err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F); ++//TODO: fix this case: ++// $ awk 'BEGIN { print close(""); print ERRNO }' ++// -1 ++// close of redirection that was never opened ++// (we print 0, 0) + free(rsm->buffer); + hash_remove(fdhash, L.s); + } +@@ -2974,14 +3361,18 @@ static var *evaluate(node *op, var *res) + } + + case XC( OC_BUILTIN ): ++ debug_printf_eval("BUILTIN\n"); + res = exec_builtin(op, res); + break; + + case XC( OC_SPRINTF ): +- setvar_p(res, awk_printf(op1)); ++ debug_printf_eval("SPRINTF\n"); ++ setvar_p(res, awk_printf(op1, NULL)); + break; + +- case XC( OC_UNARY ): { ++ case XC( OC_UNARY ): ++ debug_printf_eval("UNARY\n"); ++ { + double Ld, R_d; + + Ld = R_d = getvar_i(R.v); +@@ -3011,7 +3402,9 @@ static var *evaluate(node *op, var *res) + break; + } + +- case XC( OC_FIELD ): { ++ case XC( OC_FIELD ): ++ debug_printf_eval("FIELD\n"); ++ { + int i = (int)getvar_i(R.v); + if (i < 0) + syntax_error(EMSG_NEGATIVE_FIELD); +@@ -3028,26 +3421,33 @@ static var *evaluate(node *op, var *res) + + /* concatenation (" ") and index joining (",") */ + case XC( OC_CONCAT ): ++ debug_printf_eval("CONCAT /\n"); + case XC( OC_COMMA ): { + const char *sep = ""; +- if ((opinfo & OPCLSMASK) == OC_COMMA) ++ debug_printf_eval("COMMA\n"); ++ if (opinfo == TI_COMMA) + sep = getvar_s(intvar[SUBSEP]); + setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s)); + break; + } + + case XC( OC_LAND ): ++ debug_printf_eval("LAND\n"); + setvar_i(res, istrue(L.v) ? 
ptest(op->r.n) : 0); + break; + + case XC( OC_LOR ): ++ debug_printf_eval("LOR\n"); + setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n)); + break; + + case XC( OC_BINARY ): +- case XC( OC_REPLACE ): { ++ debug_printf_eval("BINARY /\n"); ++ case XC( OC_REPLACE ): ++ debug_printf_eval("REPLACE\n"); ++ { + double R_d = getvar_i(R.v); +- debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn); ++ debug_printf_eval("R_d:%f opn:%c\n", R_d, opn); + switch (opn) { + case '+': + L_d += R_d; +@@ -3083,6 +3483,7 @@ static var *evaluate(node *op, var *res) + case XC( OC_COMPARE ): { + int i = i; /* for compiler */ + double Ld; ++ debug_printf_eval("COMPARE\n"); + + if (is_numeric(L.v) && is_numeric(R.v)) { + Ld = getvar_i(L.v) - getvar_i(R.v); +@@ -3109,7 +3510,7 @@ static var *evaluate(node *op, var *res) + default: + syntax_error(EMSG_POSSIBLE_ERROR); + } /* switch */ +- next: ++ + if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS) + op = op->a.n; + if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS) +@@ -3118,7 +3519,10 @@ static var *evaluate(node *op, var *res) + break; + } /* while (op) */ + +- nvfree(v1); ++ nvfree(tmpvars, 2); ++#undef TMPVAR0 ++#undef TMPVAR1 ++ + debug_printf_eval("returning from %s(): %p\n", __func__, res); + return res; + #undef fnargs +@@ -3126,25 +3530,21 @@ static var *evaluate(node *op, var *res) + #undef sreg + } + +- + /* -------- main & co. 
-------- */ + +-static int awk_exit(int r) ++static int awk_exit(void) + { +- var tv; + unsigned i; +- hash_item *hi; +- +- zero_out_var(&tv); + + if (!exiting) { + exiting = TRUE; + nextrec = FALSE; +- evaluate(endseq.first, &tv); ++ evaluate(endseq.first, &G.exit__tmpvar); + } + + /* waiting for children */ + for (i = 0; i < fdhash->csize; i++) { ++ hash_item *hi; + hi = fdhash->items[i]; + while (hi) { + if (hi->data.rs.F && hi->data.rs.is_pipe) +@@ -3153,65 +3553,7 @@ static int awk_exit(int r) + } + } + +- exit(r); +-} +- +-/* if expr looks like "var=value", perform assignment and return 1, +- * otherwise return 0 */ +-static int is_assignment(const char *expr) +-{ +- char *exprc, *val; +- +- if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) { +- return FALSE; +- } +- +- exprc = xstrdup(expr); +- val = exprc + (val - expr); +- *val++ = '\0'; +- +- unescape_string_in_place(val); +- setvar_u(newvar(exprc), val); +- free(exprc); +- return TRUE; +-} +- +-/* switch to next input file */ +-static rstream *next_input_file(void) +-{ +-#define rsm (G.next_input_file__rsm) +-#define files_happen (G.next_input_file__files_happen) +- +- FILE *F; +- const char *fname, *ind; +- +- if (rsm.F) +- fclose(rsm.F); +- rsm.F = NULL; +- rsm.pos = rsm.adv = 0; +- +- for (;;) { +- if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) { +- if (files_happen) +- return NULL; +- fname = "-"; +- F = stdin; +- break; +- } +- ind = getvar_s(incvar(intvar[ARGIND])); +- fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); +- if (fname && *fname && !is_assignment(fname)) { +- F = xfopen_stdin(fname); +- break; +- } +- } +- +- files_happen = TRUE; +- setvar_s(intvar[FILENAME], fname); +- rsm.F = F; +- return &rsm; +-#undef rsm +-#undef files_happen ++ exit(G.exitcode); + } + + int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +@@ -3224,12 +3566,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv) + #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS + llist_t *list_e = NULL; + 
#endif +- int i, j; +- var *v; +- var tv; +- char **envp; +- char *vnames = (char *)vNames; /* cheat */ +- char *vvalues = (char *)vValues; ++ int i; + + INIT_G(); + +@@ -3238,48 +3575,43 @@ int awk_main(int argc UNUSED_PARAM, char **argv) + if (ENABLE_LOCALE_SUPPORT) + setlocale(LC_NUMERIC, "C"); + +- zero_out_var(&tv); +- +- /* allocate global buffer */ +- g_buf = xmalloc(MAXVARFMT + 1); +- +- vhash = hash_init(); +- ahash = hash_init(); +- fdhash = hash_init(); +- fnhash = hash_init(); +- + /* initialize variables */ +- for (i = 0; *vnames; i++) { +- intvar[i] = v = newvar(nextword(&vnames)); +- if (*vvalues != '\377') +- setvar_s(v, nextword(&vvalues)); +- else +- setvar_i(v, 0); +- +- if (*vnames == '*') { +- v->type |= VF_SPECIAL; +- vnames++; ++ vhash = hash_init(); ++ { ++ char *vnames = (char *)vNames; /* cheat */ ++ char *vvalues = (char *)vValues; ++ for (i = 0; *vnames; i++) { ++ var *v; ++ intvar[i] = v = newvar(nextword(&vnames)); ++ if (*vvalues != '\377') ++ setvar_s(v, nextword(&vvalues)); ++ else ++ setvar_i(v, 0); ++ ++ if (*vnames == '*') { ++ v->type |= VF_SPECIAL; ++ vnames++; ++ } + } + } + + handle_special(intvar[FS]); + handle_special(intvar[RS]); + +- newfile("/dev/stdin")->F = stdin; +- newfile("/dev/stdout")->F = stdout; +- newfile("/dev/stderr")->F = stderr; +- + /* Huh, people report that sometimes environ is NULL. Oh well. 
*/ +- if (environ) for (envp = environ; *envp; envp++) { +- /* environ is writable, thus we don't strdup it needlessly */ +- char *s = *envp; +- char *s1 = strchr(s, '='); +- if (s1) { +- *s1 = '\0'; +- /* Both findvar and setvar_u take const char* +- * as 2nd arg -> environment is not trashed */ +- setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1); +- *s1 = '='; ++ if (environ) { ++ char **envp; ++ for (envp = environ; *envp; envp++) { ++ /* environ is writable, thus we don't strdup it needlessly */ ++ char *s = *envp; ++ char *s1 = strchr(s, '='); ++ if (s1) { ++ *s1 = '\0'; ++ /* Both findvar and setvar_u take const char* ++ * as 2nd arg -> environment is not trashed */ ++ setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1); ++ *s1 = '='; ++ } + } + } + opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL); +@@ -3295,20 +3627,19 @@ int awk_main(int argc UNUSED_PARAM, char **argv) + if (!is_assignment(llist_pop(&list_v))) + bb_show_usage(); + } ++ ++ /* Parse all supplied programs */ ++ fnhash = hash_init(); ++ ahash = hash_init(); + while (list_f) { +- char *s = NULL; +- FILE *from_file; ++ int fd; ++ char *s; + + g_progname = llist_pop(&list_f); +- from_file = xfopen_stdin(g_progname); +- /* one byte is reserved for some trick in next_token */ +- for (i = j = 1; j > 0; i += j) { +- s = xrealloc(s, i + 4096); +- j = fread(s + i, 1, 4094, from_file); +- } +- s[i] = '\0'; +- fclose(from_file); +- parse_program(s + 1); ++ fd = xopen_stdin(g_progname); ++ s = xmalloc_read(fd, NULL); /* it's NUL-terminated */ ++ close(fd); ++ parse_program(s); + free(s); + } + g_progname = "cmd. 
line"; +@@ -3317,11 +3648,23 @@ int awk_main(int argc UNUSED_PARAM, char **argv) + parse_program(llist_pop(&list_e)); + } + #endif ++//FIXME: preserve order of -e and -f ++//TODO: implement -i LIBRARY and -E FILE too, they are easy-ish + if (!(opt & (OPT_f | OPT_e))) { + if (!*argv) + bb_show_usage(); + parse_program(*argv++); + } ++ /* Free unused parse structures */ ++ //hash_free(fnhash); // ~250 bytes when empty, used only for function names ++ //^^^^^^^^^^^^^^^^^ does not work, hash_clear() inside SEGVs ++ // (IOW: hash_clear() assumes it's a hash of variables. fnhash is not). ++ free(fnhash->items); ++ free(fnhash); ++ fnhash = NULL; // debug ++ //hash_free(ahash); // empty after parsing, will reuse as fdhash instead of freeing ++ ++ /* Parsing done, on to executing */ + + /* fill in ARGV array */ + setari_u(intvar[ARGV], 0, "awk"); +@@ -3330,9 +3673,14 @@ int awk_main(int argc UNUSED_PARAM, char **argv) + setari_u(intvar[ARGV], ++i, *argv++); + setvar_i(intvar[ARGC], i + 1); + +- evaluate(beginseq.first, &tv); ++ //fdhash = ahash; // done via define ++ newfile("/dev/stdin")->F = stdin; ++ newfile("/dev/stdout")->F = stdout; ++ newfile("/dev/stderr")->F = stderr; ++ ++ evaluate(beginseq.first, &G.main__tmpvar); + if (!mainseq.first && !endseq.first) +- awk_exit(EXIT_SUCCESS); ++ awk_exit(); + + /* input file could already be opened in BEGIN block */ + if (!iF) +@@ -3347,7 +3695,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv) + nextrec = FALSE; + incvar(intvar[NR]); + incvar(intvar[FNR]); +- evaluate(mainseq.first, &tv); ++ evaluate(mainseq.first, &G.main__tmpvar); + + if (nextfile) + break; +@@ -3359,6 +3707,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv) + iF = next_input_file(); + } + +- awk_exit(EXIT_SUCCESS); ++ awk_exit(); + /*return 0;*/ + } +diff --git a/testsuite/awk.tests b/testsuite/awk.tests +index 92c83d719..4a7a01245 100755 +--- a/testsuite/awk.tests ++++ b/testsuite/awk.tests +@@ -44,6 +44,16 @@ testing "awk handles empty function 
f(arg){}" \ + "L1\n\nL2\n\n" \ + "" "" + ++prg=' ++function empty_fun(){} ++END {empty_fun() ++ print "Ok" ++}' ++testing "awk handles empty function f(){}" \ ++ "awk '$prg'" \ ++ "Ok\n" \ ++ "" "" ++ + prg=' + function outer_fun() { + return 1 +@@ -71,6 +81,23 @@ testing "awk properly handles undefined function" \ + "L1\n\nawk: cmd. line:5: Call to undefined function\n" \ + "" "" + ++prg=' ++BEGIN { ++ v=1 ++ a=2 ++ print v (a) ++}' ++testing "awk 'v (a)' is not a function call, it is a concatenation" \ ++ "awk '$prg' 2>&1" \ ++ "12\n" \ ++ "" "" ++ ++prg='func f(){print"F"};func g(){print"G"};BEGIN{f(g(),g())}' ++testing "awk unused function args are evaluated" \ ++ "awk '$prg' 2>&1" \ ++ "G\nG\nF\n" \ ++ "" "" ++ + + optional DESKTOP + testing "awk hex const 1" "awk '{ print or(0xffffffff,1) }'" "4294967295\n" "" "\n" +@@ -352,19 +379,14 @@ testing "awk -e and ARGC" \ + "" + SKIP= + +-# The examples are in fact not valid awk programs (break/continue +-# can only be used inside loops). +-# But we do accept them outside of loops. +-# We had a bug with misparsing "break ; else" sequence. +-# Test that *that* bug is fixed, using simplest possible scripts: + testing "awk break" \ + "awk -f - 2>&1; echo \$?" \ +- "0\n" \ ++ "awk: -:1: 'break' not in a loop\n1\n" \ + "" \ + 'BEGIN { if (1) break; else a = 1 }' + testing "awk continue" \ + "awk -f - 2>&1; echo \$?" \ +- "0\n" \ ++ "awk: -:1: 'continue' not in a loop\n1\n" \ + "" \ + 'BEGIN { if (1) continue; else a = 1 }' + +@@ -383,6 +405,11 @@ testing "awk errors on missing delete arg" \ + "awk -e '{delete}' 2>&1" "awk: cmd. 
line:1: Too few arguments\n" "" "" + SKIP= + ++optional FEATURE_AWK_GNU_EXTENSIONS ++testing "awk printf('%c') can output NUL" \ ++ "awk '{printf(\"hello%c null\n\", 0)}'" "hello\0 null\n" "" "\n" ++SKIP= ++ + # testing "description" "command" "result" "infile" "stdin" + testing 'awk negative field access' \ + 'awk 2>&1 -- '\''{ $(-1) }'\' \ +@@ -413,4 +440,25 @@ testing 'awk $NF is empty' \ + '' \ + 'a=====123=' + ++testing "awk exit N propagates through END's exit" \ ++ "awk 'BEGIN { exit 42 } END { exit }'; echo \$?" \ ++ "42\n" \ ++ '' '' ++ ++testing "awk print + redirect" \ ++ "awk 'BEGIN { print \"STDERR %s\" >\"/dev/stderr\" }' 2>&1" \ ++ "STDERR %s\n" \ ++ '' '' ++ ++testing "awk \"cmd\" | getline" \ ++ "awk 'BEGIN { \"echo HELLO\" | getline; print }'" \ ++ "HELLO\n" \ ++ '' '' ++ ++# printf %% should print one % (had a bug where it didn't) ++testing 'awk printf %% prints one %' \ ++ "awk 'BEGIN { printf \"%%\n\" }'" \ ++ "%\n" \ ++ '' '' ++ + exit $FAILCOUNT +diff --git a/testsuite/printf.tests b/testsuite/printf.tests +index 34a65926e..050edef71 100755 +--- a/testsuite/printf.tests ++++ b/testsuite/printf.tests +@@ -79,6 +79,11 @@ testing "printf understands %Ld" \ + "-5\n""0\n" \ + "" "" + ++testing "printf understands %%" \ ++ "${bb}printf '%%\n' 2>&1; echo \$?" \ ++ "%\n""0\n" \ ++ "" "" ++ + testing "printf handles positive numbers for %d" \ + "${bb}printf '%d\n' 3 +3 ' 3' ' +3' 2>&1; echo \$?" 
\ + "3\n"\ +-- +2.33.0 + diff --git a/meta/recipes-core/busybox/busybox/0002-man-fix-segfault-in-man-1.patch b/meta/recipes-core/busybox/busybox/0002-man-fix-segfault-in-man-1.patch new file mode 100644 index 0000000000..4a930b7b6f --- /dev/null +++ b/meta/recipes-core/busybox/busybox/0002-man-fix-segfault-in-man-1.patch @@ -0,0 +1,30 @@ +From 4975cace9bf96bfde174f8bb5cc4068d2ea294d4 Mon Sep 17 00:00:00 2001 +From: Denys Vlasenko <vda.linux@googlemail.com> +Date: Tue, 15 Jun 2021 14:47:46 +0200 +Subject: [PATCH] man: fix segfault in "man 1" + +function old new delta +man_main 942 954 +12 + +Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com> + +Upstream-Status: Backport [4d4fc5ca5ee4f] +CVE: CVE-2021-42373 +Signed-off-by: Chen Qi <Qi.Chen@windriver.com> +--- + miscutils/man.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/miscutils/man.c b/miscutils/man.c +index 722f6641e..d319e8bba 100644 +--- a/miscutils/man.c ++++ b/miscutils/man.c +@@ -324,7 +324,7 @@ int man_main(int argc UNUSED_PARAM, char **argv) + + /* is 1st ARG a SECTION? 
*/ + sec_list = conf_sec_list; +- if (is_section_name(conf_sec_list, *argv)) { ++ if (is_section_name(conf_sec_list, *argv) && argv[1]) { + /* yes */ + sec_list = *argv++; + } diff --git a/meta/recipes-core/busybox/busybox_1.33.1.bb b/meta/recipes-core/busybox/busybox_1.33.2.bb index 4002d6a5c6..4a0d3b4556 100644 --- a/meta/recipes-core/busybox/busybox_1.33.1.bb +++ b/meta/recipes-core/busybox/busybox_1.33.2.bb @@ -48,7 +48,9 @@ SRC_URI = "https://busybox.net/downloads/busybox-${PV}.tar.bz2;name=tarball \ file://0001-sysctl-ignore-EIO-of-stable_secret-below-proc-sys-ne.patch \ file://0001-gen_build_files-Use-C-locale-when-calling-sed-on-glo.patch \ file://0001-mktemp-add-tmpdir-option.patch \ + file://0001-awk-fix-CVEs.patch \ + file://0002-man-fix-segfault-in-man-1.patch \ " SRC_URI_append_libc-musl = " file://musl.cfg " -SRC_URI[tarball.sha256sum] = "12cec6bd2b16d8a9446dd16130f2b92982f1819f6e1c5f5887b6db03f5660d28" +SRC_URI[tarball.sha256sum] = "6843ba7977081e735fa0fdb05893e3c002c8c5ad7c9c80da206e603cc0ac47e7" diff --git a/meta/recipes-core/dbus-wait/dbus-wait_git.bb b/meta/recipes-core/dbus-wait/dbus-wait_git.bb index 677768d35a..b39f7523c0 100644 --- a/meta/recipes-core/dbus-wait/dbus-wait_git.bb +++ b/meta/recipes-core/dbus-wait/dbus-wait_git.bb @@ -11,7 +11,7 @@ SRCREV = "6cc6077a36fe2648a5f993fe7c16c9632f946517" PV = "0.1+git${SRCPV}" PR = "r2" -SRC_URI = "git://git.yoctoproject.org/${BPN}" +SRC_URI = "git://git.yoctoproject.org/${BPN};branch=master" UPSTREAM_CHECK_COMMITS = "1" S = "${WORKDIR}/git" diff --git a/meta/recipes-core/expat/expat/CVE-2021-45960.patch b/meta/recipes-core/expat/expat/CVE-2021-45960.patch new file mode 100644 index 0000000000..523449e22c --- /dev/null +++ b/meta/recipes-core/expat/expat/CVE-2021-45960.patch @@ -0,0 +1,65 @@ +From 0adcb34c49bee5b19bd29b16a578c510c23597ea Mon Sep 17 00:00:00 2001 +From: Sebastian Pipping <sebastian@pipping.org> +Date: Mon, 27 Dec 2021 20:15:02 +0100 +Subject: [PATCH] lib: Detect and prevent 
troublesome left shifts in function + storeAtts (CVE-2021-45960) + +Upstream-Status: Backport: +https://github.com/libexpat/libexpat/pull/534/commits/0adcb34c49bee5b19bd29b16a578c510c23597ea + +CVE: CVE-2021-45960 +Signed-off-by: Steve Sakoman <steve@sakoman.com> + +--- + expat/lib/xmlparse.c | 31 +++++++++++++++++++++++++++++-- + 1 file changed, 29 insertions(+), 2 deletions(-) + +diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c +index d730f41c3..b47c31b05 100644 +--- a/lib/xmlparse.c ++++ b/lib/xmlparse.c +@@ -3414,7 +3414,13 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, + if (nPrefixes) { + int j; /* hash table index */ + unsigned long version = parser->m_nsAttsVersion; +- int nsAttsSize = (int)1 << parser->m_nsAttsPower; ++ ++ /* Detect and prevent invalid shift */ ++ if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) { ++ return XML_ERROR_NO_MEMORY; ++ } ++ ++ unsigned int nsAttsSize = 1u << parser->m_nsAttsPower; + unsigned char oldNsAttsPower = parser->m_nsAttsPower; + /* size of hash table must be at least 2 * (# of prefixed attributes) */ + if ((nPrefixes << 1) +@@ -3425,7 +3431,28 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, + ; + if (parser->m_nsAttsPower < 3) + parser->m_nsAttsPower = 3; +- nsAttsSize = (int)1 << parser->m_nsAttsPower; ++ ++ /* Detect and prevent invalid shift */ ++ if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) { ++ /* Restore actual size of memory in m_nsAtts */ ++ parser->m_nsAttsPower = oldNsAttsPower; ++ return XML_ERROR_NO_MEMORY; ++ } ++ ++ nsAttsSize = 1u << parser->m_nsAttsPower; ++ ++ /* Detect and prevent integer overflow. ++ * The preprocessor guard addresses the "always false" warning ++ * from -Wtype-limits on platforms where ++ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ ++#if UINT_MAX >= SIZE_MAX ++ if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) { ++ /* Restore actual size of memory in m_nsAtts */ ++ parser->m_nsAttsPower = oldNsAttsPower; ++ return XML_ERROR_NO_MEMORY; ++ } ++#endif ++ + temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts, + nsAttsSize * sizeof(NS_ATT)); + if (! temp) { diff --git a/meta/recipes-core/expat/expat/CVE-2021-46143.patch b/meta/recipes-core/expat/expat/CVE-2021-46143.patch new file mode 100644 index 0000000000..b1a726d9a8 --- /dev/null +++ b/meta/recipes-core/expat/expat/CVE-2021-46143.patch @@ -0,0 +1,49 @@ +From 85ae9a2d7d0e9358f356b33977b842df8ebaec2b Mon Sep 17 00:00:00 2001 +From: Sebastian Pipping <sebastian@pipping.org> +Date: Sat, 25 Dec 2021 20:52:08 +0100 +Subject: [PATCH] lib: Prevent integer overflow on m_groupSize in function + doProlog (CVE-2021-46143) + +Upstream-Status: Backport: +https://github.com/libexpat/libexpat/pull/538/commits/85ae9a2d7d0e9358f356b33977b842df8ebaec2b + +CVE: CVE-2021-46143 + +Signed-off-by: Steve Sakoman <steve@sakoman.com> +--- + expat/lib/xmlparse.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c +index b47c31b0..8f243126 100644 +--- a/lib/xmlparse.c ++++ b/lib/xmlparse.c +@@ -5046,6 +5046,11 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, + if (parser->m_prologState.level >= parser->m_groupSize) { + if (parser->m_groupSize) { + { ++ /* Detect and prevent integer overflow */ ++ if (parser->m_groupSize > (unsigned int)(-1) / 2u) { ++ return XML_ERROR_NO_MEMORY; ++ } ++ + char *const new_connector = (char *)REALLOC( + parser, parser->m_groupConnector, parser->m_groupSize *= 2); + if (new_connector == NULL) { +@@ -5056,6 +5061,16 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, + } + + if (dtd->scaffIndex) { ++ /* Detect and prevent integer overflow. 
++ * The preprocessor guard addresses the "always false" warning ++ * from -Wtype-limits on platforms where ++ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ ++#if UINT_MAX >= SIZE_MAX ++ if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) { ++ return XML_ERROR_NO_MEMORY; ++ } ++#endif ++ + int *const new_scaff_index = (int *)REALLOC( + parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int)); + if (new_scaff_index == NULL) diff --git a/meta/recipes-core/expat/expat/CVE-2022-22822-27.patch b/meta/recipes-core/expat/expat/CVE-2022-22822-27.patch new file mode 100644 index 0000000000..e569fbc7ab --- /dev/null +++ b/meta/recipes-core/expat/expat/CVE-2022-22822-27.patch @@ -0,0 +1,257 @@ +From 9f93e8036e842329863bf20395b8fb8f73834d9e Mon Sep 17 00:00:00 2001 +From: Sebastian Pipping <sebastian@pipping.org> +Date: Thu, 30 Dec 2021 22:46:03 +0100 +Subject: [PATCH] lib: Prevent integer overflow at multiple places + (CVE-2022-22822 to CVE-2022-22827) + +The involved functions are: +- addBinding (CVE-2022-22822) +- build_model (CVE-2022-22823) +- defineAttribute (CVE-2022-22824) +- lookup (CVE-2022-22825) +- nextScaffoldPart (CVE-2022-22826) +- storeAtts (CVE-2022-22827) + +Upstream-Status: Backport: +https://github.com/libexpat/libexpat/pull/539/commits/9f93e8036e842329863bf20395b8fb8f73834d9e + +CVE: CVE-2022-22822 CVE-2022-22823 CVE-2022-22824 CVE-2022-22825 CVE-2022-22826 CVE-2022-22827 +Signed-off-by: Steve Sakoman <steve@sakoman.com> + +--- + expat/lib/xmlparse.c | 153 ++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 151 insertions(+), 2 deletions(-) + +diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c +index 8f243126..575e73ee 100644 +--- a/lib/xmlparse.c ++++ b/lib/xmlparse.c +@@ -3261,13 +3261,38 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, + + /* get the attributes from the tokenizer */ + n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts); ++ ++ /* Detect and prevent integer 
overflow */ ++ if (n > INT_MAX - nDefaultAtts) { ++ return XML_ERROR_NO_MEMORY; ++ } ++ + if (n + nDefaultAtts > parser->m_attsSize) { + int oldAttsSize = parser->m_attsSize; + ATTRIBUTE *temp; + #ifdef XML_ATTR_INFO + XML_AttrInfo *temp2; + #endif ++ ++ /* Detect and prevent integer overflow */ ++ if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE) ++ || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) { ++ return XML_ERROR_NO_MEMORY; ++ } ++ + parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; ++ ++ /* Detect and prevent integer overflow. ++ * The preprocessor guard addresses the "always false" warning ++ * from -Wtype-limits on platforms where ++ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ ++#if UINT_MAX >= SIZE_MAX ++ if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) { ++ parser->m_attsSize = oldAttsSize; ++ return XML_ERROR_NO_MEMORY; ++ } ++#endif ++ + temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts, + parser->m_attsSize * sizeof(ATTRIBUTE)); + if (temp == NULL) { +@@ -3276,6 +3301,17 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, + } + parser->m_atts = temp; + #ifdef XML_ATTR_INFO ++ /* Detect and prevent integer overflow. ++ * The preprocessor guard addresses the "always false" warning ++ * from -Wtype-limits on platforms where ++ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ ++# if UINT_MAX >= SIZE_MAX ++ if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) { ++ parser->m_attsSize = oldAttsSize; ++ return XML_ERROR_NO_MEMORY; ++ } ++# endif ++ + temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo, + parser->m_attsSize * sizeof(XML_AttrInfo)); + if (temp2 == NULL) { +@@ -3610,9 +3646,31 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, + tagNamePtr->prefixLen = prefixLen; + for (i = 0; localPart[i++];) + ; /* i includes null terminator */ ++ ++ /* Detect and prevent integer overflow */ ++ if (binding->uriLen > INT_MAX - prefixLen ++ || i > INT_MAX - (binding->uriLen + prefixLen)) { ++ return XML_ERROR_NO_MEMORY; ++ } ++ + n = i + binding->uriLen + prefixLen; + if (n > binding->uriAlloc) { + TAG *p; ++ ++ /* Detect and prevent integer overflow */ ++ if (n > INT_MAX - EXPAND_SPARE) { ++ return XML_ERROR_NO_MEMORY; ++ } ++ /* Detect and prevent integer overflow. ++ * The preprocessor guard addresses the "always false" warning ++ * from -Wtype-limits on platforms where ++ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ ++#if UINT_MAX >= SIZE_MAX ++ if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { ++ return XML_ERROR_NO_MEMORY; ++ } ++#endif ++ + uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char)); + if (! uri) + return XML_ERROR_NO_MEMORY; +@@ -3708,6 +3766,21 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, + if (parser->m_freeBindingList) { + b = parser->m_freeBindingList; + if (len > b->uriAlloc) { ++ /* Detect and prevent integer overflow */ ++ if (len > INT_MAX - EXPAND_SPARE) { ++ return XML_ERROR_NO_MEMORY; ++ } ++ ++ /* Detect and prevent integer overflow. ++ * The preprocessor guard addresses the "always false" warning ++ * from -Wtype-limits on platforms where ++ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ ++#if UINT_MAX >= SIZE_MAX ++ if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { ++ return XML_ERROR_NO_MEMORY; ++ } ++#endif ++ + XML_Char *temp = (XML_Char *)REALLOC( + parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); + if (temp == NULL) +@@ -3720,6 +3793,21 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, + b = (BINDING *)MALLOC(parser, sizeof(BINDING)); + if (! b) + return XML_ERROR_NO_MEMORY; ++ ++ /* Detect and prevent integer overflow */ ++ if (len > INT_MAX - EXPAND_SPARE) { ++ return XML_ERROR_NO_MEMORY; ++ } ++ /* Detect and prevent integer overflow. ++ * The preprocessor guard addresses the "always false" warning ++ * from -Wtype-limits on platforms where ++ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ ++#if UINT_MAX >= SIZE_MAX ++ if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { ++ return XML_ERROR_NO_MEMORY; ++ } ++#endif ++ + b->uri + = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE)); + if (! b->uri) { +@@ -6141,7 +6229,24 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, + } + } else { + DEFAULT_ATTRIBUTE *temp; ++ ++ /* Detect and prevent integer overflow */ ++ if (type->allocDefaultAtts > INT_MAX / 2) { ++ return 0; ++ } ++ + int count = type->allocDefaultAtts * 2; ++ ++ /* Detect and prevent integer overflow. ++ * The preprocessor guard addresses the "always false" warning ++ * from -Wtype-limits on platforms where ++ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ ++#if UINT_MAX >= SIZE_MAX ++ if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) { ++ return 0; ++ } ++#endif ++ + temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts, + (count * sizeof(DEFAULT_ATTRIBUTE))); + if (temp == NULL) +@@ -6792,8 +6897,20 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { + /* check for overflow (table is half full) */ + if (table->used >> (table->power - 1)) { + unsigned char newPower = table->power + 1; ++ ++ /* Detect and prevent invalid shift */ ++ if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) { ++ return NULL; ++ } ++ + size_t newSize = (size_t)1 << newPower; + unsigned long newMask = (unsigned long)newSize - 1; ++ ++ /* Detect and prevent integer overflow */ ++ if (newSize > (size_t)(-1) / sizeof(NAMED *)) { ++ return NULL; ++ } ++ + size_t tsize = newSize * sizeof(NAMED *); + NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize); + if (! newV) +@@ -7143,6 +7260,20 @@ nextScaffoldPart(XML_Parser parser) { + if (dtd->scaffCount >= dtd->scaffSize) { + CONTENT_SCAFFOLD *temp; + if (dtd->scaffold) { ++ /* Detect and prevent integer overflow */ ++ if (dtd->scaffSize > UINT_MAX / 2u) { ++ return -1; ++ } ++ /* Detect and prevent integer overflow. ++ * The preprocessor guard addresses the "always false" warning ++ * from -Wtype-limits on platforms where ++ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ ++#if UINT_MAX >= SIZE_MAX ++ if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) { ++ return -1; ++ } ++#endif ++ + temp = (CONTENT_SCAFFOLD *)REALLOC( + parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); + if (temp == NULL) +@@ -7212,8 +7343,26 @@ build_model(XML_Parser parser) { + XML_Content *ret; + XML_Content *cpos; + XML_Char *str; +- int allocsize = (dtd->scaffCount * sizeof(XML_Content) +- + (dtd->contentStringLen * sizeof(XML_Char))); ++ ++ /* Detect and prevent integer overflow. 
++ * The preprocessor guard addresses the "always false" warning ++ * from -Wtype-limits on platforms where ++ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ ++#if UINT_MAX >= SIZE_MAX ++ if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) { ++ return NULL; ++ } ++ if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) { ++ return NULL; ++ } ++#endif ++ if (dtd->scaffCount * sizeof(XML_Content) ++ > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) { ++ return NULL; ++ } ++ ++ const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content) ++ + (dtd->contentStringLen * sizeof(XML_Char))); + + ret = (XML_Content *)MALLOC(parser, allocsize); + if (! ret) diff --git a/meta/recipes-core/expat/expat/CVE-2022-23852.patch b/meta/recipes-core/expat/expat/CVE-2022-23852.patch new file mode 100644 index 0000000000..41425c108b --- /dev/null +++ b/meta/recipes-core/expat/expat/CVE-2022-23852.patch @@ -0,0 +1,33 @@ +From 847a645152f5ebc10ac63b74b604d0c1a79fae40 Mon Sep 17 00:00:00 2001 +From: Samanta Navarro <ferivoz@riseup.net> +Date: Sat, 22 Jan 2022 17:48:00 +0100 +Subject: [PATCH] lib: Detect and prevent integer overflow in XML_GetBuffer + (CVE-2022-23852) + +Upstream-Status: Backport: +https://github.com/libexpat/libexpat/commit/847a645152f5ebc10ac63b74b604d0c1a79fae40 + +CVE: CVE-2022-23852 + +Signed-off-by: Steve Sakoman <steve@sakoman.com> + +--- + expat/lib/xmlparse.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c +index d54af683..5ce31402 100644 +--- a/lib/xmlparse.c ++++ b/lib/xmlparse.c +@@ -2067,6 +2067,11 @@ XML_GetBuffer(XML_Parser parser, int len) { + keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); + if (keep > XML_CONTEXT_BYTES) + keep = XML_CONTEXT_BYTES; ++ /* Detect and prevent integer overflow */ ++ if (keep > INT_MAX - neededSize) { ++ parser->m_errorCode = XML_ERROR_NO_MEMORY; ++ return NULL; ++ } + neededSize += keep; + #endif /* defined 
XML_CONTEXT_BYTES */ + if (neededSize diff --git a/meta/recipes-core/expat/expat/CVE-2022-23990.patch b/meta/recipes-core/expat/expat/CVE-2022-23990.patch new file mode 100644 index 0000000000..c599517b3e --- /dev/null +++ b/meta/recipes-core/expat/expat/CVE-2022-23990.patch @@ -0,0 +1,49 @@ +From ede41d1e186ed2aba88a06e84cac839b770af3a1 Mon Sep 17 00:00:00 2001 +From: Sebastian Pipping <sebastian@pipping.org> +Date: Wed, 26 Jan 2022 02:36:43 +0100 +Subject: [PATCH] lib: Prevent integer overflow in doProlog (CVE-2022-23990) + +The change from "int nameLen" to "size_t nameLen" +addresses the overflow on "nameLen++" in code +"for (; name[nameLen++];)" right above the second +change in the patch. + +Upstream-Status: Backport: +https://github.com/libexpat/libexpat/pull/551/commits/ede41d1e186ed2aba88a06e84cac839b770af3a1 + +CVE: CVE-2022-23990 + +Signed-off-by: Steve Sakoman <steve@sakoman.com> + +--- + lib/xmlparse.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/lib/xmlparse.c b/expat/lib/xmlparse.c +index 5ce31402..d1d17005 100644 +--- a/lib/xmlparse.c ++++ b/lib/xmlparse.c +@@ -5372,7 +5372,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, + if (dtd->in_eldecl) { + ELEMENT_TYPE *el; + const XML_Char *name; +- int nameLen; ++ size_t nameLen; + const char *nxt + = (quant == XML_CQUANT_NONE ? 
next : next - enc->minBytesPerChar); + int myindex = nextScaffoldPart(parser); +@@ -5388,7 +5388,13 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, + nameLen = 0; + for (; name[nameLen++];) + ; +- dtd->contentStringLen += nameLen; ++ ++ /* Detect and prevent integer overflow */ ++ if (nameLen > UINT_MAX - dtd->contentStringLen) { ++ return XML_ERROR_NO_MEMORY; ++ } ++ ++ dtd->contentStringLen += (unsigned)nameLen; + if (parser->m_elementDeclHandler) + handleDefault = XML_FALSE; + } diff --git a/meta/recipes-core/expat/expat/CVE-2022-25235.patch b/meta/recipes-core/expat/expat/CVE-2022-25235.patch new file mode 100644 index 0000000000..9febeae609 --- /dev/null +++ b/meta/recipes-core/expat/expat/CVE-2022-25235.patch @@ -0,0 +1,261 @@ +Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/306b721] +CVE: CVE-2022-25235 + +The commit is a merge commit, and this patch is created by: + +$ git show -m -p --stat 306b72134f157bbfd1637b20a22cabf4acfa136a + +Remove modification for expat/Changes which fails to be applied. + +Signed-off-by: Kai Kang <kai.kang@windriver.com> + +commit 306b72134f157bbfd1637b20a22cabf4acfa136a (from 2cc97e875ef84da4bcf55156c83599116f7523b4) +Merge: 2cc97e87 c16300f0 +Author: Sebastian Pipping <sebastian@pipping.org> +Date: Fri Feb 18 20:12:32 2022 +0100 + + Merge pull request #562 from libexpat/utf8-security + + [CVE-2022-25235] lib: Protect against malformed encoding (e.g. 
malformed UTF-8) +--- + expat/Changes | 7 ++++ + expat/lib/xmltok.c | 5 --- + expat/lib/xmltok_impl.c | 18 ++++---- + expat/tests/runtests.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++ + 4 files changed, 127 insertions(+), 12 deletions(-) + +diff --git a/lib/xmltok.c b/lib/xmltok.c +index a72200e8..3bddf125 100644 +--- a/lib/xmltok.c ++++ b/lib/xmltok.c +@@ -98,11 +98,6 @@ + + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \ + & (1u << (((byte)[2]) & 0x1F))) + +-#define UTF8_GET_NAMING(pages, p, n) \ +- ((n) == 2 \ +- ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ +- : ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0)) +- + /* Detection of invalid UTF-8 sequences is based on Table 3.1B + of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ + with the additional restriction of not allowing the Unicode +diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c +index 0430591b..84ff35f9 100644 +--- a/lib/xmltok_impl.c ++++ b/lib/xmltok_impl.c +@@ -69,7 +69,7 @@ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ +- if (! IS_NAME_CHAR(enc, ptr, n)) { \ ++ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ +@@ -98,7 +98,7 @@ + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ +- if (! IS_NMSTRT_CHAR(enc, ptr, n)) { \ ++ if (IS_INVALID_CHAR(enc, ptr, n) || ! 
IS_NMSTRT_CHAR(enc, ptr, n)) { \ + *nextTokPtr = ptr; \ + return XML_TOK_INVALID; \ + } \ +@@ -1142,6 +1142,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, + case BT_LEAD##n: \ + if (end - ptr < n) \ + return XML_TOK_PARTIAL_CHAR; \ ++ if (IS_INVALID_CHAR(enc, ptr, n)) { \ ++ *nextTokPtr = ptr; \ ++ return XML_TOK_INVALID; \ ++ } \ + if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ + ptr += n; \ + tok = XML_TOK_NAME; \ +@@ -1270,7 +1274,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, + switch (BYTE_TYPE(enc, ptr)) { + # define LEAD_CASE(n) \ + case BT_LEAD##n: \ +- ptr += n; \ ++ ptr += n; /* NOTE: The encoding has already been validated. */ \ + break; + LEAD_CASE(2) + LEAD_CASE(3) +@@ -1339,7 +1343,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, + switch (BYTE_TYPE(enc, ptr)) { + # define LEAD_CASE(n) \ + case BT_LEAD##n: \ +- ptr += n; \ ++ ptr += n; /* NOTE: The encoding has already been validated. */ \ + break; + LEAD_CASE(2) + LEAD_CASE(3) +@@ -1518,7 +1522,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax, + state = inName; \ + } + # define LEAD_CASE(n) \ +- case BT_LEAD##n: \ ++ case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \ + START_NAME ptr += (n - MINBPC(enc)); \ + break; + LEAD_CASE(2) +@@ -1730,7 +1734,7 @@ PREFIX(nameLength)(const ENCODING *enc, const char *ptr) { + switch (BYTE_TYPE(enc, ptr)) { + # define LEAD_CASE(n) \ + case BT_LEAD##n: \ +- ptr += n; \ ++ ptr += n; /* NOTE: The encoding has already been validated. */ \ + break; + LEAD_CASE(2) + LEAD_CASE(3) +@@ -1775,7 +1779,7 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end, + switch (BYTE_TYPE(enc, ptr)) { + # define LEAD_CASE(n) \ + case BT_LEAD##n: \ +- ptr += n; \ ++ ptr += n; /* NOTE: The encoding has already been validated. 
*/ \ + pos->columnNumber++; \ + break; + LEAD_CASE(2) +diff --git a/tests/runtests.c b/tests/runtests.c +index bc5344b1..9b155b82 100644 +--- a/tests/runtests.c ++++ b/tests/runtests.c +@@ -5998,6 +5998,105 @@ START_TEST(test_utf8_in_cdata_section_2) { + } + END_TEST + ++START_TEST(test_utf8_in_start_tags) { ++ struct test_case { ++ bool goodName; ++ bool goodNameStart; ++ const char *tagName; ++ }; ++ ++ // The idea with the tests below is this: ++ // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences ++ // go to isNever and are hence not a concern. ++ // ++ // We start with a character that is a valid name character ++ // (or even name-start character, see XML 1.0r4 spec) and then we flip ++ // single bits at places where (1) the result leaves the UTF-8 encoding space ++ // and (2) we stay in the same n-byte sequence family. ++ // ++ // The flipped bits are highlighted in angle brackets in comments, ++ // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped ++ // the most significant bit to 1 to leave UTF-8 encoding space. 
++ struct test_case cases[] = { ++ // 1-byte UTF-8: [0xxx xxxx] ++ {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':' ++ {false, false, "\xBA"}, // [<1>011 1010] ++ {true, false, "\x39"}, // [0011 1001] = ASCII nine '9' ++ {false, false, "\xB9"}, // [<1>011 1001] ++ ++ // 2-byte UTF-8: [110x xxxx] [10xx xxxx] ++ {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] = ++ // Arabic small waw U+06E5 ++ {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101] ++ {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101] ++ {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101] ++ {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] = ++ // combining char U+0301 ++ {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001] ++ {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001] ++ {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001] ++ ++ // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx] ++ {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] = ++ // Devanagari Letter A U+0905 ++ {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101] ++ {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101] ++ {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101] ++ {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101] ++ {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101] ++ {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] = ++ // combining char U+0901 ++ {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001] ++ {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001] ++ {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001] ++ {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001] ++ {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001] ++ }; ++ const bool atNameStart[] = {true, false}; ++ ++ size_t i = 0; ++ char doc[1024]; ++ size_t failCount = 0; ++ ++ 
for (; i < sizeof(cases) / sizeof(cases[0]); i++) { ++ size_t j = 0; ++ for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) { ++ const bool expectedSuccess ++ = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName; ++ sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName); ++ XML_Parser parser = XML_ParserCreate(NULL); ++ ++ const enum XML_Status status ++ = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE); ++ ++ bool success = true; ++ if ((status == XML_STATUS_OK) != expectedSuccess) { ++ success = false; ++ } ++ if ((status == XML_STATUS_ERROR) ++ && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) { ++ success = false; ++ } ++ ++ if (! success) { ++ fprintf( ++ stderr, ++ "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n", ++ (unsigned)i + 1u, atNameStart[j] ? " " : "not ", ++ (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser)); ++ failCount++; ++ } ++ ++ XML_ParserFree(parser); ++ } ++ } ++ ++ if (failCount > 0) { ++ fail("UTF-8 regression detected"); ++ } ++} ++END_TEST ++ + /* Test trailing spaces in elements are accepted */ + static void XMLCALL + record_element_end_handler(void *userData, const XML_Char *name) { +@@ -6175,6 +6274,14 @@ START_TEST(test_bad_doctype) { + } + END_TEST + ++START_TEST(test_bad_doctype_utf8) { ++ const char *text = "<!DOCTYPE \xDB\x25" ++ "doc><doc/>"; // [1101 1011] [<0>010 0101] ++ expect_failure(text, XML_ERROR_INVALID_TOKEN, ++ "Invalid UTF-8 in DOCTYPE not faulted"); ++} ++END_TEST ++ + START_TEST(test_bad_doctype_utf16) { + const char text[] = + /* <!DOCTYPE doc [ \x06f2 ]><doc/> +@@ -11870,6 +11977,7 @@ make_suite(void) { + tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom); + tcase_add_test(tc_basic, test_utf8_in_cdata_section); + tcase_add_test(tc_basic, test_utf8_in_cdata_section_2); ++ tcase_add_test(tc_basic, test_utf8_in_start_tags); + tcase_add_test(tc_basic, test_trailing_spaces_in_elements); + tcase_add_test(tc_basic, 
test_utf16_attribute); + tcase_add_test(tc_basic, test_utf16_second_attr); +@@ -11878,6 +11986,7 @@ make_suite(void) { + tcase_add_test(tc_basic, test_bad_attr_desc_keyword); + tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16); + tcase_add_test(tc_basic, test_bad_doctype); ++ tcase_add_test(tc_basic, test_bad_doctype_utf8); + tcase_add_test(tc_basic, test_bad_doctype_utf16); + tcase_add_test(tc_basic, test_bad_doctype_plus); + tcase_add_test(tc_basic, test_bad_doctype_star); diff --git a/meta/recipes-core/expat/expat/CVE-2022-25236-1.patch b/meta/recipes-core/expat/expat/CVE-2022-25236-1.patch new file mode 100644 index 0000000000..ab53d99c8f --- /dev/null +++ b/meta/recipes-core/expat/expat/CVE-2022-25236-1.patch @@ -0,0 +1,116 @@ +Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/2cc97e87] +CVE: CVE-2022-25236 + +The commit is a merge commit, and this patch is created by: + +$ git diff -p --stat 2cc97e87~ 2cc97e87 + +Remove modification for expat/Changes which fails to be applied. 
+ +Signed-off-by: Kai Kang <kai.kang@windriver.com> + +commit 2cc97e875ef84da4bcf55156c83599116f7523b4 (from d477fdd284468f2ab822024e75702f2c1b254f42) +Merge: d477fdd2 e4d7e497 +Author: Sebastian Pipping <sebastian@pipping.org> +Date: Fri Feb 18 18:01:27 2022 +0100 + + Merge pull request #561 from libexpat/namesep-security + + [CVE-2022-25236] lib: Protect against insertion of namesep characters into namespace URIs + +--- + expat/Changes | 16 ++++++++++++++++ + expat/lib/xmlparse.c | 17 +++++++++++++---- + expat/tests/runtests.c | 30 ++++++++++++++++++++++++++++++ + 3 files changed, 59 insertions(+), 4 deletions(-) + +diff --git a/lib/xmlparse.c b/lib/xmlparse.c +index 7376aab1..c98e2e9f 100644 +--- a/lib/xmlparse.c ++++ b/lib/xmlparse.c +@@ -718,8 +718,7 @@ XML_ParserCreate(const XML_Char *encodingName) { + + XML_Parser XMLCALL + XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { +- XML_Char tmp[2]; +- *tmp = nsSep; ++ XML_Char tmp[2] = {nsSep, 0}; + return XML_ParserCreate_MM(encodingName, NULL, tmp); + } + +@@ -1344,8 +1343,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, + would be otherwise. + */ + if (parser->m_ns) { +- XML_Char tmp[2]; +- *tmp = parser->m_namespaceSeparator; ++ XML_Char tmp[2] = {parser->m_namespaceSeparator, 0}; + parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); + } else { + parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd); +@@ -3761,6 +3759,17 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, + if (! 
mustBeXML && isXMLNS + && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) + isXMLNS = XML_FALSE; ++ ++ // NOTE: While Expat does not validate namespace URIs against RFC 3986, ++ // we have to at least make sure that the XML processor on top of ++ // Expat (that is splitting tag names by namespace separator into ++ // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused ++ // by an attacker putting additional namespace separator characters ++ // into namespace declarations. That would be ambiguous and not to ++ // be expected. ++ if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) { ++ return XML_ERROR_SYNTAX; ++ } + } + isXML = isXML && len == xmlLen; + isXMLNS = isXMLNS && len == xmlnsLen; +diff --git a/tests/runtests.c b/tests/runtests.c +index d07203f2..bc5344b1 100644 +--- a/tests/runtests.c ++++ b/tests/runtests.c +@@ -7220,6 +7220,35 @@ START_TEST(test_ns_double_colon_doctype) { + } + END_TEST + ++START_TEST(test_ns_separator_in_uri) { ++ struct test_case { ++ enum XML_Status expectedStatus; ++ const char *doc; ++ }; ++ struct test_case cases[] = { ++ {XML_STATUS_OK, "<doc xmlns='one_two' />"}, ++ {XML_STATUS_ERROR, "<doc xmlns='one
two' />"}, ++ }; ++ ++ size_t i = 0; ++ size_t failCount = 0; ++ for (; i < sizeof(cases) / sizeof(cases[0]); i++) { ++ XML_Parser parser = XML_ParserCreateNS(NULL, '\n'); ++ XML_SetElementHandler(parser, dummy_start_element, dummy_end_element); ++ if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc), ++ /*isFinal*/ XML_TRUE) ++ != cases[i].expectedStatus) { ++ failCount++; ++ } ++ XML_ParserFree(parser); ++ } ++ ++ if (failCount) { ++ fail("Namespace separator handling is broken"); ++ } ++} ++END_TEST ++ + /* Control variable; the number of times duff_allocator() will successfully + * allocate */ + #define ALLOC_ALWAYS_SUCCEED (-1) +@@ -11905,6 +11934,7 @@ make_suite(void) { + tcase_add_test(tc_namespace, test_ns_utf16_doctype); + tcase_add_test(tc_namespace, test_ns_invalid_doctype); + tcase_add_test(tc_namespace, test_ns_double_colon_doctype); ++ tcase_add_test(tc_namespace, test_ns_separator_in_uri); + + suite_add_tcase(s, tc_misc); + tcase_add_checked_fixture(tc_misc, NULL, basic_teardown); diff --git a/meta/recipes-core/expat/expat/CVE-2022-25236-2.patch b/meta/recipes-core/expat/expat/CVE-2022-25236-2.patch new file mode 100644 index 0000000000..0f14c9631b --- /dev/null +++ b/meta/recipes-core/expat/expat/CVE-2022-25236-2.patch @@ -0,0 +1,232 @@ +Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/f178826b] +CVE: CVE-2022-25236 + +The commit is a merge commit, and this patch is created by: + +$ git show -m -p --stat f178826b + +Remove changes for expat/Changes and reference.html which fail to be applied. 
+ +Signed-off-by: Kai Kang <kai.kang@windriver.com> + +commit f178826bb1e9c8ee23202f1be55ad4ac7b649e84 (from c99e0e7f2b15b48848038992ecbb4480f957cfe9) +Merge: c99e0e7f 9579f7ea +Author: Sebastian Pipping <sebastian@pipping.org> +Date: Fri Mar 4 18:43:39 2022 +0100 + + Merge pull request #577 from libexpat/namesep + + lib: Relax fix to CVE-2022-25236 with regard to RFC 3986 URI characters (fixes #572) +--- + expat/Changes | 16 ++++++ + expat/doc/reference.html | 8 +++ + expat/lib/expat.h | 11 ++++ + expat/lib/xmlparse.c | 139 ++++++++++++++++++++++++++++++++++++++++++++--- + expat/tests/runtests.c | 8 ++- + 5 files changed, 171 insertions(+), 11 deletions(-) + +diff --git a/lib/expat.h b/lib/expat.h +index 5ab493f7..181fc960 100644 +--- a/lib/expat.h ++++ b/lib/expat.h +@@ -239,6 +239,17 @@ XML_ParserCreate(const XML_Char *encoding); + and the local part will be concatenated without any separator. + It is a programming error to use the separator '\0' with namespace + triplets (see XML_SetReturnNSTriplet). ++ If a namespace separator is chosen that can be part of a URI or ++ part of an XML name, splitting an expanded name back into its ++ 1, 2 or 3 original parts on application level in the element handler ++ may end up vulnerable, so these are advised against; sane choices for ++ a namespace separator are e.g. '\n' (line feed) and '|' (pipe). ++ ++ Note that Expat does not validate namespace URIs (beyond encoding) ++ against RFC 3986 today (and is not required to do so with regard to ++ the XML 1.0 namespaces specification) but it may start doing that ++ in future releases. Before that, an application using Expat must ++ be ready to receive namespace URIs containing non-URI characters. 
+ */ + XMLPARSEAPI(XML_Parser) + XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); +diff --git a/lib/xmlparse.c b/lib/xmlparse.c +index 59da19c8..6fe2cf1e 100644 +--- a/lib/xmlparse.c ++++ b/lib/xmlparse.c +@@ -3705,6 +3705,117 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, + return XML_ERROR_NONE; + } + ++static XML_Bool ++is_rfc3986_uri_char(XML_Char candidate) { ++ // For the RFC 3986 ANBF grammar see ++ // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A ++ ++ switch (candidate) { ++ // From rule "ALPHA" (uppercase half) ++ case 'A': ++ case 'B': ++ case 'C': ++ case 'D': ++ case 'E': ++ case 'F': ++ case 'G': ++ case 'H': ++ case 'I': ++ case 'J': ++ case 'K': ++ case 'L': ++ case 'M': ++ case 'N': ++ case 'O': ++ case 'P': ++ case 'Q': ++ case 'R': ++ case 'S': ++ case 'T': ++ case 'U': ++ case 'V': ++ case 'W': ++ case 'X': ++ case 'Y': ++ case 'Z': ++ ++ // From rule "ALPHA" (lowercase half) ++ case 'a': ++ case 'b': ++ case 'c': ++ case 'd': ++ case 'e': ++ case 'f': ++ case 'g': ++ case 'h': ++ case 'i': ++ case 'j': ++ case 'k': ++ case 'l': ++ case 'm': ++ case 'n': ++ case 'o': ++ case 'p': ++ case 'q': ++ case 'r': ++ case 's': ++ case 't': ++ case 'u': ++ case 'v': ++ case 'w': ++ case 'x': ++ case 'y': ++ case 'z': ++ ++ // From rule "DIGIT" ++ case '0': ++ case '1': ++ case '2': ++ case '3': ++ case '4': ++ case '5': ++ case '6': ++ case '7': ++ case '8': ++ case '9': ++ ++ // From rule "pct-encoded" ++ case '%': ++ ++ // From rule "unreserved" ++ case '-': ++ case '.': ++ case '_': ++ case '~': ++ ++ // From rule "gen-delims" ++ case ':': ++ case '/': ++ case '?': ++ case '#': ++ case '[': ++ case ']': ++ case '@': ++ ++ // From rule "sub-delims" ++ case '!': ++ case '$': ++ case '&': ++ case '\'': ++ case '(': ++ case ')': ++ case '*': ++ case '+': ++ case ',': ++ case ';': ++ case '=': ++ return XML_TRUE; ++ ++ default: ++ return XML_FALSE; ++ } ++} ++ + /* addBinding() overwrites the 
value of prefix->binding without checking. + Therefore one must keep track of the old value outside of addBinding(). + */ +@@ -3763,14 +3874,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, + && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) + isXMLNS = XML_FALSE; + +- // NOTE: While Expat does not validate namespace URIs against RFC 3986, +- // we have to at least make sure that the XML processor on top of +- // Expat (that is splitting tag names by namespace separator into +- // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused +- // by an attacker putting additional namespace separator characters +- // into namespace declarations. That would be ambiguous and not to +- // be expected. +- if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) { ++ // NOTE: While Expat does not validate namespace URIs against RFC 3986 ++ // today (and is not REQUIRED to do so with regard to the XML 1.0 ++ // namespaces specification) we have to at least make sure, that ++ // the application on top of Expat (that is likely splitting expanded ++ // element names ("qualified names") of form ++ // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces ++ // in its element handler code) cannot be confused by an attacker ++ // putting additional namespace separator characters into namespace ++ // declarations. That would be ambiguous and not to be expected. ++ // ++ // While the HTML API docs of function XML_ParserCreateNS have been ++ // advising against use of a namespace separator character that can ++ // appear in a URI for >20 years now, some widespread applications ++ // are using URI characters (':' (colon) in particular) for a ++ // namespace separator, in practice. To keep these applications ++ // functional, we only reject namespaces URIs containing the ++ // application-chosen namespace separator if the chosen separator ++ // is a non-URI character with regard to RFC 3986. 
++ if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator) ++ && ! is_rfc3986_uri_char(uri[len])) { + return XML_ERROR_SYNTAX; + } + } +diff --git a/tests/runtests.c b/tests/runtests.c +index 60da868e..712706c4 100644 +--- a/tests/runtests.c ++++ b/tests/runtests.c +@@ -7406,16 +7406,18 @@ START_TEST(test_ns_separator_in_uri) { + struct test_case { + enum XML_Status expectedStatus; + const char *doc; ++ XML_Char namesep; + }; + struct test_case cases[] = { +- {XML_STATUS_OK, "<doc xmlns='one_two' />"}, +- {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />"}, ++ {XML_STATUS_OK, "<doc xmlns='one_two' />", XCS('\n')}, ++ {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />", XCS('\n')}, ++ {XML_STATUS_OK, "<doc xmlns='one:two' />", XCS(':')}, + }; + + size_t i = 0; + size_t failCount = 0; + for (; i < sizeof(cases) / sizeof(cases[0]); i++) { +- XML_Parser parser = XML_ParserCreateNS(NULL, '\n'); ++ XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep); + XML_SetElementHandler(parser, dummy_start_element, dummy_end_element); + if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc), + /*isFinal*/ XML_TRUE) diff --git a/meta/recipes-core/expat/expat_2.2.10.bb b/meta/recipes-core/expat/expat_2.2.10.bb index 08e8ff1cea..f99fa7edb6 100644 --- a/meta/recipes-core/expat/expat_2.2.10.bb +++ b/meta/recipes-core/expat/expat_2.2.10.bb @@ -10,9 +10,17 @@ VERSION_TAG = "${@d.getVar('PV').replace('.', '_')}" SRC_URI = "https://github.com/libexpat/libexpat/releases/download/R_${VERSION_TAG}/expat-${PV}.tar.bz2 \ file://libtool-tag.patch \ - file://run-ptest \ - file://0001-Add-output-of-tests-result.patch \ - " + file://run-ptest \ + file://0001-Add-output-of-tests-result.patch \ + file://CVE-2022-22822-27.patch \ + file://CVE-2021-45960.patch \ + file://CVE-2021-46143.patch \ + file://CVE-2022-23852.patch \ + file://CVE-2022-23990.patch \ + file://CVE-2022-25235.patch \ + file://CVE-2022-25236-1.patch \ + file://CVE-2022-25236-2.patch \ + " UPSTREAM_CHECK_URI = "https://github.com/libexpat/libexpat/releases/" diff --git a/meta/recipes-core/fts/fts_1.2.7.bb b/meta/recipes-core/fts/fts_1.2.7.bb index ea820cb0c3..d3b0f31eda 100644 --- a/meta/recipes-core/fts/fts_1.2.7.bb +++ b/meta/recipes-core/fts/fts_1.2.7.bb @@ -10,7 +10,7 @@ SECTION = "libs" SRCREV = "0bde52df588e8969879a2cae51c3a4774ec62472" -SRC_URI = "git://github.com/pullmoll/musl-fts.git" +SRC_URI = "git://github.com/pullmoll/musl-fts.git;branch=master;protocol=https" S = "${WORKDIR}/git" diff --git a/meta/recipes-core/glibc/cross-localedef-native_2.33.bb b/meta/recipes-core/glibc/cross-localedef-native_2.33.bb index ec59c6ba10..22d3f96c09 100644 --- 
a/meta/recipes-core/glibc/cross-localedef-native_2.33.bb +++ b/meta/recipes-core/glibc/cross-localedef-native_2.33.bb @@ -20,7 +20,7 @@ inherit native FILESEXTRAPATHS =. "${FILE_DIRNAME}/${PN}:${FILE_DIRNAME}/glibc:" SRC_URI = "${GLIBC_GIT_URI};branch=${SRCBRANCH};name=glibc \ - git://github.com/kraj/localedef;branch=master;name=localedef;destsuffix=git/localedef \ + git://github.com/kraj/localedef;branch=master;name=localedef;destsuffix=git/localedef;protocol=https \ \ file://0001-localedef-Add-hardlink-resolver-from-util-linux.patch \ file://0002-localedef-fix-ups-hardlink-to-make-it-compile.patch \ diff --git a/meta/recipes-core/glibc/glibc-version.inc b/meta/recipes-core/glibc/glibc-version.inc index 3a95173175..e1eefdee49 100644 --- a/meta/recipes-core/glibc/glibc-version.inc +++ b/meta/recipes-core/glibc/glibc-version.inc @@ -1,6 +1,6 @@ SRCBRANCH ?= "release/2.33/master" PV = "2.33" -SRCREV_glibc ?= "9826b03b747b841f5fc6de2054bf1ef3f5c4bdf3" +SRCREV_glibc ?= "3e2a15c666e40e5ee740e5079c56d83469280323" SRCREV_localedef ?= "bd644c9e6f3e20c5504da1488448173c69c56c28" GLIBC_GIT_URI ?= "git://sourceware.org/git/glibc.git" diff --git a/meta/recipes-core/glibc/glibc/0001-CVE-2021-38604.patch b/meta/recipes-core/glibc/glibc/0001-CVE-2021-38604.patch deleted file mode 100644 index 8a52ac957c..0000000000 --- a/meta/recipes-core/glibc/glibc/0001-CVE-2021-38604.patch +++ /dev/null @@ -1,40 +0,0 @@ -From b805aebd42364fe696e417808a700fdb9800c9e8 Mon Sep 17 00:00:00 2001 -From: Nikita Popov <npv1310@gmail.com> -Date: Mon, 9 Aug 2021 20:17:34 +0530 -Subject: [PATCH] librt: fix NULL pointer dereference (bug 28213) - -Helper thread frees copied attribute on NOTIFY_REMOVED message -received from the OS kernel. Unfortunately, it fails to check whether -copied attribute actually exists (data.attr != NULL). This worked -earlier because free() checks passed pointer before actually -attempting to release corresponding memory. But -__pthread_attr_destroy assumes pointer is not NULL. 
- -So passing NULL pointer to __pthread_attr_destroy will result in -segmentation fault. This scenario is possible if -notification->sigev_notify_attributes == NULL (which means default -thread attributes should be used). - -Upstream-Status: Backport [https://sourceware.org/git/?p=glibc.git;a=commit;h=b805aebd42364fe696e417808a700fdb9800c9e8] -CVE: CVE-2021-38604 - -Signed-off-by: Nikita Popov <npv1310@gmail.com> -Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org> -Signed-off-by: Vinay Kumar <vinay.m.engg@gmail.com> ---- - sysdeps/unix/sysv/linux/mq_notify.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/sysdeps/unix/sysv/linux/mq_notify.c b/sysdeps/unix/sysv/linux/mq_notify.c -index 6f46d29d1d..1714e1cc5f 100644 ---- a/sysdeps/unix/sysv/linux/mq_notify.c -+++ b/sysdeps/unix/sysv/linux/mq_notify.c -@@ -132,7 +132,7 @@ helper_thread (void *arg) - to wait until it is done with it. */ - (void) __pthread_barrier_wait (&notify_barrier); - } -- else if (data.raw[NOTIFY_COOKIE_LEN - 1] == NOTIFY_REMOVED) -+ else if (data.raw[NOTIFY_COOKIE_LEN - 1] == NOTIFY_REMOVED && data.attr != NULL) - { - /* The only state we keep is the copy of the thread attributes. */ - pthread_attr_destroy (data.attr); diff --git a/meta/recipes-core/glibc/glibc/0001-nptl-Remove-private-futex-optimization-BZ-27304.patch b/meta/recipes-core/glibc/glibc/0001-nptl-Remove-private-futex-optimization-BZ-27304.patch deleted file mode 100644 index 39fde5b785..0000000000 --- a/meta/recipes-core/glibc/glibc/0001-nptl-Remove-private-futex-optimization-BZ-27304.patch +++ /dev/null @@ -1,49 +0,0 @@ -From c4ad832276f4dadfa40904109b26a521468f66bc Mon Sep 17 00:00:00 2001 -From: Florian Weimer <fweimer@redhat.com> -Date: Thu, 4 Feb 2021 15:00:20 +0100 -Subject: [PATCH] nptl: Remove private futex optimization [BZ #27304] - -It is effectively used, unexcept for pthread_cond_destroy, where we do -not want it; see bug 27304. The internal locks do not support a -process-shared mode. 
- -This fixes commit dc6cfdc934db9997c33728082d63552b9eee4563 ("nptl: -Move pthread_cond_destroy implementation into libc"). - -Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> - -Upstream-Status: Backport [https://sourceware.org/bugzilla/show_bug.cgi?id=27304] -Signed-off-by: Yanfei Xu <yanfei.xu@windriver.com> ---- - sysdeps/nptl/lowlevellock-futex.h | 14 +------------- - 1 file changed, 1 insertion(+), 13 deletions(-) - -diff --git a/sysdeps/nptl/lowlevellock-futex.h b/sysdeps/nptl/lowlevellock-futex.h -index ecb729da6b..ca96397a4a 100644 ---- a/sysdeps/nptl/lowlevellock-futex.h -+++ b/sysdeps/nptl/lowlevellock-futex.h -@@ -50,20 +50,8 @@ - #define LLL_SHARED FUTEX_PRIVATE_FLAG - - #ifndef __ASSEMBLER__ -- --# if IS_IN (libc) || IS_IN (rtld) --/* In libc.so or ld.so all futexes are private. */ --# define __lll_private_flag(fl, private) \ -- ({ \ -- /* Prevent warnings in callers of this macro. */ \ -- int __lll_private_flag_priv __attribute__ ((unused)); \ -- __lll_private_flag_priv = (private); \ -- ((fl) | FUTEX_PRIVATE_FLAG); \ -- }) --# else --# define __lll_private_flag(fl, private) \ -+# define __lll_private_flag(fl, private) \ - (((fl) | FUTEX_PRIVATE_FLAG) ^ (private)) --# endif - - # define lll_futex_syscall(nargs, futexp, op, ...) \ - ({ \ --- -2.27.0 - diff --git a/meta/recipes-core/glibc/glibc/0002-CVE-2021-38604.patch b/meta/recipes-core/glibc/glibc/0002-CVE-2021-38604.patch deleted file mode 100644 index b654cdfecb..0000000000 --- a/meta/recipes-core/glibc/glibc/0002-CVE-2021-38604.patch +++ /dev/null @@ -1,147 +0,0 @@ -From 4cc79c217744743077bf7a0ec5e0a4318f1e6641 Mon Sep 17 00:00:00 2001 -From: Nikita Popov <npv1310@gmail.com> -Date: Thu, 12 Aug 2021 16:09:50 +0530 -Subject: [PATCH] librt: add test (bug 28213) - -This test implements following logic: -1) Create POSIX message queue. - Register a notification with mq_notify (using NULL attributes). - Then immediately unregister the notification with mq_notify. 
- Helper thread in a vulnerable version of glibc - should cause NULL pointer dereference after these steps. -2) Once again, register the same notification. - Try to send a dummy message. - Test is considered successfulif the dummy message - is successfully received by the callback function. - -Upstream-Status: Backport [https://sourceware.org/git/?p=glibc.git;a=commit;h=4cc79c217744743077bf7a0ec5e0a4318f1e6641] -CVE: CVE-2021-38604 - -Signed-off-by: Nikita Popov <npv1310@gmail.com> -Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org> -Signed-off-by: Vinay Kumar <vinay.m.engg@gmail.com> ---- - rt/Makefile | 1 + - rt/tst-bz28213.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 102 insertions(+) - create mode 100644 rt/tst-bz28213.c - -diff --git a/rt/Makefile b/rt/Makefile -index 7b374f2073..c87d95793a 100644 ---- a/rt/Makefile -+++ b/rt/Makefile -@@ -44,6 +44,7 @@ tests := tst-shm tst-timer tst-timer2 \ - tst-aio7 tst-aio8 tst-aio9 tst-aio10 \ - tst-mqueue1 tst-mqueue2 tst-mqueue3 tst-mqueue4 \ - tst-mqueue5 tst-mqueue6 tst-mqueue7 tst-mqueue8 tst-mqueue9 \ -+ tst-bz28213 \ - tst-timer3 tst-timer4 tst-timer5 \ - tst-cpuclock2 tst-cputimer1 tst-cputimer2 tst-cputimer3 \ - tst-shm-cancel -diff --git a/rt/tst-bz28213.c b/rt/tst-bz28213.c -new file mode 100644 -index 0000000000..0c096b5a0a ---- /dev/null -+++ b/rt/tst-bz28213.c -@@ -0,0 +1,101 @@ -+/* Bug 28213: test for NULL pointer dereference in mq_notify. -+ Copyright (C) The GNU Toolchain Authors. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. 
-+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ <https://www.gnu.org/licenses/>. */ -+ -+#include <errno.h> -+#include <sys/types.h> -+#include <sys/stat.h> -+#include <fcntl.h> -+#include <unistd.h> -+#include <mqueue.h> -+#include <signal.h> -+#include <stdlib.h> -+#include <string.h> -+#include <support/check.h> -+ -+static mqd_t m = -1; -+static const char msg[] = "hello"; -+ -+static void -+check_bz28213_cb (union sigval sv) -+{ -+ char buf[sizeof (msg)]; -+ -+ (void) sv; -+ -+ TEST_VERIFY_EXIT ((size_t) mq_receive (m, buf, sizeof (buf), NULL) -+ == sizeof (buf)); -+ TEST_VERIFY_EXIT (memcmp (buf, msg, sizeof (buf)) == 0); -+ -+ exit (0); -+} -+ -+static void -+check_bz28213 (void) -+{ -+ struct sigevent sev; -+ -+ memset (&sev, '\0', sizeof (sev)); -+ sev.sigev_notify = SIGEV_THREAD; -+ sev.sigev_notify_function = check_bz28213_cb; -+ -+ /* Step 1: Register & unregister notifier. -+ Helper thread should receive NOTIFY_REMOVED notification. -+ In a vulnerable version of glibc, NULL pointer dereference follows. */ -+ TEST_VERIFY_EXIT (mq_notify (m, &sev) == 0); -+ TEST_VERIFY_EXIT (mq_notify (m, NULL) == 0); -+ -+ /* Step 2: Once again, register notification. -+ Try to send one message. -+ Test is considered successful, if the callback does exit (0). */ -+ TEST_VERIFY_EXIT (mq_notify (m, &sev) == 0); -+ TEST_VERIFY_EXIT (mq_send (m, msg, sizeof (msg), 1) == 0); -+ -+ /* Wait... 
*/ -+ pause (); -+} -+ -+static int -+do_test (void) -+{ -+ static const char m_name[] = "/bz28213_queue"; -+ struct mq_attr m_attr; -+ -+ memset (&m_attr, '\0', sizeof (m_attr)); -+ m_attr.mq_maxmsg = 1; -+ m_attr.mq_msgsize = sizeof (msg); -+ -+ m = mq_open (m_name, -+ O_RDWR | O_CREAT | O_EXCL, -+ 0600, -+ &m_attr); -+ -+ if (m < 0) -+ { -+ if (errno == ENOSYS) -+ FAIL_UNSUPPORTED ("POSIX message queues are not implemented\n"); -+ FAIL_EXIT1 ("Failed to create POSIX message queue: %m\n"); -+ } -+ -+ TEST_VERIFY_EXIT (mq_unlink (m_name) == 0); -+ -+ check_bz28213 (); -+ -+ return 0; -+} -+ -+#include <support/test-driver.c> diff --git a/meta/recipes-core/glibc/glibc/0031-x86-Require-full-ISA-support-for-x86-64-level-marker.patch b/meta/recipes-core/glibc/glibc/0031-x86-Require-full-ISA-support-for-x86-64-level-marker.patch deleted file mode 100644 index 3cb60b2e55..0000000000 --- a/meta/recipes-core/glibc/glibc/0031-x86-Require-full-ISA-support-for-x86-64-level-marker.patch +++ /dev/null @@ -1,116 +0,0 @@ -From b1971f6f1331d738d1d6b376b4741668a7546125 Mon Sep 17 00:00:00 2001 -From: "H.J. Lu" <hjl.tools@gmail.com> -Date: Tue, 2 Feb 2021 13:45:58 -0800 -Subject: [PATCH] x86: Require full ISA support for x86-64 level marker [BZ #27318] - -Since -march=sandybridge enables ISAs in x86-64 ISA level v3, the v3 -marker is set on libc.so. We couldn't set the needed ISA marker to v2 -since this libc won't run on all v2 machines. Technically, the v3 marker -is correct. But the resulting libc.so won't run on Sandy Brigde, which -is a v2 machine, even when libc is compiled with -march=sandybridge: - -$ ./elf/ld.so ./libc.so -./libc.so: (p) CPU ISA level is lower than required: needed: 7; got: 3 - -Instead, we require full ISA support for x86-64 level marker and disable -x86-64 level marker for -march=sandybridge which enables ISAs between v2 -and v3. 
- -Upstream-Status: Submitted [https://sourceware.org/pipermail/libc-alpha/2021-February/122297.html] -Signed-off-by: Khem Raj <raj.khem@gmail.com> ---- - - sysdeps/x86/configure | 7 ++++++- - sysdeps/x86/configure.ac | 2 +- - sysdeps/x86/isa-level.c | 21 ++++++++++++++++++++- - 3 files changed, 27 insertions(+), 3 deletions(-) - -diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure -index 5e32dc62b3..5b20646843 100644 ---- a/sysdeps/x86/configure -+++ b/sysdeps/x86/configure -@@ -133,7 +133,12 @@ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -nostartfiles -nostdlib -r -o conftest c - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then - count=`LC_ALL=C $READELF -n conftest | grep NT_GNU_PROPERTY_TYPE_0 | wc -l` -- if test "$count" = 1; then -+ if test "$count" = 1 && { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -DINCLUDE_X86_ISA_LEVEL -S -o conftest.s $srcdir/sysdeps/x86/isa-level.c' -+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 -+ (eval $ac_try) 2>&5 -+ ac_status=$? -+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 -+ test $ac_status = 0; }; }; then - libc_cv_include_x86_isa_level=yes - fi - fi -diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac -index f94088f377..54ecd33d2c 100644 ---- a/sysdeps/x86/configure.ac -+++ b/sysdeps/x86/configure.ac -@@ -100,7 +100,7 @@ EOF - libc_cv_include_x86_isa_level=no - if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS -nostartfiles -nostdlib -r -o conftest conftest1.S conftest2.S); then - count=`LC_ALL=C $READELF -n conftest | grep NT_GNU_PROPERTY_TYPE_0 | wc -l` -- if test "$count" = 1; then -+ if test "$count" = 1 && AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS -DINCLUDE_X86_ISA_LEVEL -S -o conftest.s $srcdir/sysdeps/x86/isa-level.c); then - libc_cv_include_x86_isa_level=yes - fi - fi -diff --git a/sysdeps/x86/isa-level.c b/sysdeps/x86/isa-level.c -index aaf524cb56..7f83449061 100644 ---- a/sysdeps/x86/isa-level.c -+++ b/sysdeps/x86/isa-level.c -@@ -25,12 +25,17 @@ - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - --#include <elf.h> -+#ifdef _LIBC -+# include <elf.h> -+#endif - - /* ELF program property for x86 ISA level. 
*/ - #ifdef INCLUDE_X86_ISA_LEVEL - # if defined __x86_64__ || defined __FXSR__ || !defined _SOFT_FLOAT \ - || defined __MMX__ || defined __SSE__ || defined __SSE2__ -+# if !defined __SSE__ || !defined __SSE2__ -+# error "Missing ISAs for x86-64 ISA level baseline" -+# endif - # define ISA_BASELINE GNU_PROPERTY_X86_ISA_1_BASELINE - # else - # define ISA_BASELINE 0 -@@ -40,6 +45,11 @@ - || (defined __x86_64__ && defined __LAHF_SAHF__) \ - || defined __POPCNT__ || defined __SSE3__ \ - || defined __SSSE3__ || defined __SSE4_1__ || defined __SSE4_2__ -+# if !defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 \ -+ || !defined __POPCNT__ || !defined __SSE3__ \ -+ || !defined __SSSE3__ || !defined __SSE4_1__ || !defined __SSE4_2__ -+# error "Missing ISAs for x86-64 ISA level v2" -+# endif - # define ISA_V2 GNU_PROPERTY_X86_ISA_1_V2 - # else - # define ISA_V2 0 -@@ -48,6 +58,10 @@ - # if defined __AVX__ || defined __AVX2__ || defined __F16C__ \ - || defined __FMA__ || defined __LZCNT__ || defined __MOVBE__ \ - || defined __XSAVE__ -+# if !defined __AVX__ || !defined __AVX2__ || !defined __F16C__ \ -+ || !defined __FMA__ || !defined __LZCNT__ -+# error "Missing ISAs for x86-64 ISA level v3" -+# endif - # define ISA_V3 GNU_PROPERTY_X86_ISA_1_V3 - # else - # define ISA_V3 0 -@@ -55,6 +69,11 @@ - - # if defined __AVX512F__ || defined __AVX512BW__ || defined __AVX512CD__ \ - || defined __AVX512DQ__ || defined __AVX512VL__ -+# if !defined __AVX512F__ || !defined __AVX512BW__ \ -+ || !defined __AVX512CD__ || !defined __AVX512DQ__ \ -+ || !defined __AVX512VL__ -+# error "Missing ISAs for x86-64 ISA level v4" -+# endif - # define ISA_V4 GNU_PROPERTY_X86_ISA_1_V4 - # else - # define ISA_V4 0 diff --git a/meta/recipes-core/glibc/glibc/0032-string-Work-around-GCC-PR-98512-in-rawmemchr.patch b/meta/recipes-core/glibc/glibc/0032-string-Work-around-GCC-PR-98512-in-rawmemchr.patch deleted file mode 100644 index e904b28a05..0000000000 --- 
a/meta/recipes-core/glibc/glibc/0032-string-Work-around-GCC-PR-98512-in-rawmemchr.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 044e603b698093cf48f6e6229e0b66acf05227e4 Mon Sep 17 00:00:00 2001 -From: Florian Weimer <fweimer@redhat.com> -Date: Fri, 19 Feb 2021 13:29:00 +0100 -Subject: [PATCH] string: Work around GCC PR 98512 in rawmemchr - -Upstream-Status: Backport [https://sourceware.org/git/?p=glibc.git;a=commit;h=044e603b698093cf48f6e6229e0b66acf05227e4] -Signed-off-by: Khem Raj <raj.khem@gmail.com> ---- - string/rawmemchr.c | 26 +++++++++++++++----------- - 1 file changed, 15 insertions(+), 11 deletions(-) - -diff --git a/string/rawmemchr.c b/string/rawmemchr.c -index 59bbeeaa42..b8523118e5 100644 ---- a/string/rawmemchr.c -+++ b/string/rawmemchr.c -@@ -22,24 +22,28 @@ - # define RAWMEMCHR __rawmemchr - #endif - --/* Find the first occurrence of C in S. */ --void * --RAWMEMCHR (const void *s, int c) --{ -- DIAG_PUSH_NEEDS_COMMENT; -+/* The pragmata should be nested inside RAWMEMCHR below, but that -+ triggers GCC PR 98512. */ -+DIAG_PUSH_NEEDS_COMMENT; - #if __GNUC_PREREQ (7, 0) -- /* GCC 8 warns about the size passed to memchr being larger than -- PTRDIFF_MAX; the use of SIZE_MAX is deliberate here. */ -- DIAG_IGNORE_NEEDS_COMMENT (8, "-Wstringop-overflow="); -+/* GCC 8 warns about the size passed to memchr being larger than -+ PTRDIFF_MAX; the use of SIZE_MAX is deliberate here. */ -+DIAG_IGNORE_NEEDS_COMMENT (8, "-Wstringop-overflow="); - #endif - #if __GNUC_PREREQ (11, 0) -- /* Likewise GCC 11, with a different warning option. */ -- DIAG_IGNORE_NEEDS_COMMENT (11, "-Wstringop-overread"); -+/* Likewise GCC 11, with a different warning option. */ -+DIAG_IGNORE_NEEDS_COMMENT (11, "-Wstringop-overread"); - #endif -+ -+/* Find the first occurrence of C in S. 
*/ -+void * -+RAWMEMCHR (const void *s, int c) -+{ - if (c != '\0') - return memchr (s, c, (size_t)-1); -- DIAG_POP_NEEDS_COMMENT; - return (char *)s + strlen (s); - } - libc_hidden_def (__rawmemchr) - weak_alias (__rawmemchr, rawmemchr) -+ -+DIAG_POP_NEEDS_COMMENT; --- -2.30.1 - diff --git a/meta/recipes-core/glibc/glibc/0033-x86-Handle-_SC_LEVEL1_ICACHE_LINESIZE-BZ-27444.patch b/meta/recipes-core/glibc/glibc/0033-x86-Handle-_SC_LEVEL1_ICACHE_LINESIZE-BZ-27444.patch deleted file mode 100644 index 3a004e227f..0000000000 --- a/meta/recipes-core/glibc/glibc/0033-x86-Handle-_SC_LEVEL1_ICACHE_LINESIZE-BZ-27444.patch +++ /dev/null @@ -1,185 +0,0 @@ -From 750b00a1ddae220403fd892a6fd4e0791ffd154a Mon Sep 17 00:00:00 2001 -From: "H.J. Lu" <hjl.tools@gmail.com> -Date: Fri, 18 Sep 2020 07:55:14 -0700 -Subject: [PATCH] x86: Handle _SC_LEVEL1_ICACHE_LINESIZE [BZ #27444] - - x86: Move x86 processor cache info to cpu_features - -missed _SC_LEVEL1_ICACHE_LINESIZE. - -1. Add level1_icache_linesize to struct cpu_features. -2. Initialize level1_icache_linesize by calling handle_intel, -handle_zhaoxin and handle_amd with _SC_LEVEL1_ICACHE_LINESIZE. -3. Return level1_icache_linesize for _SC_LEVEL1_ICACHE_LINESIZE. 
- -Upstream-Status: Backport [https://sourceware.org/bugzilla/show_bug.cgi?id=27444] -Signed-off-by: Andrei Gherzan <andrei.gherzan@huawei.com> ---- - sysdeps/x86/Makefile | 8 +++ - sysdeps/x86/cacheinfo.c | 3 + - sysdeps/x86/dl-cacheinfo.h | 6 ++ - sysdeps/x86/include/cpu-features.h | 2 + - .../x86/tst-sysconf-cache-linesize-static.c | 1 + - sysdeps/x86/tst-sysconf-cache-linesize.c | 57 +++++++++++++++++++ - 6 files changed, 77 insertions(+) - create mode 100644 sysdeps/x86/tst-sysconf-cache-linesize-static.c - create mode 100644 sysdeps/x86/tst-sysconf-cache-linesize.c - -diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile -index dd82674342..d231263051 100644 ---- a/sysdeps/x86/Makefile -+++ b/sysdeps/x86/Makefile -@@ -208,3 +208,11 @@ $(objpfx)check-cet.out: $(..)sysdeps/x86/check-cet.awk \ - generated += check-cet.out - endif - endif -+ -+ifeq ($(subdir),posix) -+tests += \ -+ tst-sysconf-cache-linesize \ -+ tst-sysconf-cache-linesize-static -+tests-static += \ -+ tst-sysconf-cache-linesize-static -+endif -diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c -index 7b8df45e3b..5ea4723ca6 100644 ---- a/sysdeps/x86/cacheinfo.c -+++ b/sysdeps/x86/cacheinfo.c -@@ -32,6 +32,9 @@ __cache_sysconf (int name) - case _SC_LEVEL1_ICACHE_SIZE: - return cpu_features->level1_icache_size; - -+ case _SC_LEVEL1_ICACHE_LINESIZE: -+ return cpu_features->level1_icache_linesize; -+ - case _SC_LEVEL1_DCACHE_SIZE: - return cpu_features->level1_dcache_size; - -diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h -index a31fa0783a..7cd00b92f1 100644 ---- a/sysdeps/x86/dl-cacheinfo.h -+++ b/sysdeps/x86/dl-cacheinfo.h -@@ -707,6 +707,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) - long int core; - unsigned int threads = 0; - unsigned long int level1_icache_size = -1; -+ unsigned long int level1_icache_linesize = -1; - unsigned long int level1_dcache_size = -1; - unsigned long int level1_dcache_assoc = -1; - unsigned long int 
level1_dcache_linesize = -1; -@@ -726,6 +727,8 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) - - level1_icache_size - = handle_intel (_SC_LEVEL1_ICACHE_SIZE, cpu_features); -+ level1_icache_linesize -+ = handle_intel (_SC_LEVEL1_ICACHE_LINESIZE, cpu_features); - level1_dcache_size = data; - level1_dcache_assoc - = handle_intel (_SC_LEVEL1_DCACHE_ASSOC, cpu_features); -@@ -753,6 +756,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) - shared = handle_zhaoxin (_SC_LEVEL3_CACHE_SIZE); - - level1_icache_size = handle_zhaoxin (_SC_LEVEL1_ICACHE_SIZE); -+ level1_icache_linesize = handle_zhaoxin (_SC_LEVEL1_ICACHE_LINESIZE); - level1_dcache_size = data; - level1_dcache_assoc = handle_zhaoxin (_SC_LEVEL1_DCACHE_ASSOC); - level1_dcache_linesize = handle_zhaoxin (_SC_LEVEL1_DCACHE_LINESIZE); -@@ -772,6 +776,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) - shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); - - level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE); -+ level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE); - level1_dcache_size = data; - level1_dcache_assoc = handle_amd (_SC_LEVEL1_DCACHE_ASSOC); - level1_dcache_linesize = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE); -@@ -833,6 +838,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) - } - - cpu_features->level1_icache_size = level1_icache_size; -+ cpu_features->level1_icache_linesize = level1_icache_linesize; - cpu_features->level1_dcache_size = level1_dcache_size; - cpu_features->level1_dcache_assoc = level1_dcache_assoc; - cpu_features->level1_dcache_linesize = level1_dcache_linesize; -diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h -index 624736b40e..39a3f4f311 100644 ---- a/sysdeps/x86/include/cpu-features.h -+++ b/sysdeps/x86/include/cpu-features.h -@@ -874,6 +874,8 @@ struct cpu_features - unsigned long int rep_stosb_threshold; - /* _SC_LEVEL1_ICACHE_SIZE. 
*/ - unsigned long int level1_icache_size; -+ /* _SC_LEVEL1_ICACHE_LINESIZE. */ -+ unsigned long int level1_icache_linesize; - /* _SC_LEVEL1_DCACHE_SIZE. */ - unsigned long int level1_dcache_size; - /* _SC_LEVEL1_DCACHE_ASSOC. */ -diff --git a/sysdeps/x86/tst-sysconf-cache-linesize-static.c b/sysdeps/x86/tst-sysconf-cache-linesize-static.c -new file mode 100644 -index 0000000000..152ae68821 ---- /dev/null -+++ b/sysdeps/x86/tst-sysconf-cache-linesize-static.c -@@ -0,0 +1 @@ -+#include "tst-sysconf-cache-linesize.c" -diff --git a/sysdeps/x86/tst-sysconf-cache-linesize.c b/sysdeps/x86/tst-sysconf-cache-linesize.c -new file mode 100644 -index 0000000000..642dbde5d2 ---- /dev/null -+++ b/sysdeps/x86/tst-sysconf-cache-linesize.c -@@ -0,0 +1,57 @@ -+/* Test system cache line sizes. -+ Copyright (C) 2021 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, see -+ <https://www.gnu.org/licenses/>. 
*/ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <array_length.h> -+ -+static struct -+{ -+ const char *name; -+ int _SC_val; -+} sc_options[] = -+ { -+#define N(name) { "_SC_"#name, _SC_##name } -+ N (LEVEL1_ICACHE_LINESIZE), -+ N (LEVEL1_DCACHE_LINESIZE), -+ N (LEVEL2_CACHE_LINESIZE) -+ }; -+ -+static int -+do_test (void) -+{ -+ int result = EXIT_SUCCESS; -+ -+ for (int i = 0; i < array_length (sc_options); ++i) -+ { -+ long int scret = sysconf (sc_options[i]._SC_val); -+ if (scret < 0) -+ { -+ printf ("sysconf (%s) returned < 0 (%ld)\n", -+ sc_options[i].name, scret); -+ result = EXIT_FAILURE; -+ } -+ else -+ printf ("sysconf (%s): %ld\n", sc_options[i].name, scret); -+ } -+ -+ return result; -+} -+ -+#include <support/test-driver.c> diff --git a/meta/recipes-core/glibc/glibc/CVE-2021-27318-revert.patch b/meta/recipes-core/glibc/glibc/CVE-2021-27318-revert.patch new file mode 100644 index 0000000000..2f08a90dd0 --- /dev/null +++ b/meta/recipes-core/glibc/glibc/CVE-2021-27318-revert.patch @@ -0,0 +1,174 @@ +Since the full ISA set used in an ELF binary is unknown to compiler, +an x86-64 ISA level marker indicates the minimum, not maximum, ISA set +required to run such an ELF binary. We never guarantee a library with +an x86-64 ISA level v3 marker doesn't contain other ISAs beyond x86-64 +ISA level v3, like AVX VNNI. We check the x86-64 ISA level marker for +the minimum ISA set. Since -march=sandybridge enables only some ISAs +in x86-64 ISA level v3, we should set the needed ISA marker to v2. +Otherwise, libc is compiled with -march=sandybridge will fail to run on +Sandy Bridge: + +$ ./elf/ld.so ./libc.so +./libc.so: (p) CPU ISA level is lower than required: needed: 7; got: 3 + +Set the minimum, instead of maximum, x86-64 ISA level marker should have +no impact on the b-hwcaps directory assignment logic in ldconfig nor +ld.so. 
+ +(cherry picked from commit 339bf918ea4830fb35614632e96f3aab3237adce) +--- + config.h.in | 6 ++++++ + sysdeps/x86/configure | 28 ++++++++++++++++++++++++++++ + sysdeps/x86/configure.ac | 16 ++++++++++++++++ + sysdeps/x86/isa-level.c | 25 ++++++++++++++----------- + 4 files changed, 64 insertions(+), 11 deletions(-) + +diff --git a/config.h.in b/config.h.in +--- a/config.h.in 2021-10-16 03:28:49.447573081 -0700 ++++ b/config.h.in 2021-10-16 03:29:38.626741181 -0700 +@@ -275,4 +275,10 @@ + /* Define if x86 ISA level should be included in shared libraries. */ + #undef INCLUDE_X86_ISA_LEVEL + ++/* Define if -msahf is enabled by default on x86. */ ++#undef HAVE_X86_LAHF_SAHF ++ ++/* Define if -mmovbe is enabled by default on x86. */ ++#undef HAVE_X86_MOVBE ++ + #endif +diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure +--- a/sysdeps/x86/configure 2021-10-16 03:28:49.587570713 -0700 ++++ b/sysdeps/x86/configure 2021-10-16 03:29:39.330729277 -0700 +@@ -126,6 +126,8 @@ cat > conftest2.S <<EOF + 4: + EOF + libc_cv_include_x86_isa_level=no ++libc_cv_have_x86_lahf_sahf=no ++libc_cv_have_x86_movbe=no + if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -nostartfiles -nostdlib -r -o conftest conftest1.S conftest2.S' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 +@@ -135,6 +137,24 @@ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS + count=`LC_ALL=C $READELF -n conftest | grep NT_GNU_PROPERTY_TYPE_0 | wc -l` + if test "$count" = 1; then + libc_cv_include_x86_isa_level=yes ++ cat > conftest.c <<EOF ++EOF ++ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fverbose-asm -S -o - conftest.c' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 ++ test $ac_status = 0; }; } | grep -q "\-msahf"; then ++ libc_cv_have_x86_lahf_sahf=yes ++ fi ++ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fverbose-asm -S -o - conftest.c' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } | grep -q "\-mmovbe"; then ++ libc_cv_have_x86_movbe=yes ++ fi + fi + fi + rm -f conftest* +@@ -145,5 +165,13 @@ if test $libc_cv_include_x86_isa_level = + $as_echo "#define INCLUDE_X86_ISA_LEVEL 1" >>confdefs.h + + fi ++if test $libc_cv_have_x86_lahf_sahf = yes; then ++ $as_echo "#define HAVE_X86_LAHF_SAHF 1" >>confdefs.h ++ ++fi ++if test $libc_cv_have_x86_movbe = yes; then ++ $as_echo "#define HAVE_X86_MOVBE 1" >>confdefs.h ++ ++fi + config_vars="$config_vars + enable-x86-isa-level = $libc_cv_include_x86_isa_level" +diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac +--- a/sysdeps/x86/configure.ac 2021-10-16 03:28:49.587570713 -0700 ++++ b/sysdeps/x86/configure.ac 2021-10-16 03:29:40.038717306 -0700 +@@ -98,14 +98,30 @@ cat > conftest2.S <<EOF + 4: + EOF + libc_cv_include_x86_isa_level=no ++libc_cv_have_x86_lahf_sahf=no ++libc_cv_have_x86_movbe=no + if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS -nostartfiles -nostdlib -r -o conftest conftest1.S conftest2.S); then + count=`LC_ALL=C $READELF -n conftest | grep NT_GNU_PROPERTY_TYPE_0 | wc -l` + if test "$count" = 1; then + libc_cv_include_x86_isa_level=yes ++ cat > conftest.c <<EOF ++EOF ++ if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS -fverbose-asm -S -o - conftest.c) | grep -q "\-msahf"; then ++ libc_cv_have_x86_lahf_sahf=yes ++ fi ++ if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS -fverbose-asm -S -o - conftest.c) | grep -q "\-mmovbe"; then ++ libc_cv_have_x86_movbe=yes ++ fi + fi + fi + rm -f conftest*]) + if test $libc_cv_include_x86_isa_level = yes; then + AC_DEFINE(INCLUDE_X86_ISA_LEVEL) + fi ++if test 
$libc_cv_have_x86_lahf_sahf = yes; then ++ AC_DEFINE(HAVE_X86_LAHF_SAHF) ++fi ++if test $libc_cv_have_x86_movbe = yes; then ++ AC_DEFINE(HAVE_X86_MOVBE) ++fi + LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level]) +diff --git a/sysdeps/x86/isa-level.c b/sysdeps/x86/isa-level.c +--- a/sysdeps/x86/isa-level.c 2021-10-16 03:28:49.587570713 -0700 ++++ b/sysdeps/x86/isa-level.c 2021-10-16 03:29:40.766704997 -0700 +@@ -29,32 +29,35 @@ + + /* ELF program property for x86 ISA level. */ + #ifdef INCLUDE_X86_ISA_LEVEL +-# if defined __x86_64__ || defined __FXSR__ || !defined _SOFT_FLOAT \ +- || defined __MMX__ || defined __SSE__ || defined __SSE2__ ++# if defined __SSE__ && defined __SSE2__ ++/* NB: ISAs, excluding MMX, in x86-64 ISA level baseline are used. */ + # define ISA_BASELINE GNU_PROPERTY_X86_ISA_1_BASELINE + # else + # define ISA_BASELINE 0 + # endif + +-# if defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 \ +- || (defined __x86_64__ && defined __LAHF_SAHF__) \ +- || defined __POPCNT__ || defined __SSE3__ \ +- || defined __SSSE3__ || defined __SSE4_1__ || defined __SSE4_2__ ++# if ISA_BASELINE && defined __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 \ ++ && defined HAVE_X86_LAHF_SAHF && defined __POPCNT__ \ ++ && defined __SSE3__ && defined __SSSE3__ && defined __SSE4_1__ \ ++ && defined __SSE4_2__ ++/* NB: ISAs in x86-64 ISA level v2 are used. */ + # define ISA_V2 GNU_PROPERTY_X86_ISA_1_V2 + # else + # define ISA_V2 0 + # endif + +-# if defined __AVX__ || defined __AVX2__ || defined __F16C__ \ +- || defined __FMA__ || defined __LZCNT__ || defined __MOVBE__ \ +- || defined __XSAVE__ ++# if ISA_V2 && defined __AVX__ && defined __AVX2__ && defined __F16C__ \ ++ && defined __FMA__ && defined __LZCNT__ && defined HAVE_X86_MOVBE ++/* NB: ISAs in x86-64 ISA level v3 are used. 
*/ + # define ISA_V3 GNU_PROPERTY_X86_ISA_1_V3 + # else + # define ISA_V3 0 + # endif + +-# if defined __AVX512F__ || defined __AVX512BW__ || defined __AVX512CD__ \ +- || defined __AVX512DQ__ || defined __AVX512VL__ ++# if ISA_V3 && defined __AVX512F__ && defined __AVX512BW__ \ ++ && defined __AVX512CD__ && defined __AVX512DQ__ \ ++ && defined __AVX512VL__ ++/* NB: ISAs in x86-64 ISA level v4 are used. */ + # define ISA_V4 GNU_PROPERTY_X86_ISA_1_V4 + # else + # define ISA_V4 0 diff --git a/meta/recipes-core/glibc/glibc/CVE-2021-27645.patch b/meta/recipes-core/glibc/glibc/CVE-2021-27645.patch deleted file mode 100644 index 26c5c0d2a9..0000000000 --- a/meta/recipes-core/glibc/glibc/CVE-2021-27645.patch +++ /dev/null @@ -1,51 +0,0 @@ -From dca565886b5e8bd7966e15f0ca42ee5cff686673 Mon Sep 17 00:00:00 2001 -From: DJ Delorie <dj@redhat.com> -Date: Thu, 25 Feb 2021 16:08:21 -0500 -Subject: [PATCH] nscd: Fix double free in netgroupcache [BZ #27462] - -In commit 745664bd798ec8fd50438605948eea594179fba1 a use-after-free -was fixed, but this led to an occasional double-free. This patch -tracks the "live" allocation better. - -Tested manually by a third party. - -Related: RHBZ 1927877 - -Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org> -Reviewed-by: Carlos O'Donell <carlos@redhat.com> - -Upstream-Status: Backport [https://sourceware.org/git/?p=glibc.git;a=commit;h=dca565886b5e8bd7966e15f0ca42ee5cff686673] - -CVE: CVE-2021-27645 - -Reviewed-by: Carlos O'Donell <carlos@redhat.com> -Signed-off-by: Khairul Rohaizzat Jamaluddin <khairul.rohaizzat.jamaluddin@intel.com> ---- - nscd/netgroupcache.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c -index dba6ceec1b..ad2daddafd 100644 ---- a/nscd/netgroupcache.c -+++ b/nscd/netgroupcache.c -@@ -248,7 +248,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, - : NULL); - ndomain = (ndomain ? 
newbuf + ndomaindiff - : NULL); -- buffer = newbuf; -+ *tofreep = buffer = newbuf; - } - - nhost = memcpy (buffer + bufused, -@@ -319,7 +319,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, - else if (status == NSS_STATUS_TRYAGAIN && e == ERANGE) - { - buflen *= 2; -- buffer = xrealloc (buffer, buflen); -+ *tofreep = buffer = xrealloc (buffer, buflen); - } - else if (status == NSS_STATUS_RETURN - || status == NSS_STATUS_NOTFOUND --- -2.27.0 - diff --git a/meta/recipes-core/glibc/glibc/CVE-2021-33574_1.patch b/meta/recipes-core/glibc/glibc/CVE-2021-33574_1.patch deleted file mode 100644 index 21f07ac303..0000000000 --- a/meta/recipes-core/glibc/glibc/CVE-2021-33574_1.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 709674ec86c3c6da4f0995897f6b0205c16d049d Mon Sep 17 00:00:00 2001 -From: Andreas Schwab <schwab@linux-m68k.org> -Date: Thu, 27 May 2021 12:49:47 +0200 -Subject: [PATCH] Use __pthread_attr_copy in mq_notify (bug 27896) - -Make a deep copy of the pthread attribute object to remove a potential -use-after-free issue. - -Upstream-Status: Backport -[https://sourceware.org/git/?p=glibc.git;a=commit;h=42d359350510506b87101cf77202fefcbfc790cb] - -CVE: -CVE-2021-33574 - -Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org> -Signed-off-by: Khairul Rohaizzat Jamaluddin <khairul.rohaizzat.jamaluddin@intel.com> ---- - NEWS | 4 ++++ - sysdeps/unix/sysv/linux/mq_notify.c | 15 ++++++++++----- - 2 files changed, 14 insertions(+), 5 deletions(-) - -diff --git a/NEWS b/NEWS -index 71f5d20324..017d656433 100644 ---- a/NEWS -+++ b/NEWS -@@ -118,6 +118,10 @@ Security related changes: - CVE-2019-25013: A buffer overflow has been fixed in the iconv function when - invoked with EUC-KR input containing invalid multibyte input sequences. - -+ CVE-2021-33574: The mq_notify function has a potential use-after-free -+ issue when using a notification type of SIGEV_THREAD and a thread -+ attribute with a non-default affinity mask. 
-+ - The following bugs are resolved with this release: - - [10635] libc: realpath portability patches -diff --git a/sysdeps/unix/sysv/linux/mq_notify.c b/sysdeps/unix/sysv/linux/mq_notify.c -index cc575a0cdd..f7ddfe5a6c 100644 ---- a/sysdeps/unix/sysv/linux/mq_notify.c -+++ b/sysdeps/unix/sysv/linux/mq_notify.c -@@ -133,8 +133,11 @@ helper_thread (void *arg) - (void) __pthread_barrier_wait (&notify_barrier); - } - else if (data.raw[NOTIFY_COOKIE_LEN - 1] == NOTIFY_REMOVED) -- /* The only state we keep is the copy of the thread attributes. */ -- free (data.attr); -+ { -+ /* The only state we keep is the copy of the thread attributes. */ -+ pthread_attr_destroy (data.attr); -+ free (data.attr); -+ } - } - return NULL; - } -@@ -255,8 +258,7 @@ mq_notify (mqd_t mqdes, const struct sigevent *notification) - if (data.attr == NULL) - return -1; - -- memcpy (data.attr, notification->sigev_notify_attributes, -- sizeof (pthread_attr_t)); -+ __pthread_attr_copy (data.attr, notification->sigev_notify_attributes); - } - - /* Construct the new request. */ -@@ -270,7 +272,10 @@ mq_notify (mqd_t mqdes, const struct sigevent *notification) - - /* If it failed, free the allocated memory. */ - if (__glibc_unlikely (retval != 0)) -- free (data.attr); -+ { -+ pthread_attr_destroy (data.attr); -+ free (data.attr); -+ } - - return retval; - } diff --git a/meta/recipes-core/glibc/glibc/CVE-2021-33574_2.patch b/meta/recipes-core/glibc/glibc/CVE-2021-33574_2.patch deleted file mode 100644 index befccd7ac7..0000000000 --- a/meta/recipes-core/glibc/glibc/CVE-2021-33574_2.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 217b6dc298156bdb0d6aea9ea93e7e394a5ff091 Mon Sep 17 00:00:00 2001 -From: Florian Weimer <fweimer@redhat.com> -Date: Tue, 1 Jun 2021 17:51:41 +0200 -Subject: [PATCH] Fix use of __pthread_attr_copy in mq_notify (bug 27896) - -__pthread_attr_copy can fail and does not initialize the attribute -structure in that case.
- -If __pthread_attr_copy is never called and there is no allocated -attribute, pthread_attr_destroy should not be called, otherwise -there is a null pointer dereference in rt/tst-mqueue6. - -Fixes commit 42d359350510506b87101cf77202fefcbfc790cb -("Use __pthread_attr_copy in mq_notify (bug 27896)"). - -Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org> - -Upstream-Status: Backport -[https://sourceware.org/git/?p=glibc.git;a=commit;h=217b6dc298156bdb0d6aea9ea93e7e394a5ff091] - -CVE: -CVE-2021-33574 - -Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org> -Signed-off-by: Khairul Rohaizzat Jamaluddin <khairul.rohaizzat.jamaluddin@intel.com> ---- - sysdeps/unix/sysv/linux/mq_notify.c | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/sysdeps/unix/sysv/linux/mq_notify.c b/sysdeps/unix/sysv/linux/mq_notify.c -index f7ddfe5a6c..6f46d29d1d 100644 ---- a/sysdeps/unix/sysv/linux/mq_notify.c -+++ b/sysdeps/unix/sysv/linux/mq_notify.c -@@ -258,7 +258,14 @@ mq_notify (mqd_t mqdes, const struct sigevent *notification) - if (data.attr == NULL) - return -1; - -- __pthread_attr_copy (data.attr, notification->sigev_notify_attributes); -+ int ret = __pthread_attr_copy (data.attr, -+ notification->sigev_notify_attributes); -+ if (ret != 0) -+ { -+ free (data.attr); -+ __set_errno (ret); -+ return -1; -+ } - } - - /* Construct the new request. */ -@@ -271,7 +278,7 @@ mq_notify (mqd_t mqdes, const struct sigevent *notification) - int retval = INLINE_SYSCALL (mq_notify, 2, mqdes, &se); - - /* If it failed, free the allocated memory. 
*/ -- if (__glibc_unlikely (retval != 0)) -+ if (retval != 0 && data.attr != NULL) - { - pthread_attr_destroy (data.attr); - free (data.attr); --- -2.27.0 - diff --git a/meta/recipes-core/glibc/glibc/CVE-2021-35942.patch b/meta/recipes-core/glibc/glibc/CVE-2021-35942.patch deleted file mode 100644 index 5cae1bc91c..0000000000 --- a/meta/recipes-core/glibc/glibc/CVE-2021-35942.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 5adda61f62b77384718b4c0d8336ade8f2b4b35c Mon Sep 17 00:00:00 2001 -From: Andreas Schwab <schwab@linux-m68k.org> -Date: Fri, 25 Jun 2021 15:02:47 +0200 -Subject: [PATCH] wordexp: handle overflow in positional parameter number (bug - 28011) - -Use strtoul instead of atoi so that overflow can be detected. - -Upstream-Status: Backport [https://sourceware.org/git/?p=glibc.git;a=commit;h=5adda61f62b77384718b4c0d8336ade8f2b4b35c] -CVE: CVE-2021-35942 -Signed-off-by: Vinay Kumar <vinay.m.engg@gmail.com> ---- - posix/wordexp-test.c | 1 + - posix/wordexp.c | 2 +- - 2 files changed, 2 insertions(+), 1 deletion(-) - -diff --git a/posix/wordexp-test.c b/posix/wordexp-test.c -index f93a546d7e..9df02dbbb3 100644 ---- a/posix/wordexp-test.c -+++ b/posix/wordexp-test.c -@@ -183,6 +183,7 @@ struct test_case_struct - { 0, NULL, "$var", 0, 0, { NULL, }, IFS }, - { 0, NULL, "\"\\n\"", 0, 1, { "\\n", }, IFS }, - { 0, NULL, "", 0, 0, { NULL, }, IFS }, -+ { 0, NULL, "${1234567890123456789012}", 0, 0, { NULL, }, IFS }, - - /* Flags not already covered (testit() has special handling for these) */ - { 0, NULL, "one two", WRDE_DOOFFS, 2, { "one", "two", }, IFS }, -diff --git a/posix/wordexp.c b/posix/wordexp.c -index bcbe96e48d..1f3b09f721 100644 ---- a/posix/wordexp.c -+++ b/posix/wordexp.c -@@ -1399,7 +1399,7 @@ envsubst: - /* Is it a numeric parameter? */ - else if (isdigit (env[0])) - { -- int n = atoi (env); -+ unsigned long n = strtoul (env, NULL, 10); - - if (n >= __libc_argc) - /* Substitute NULL. 
*/ --- -2.17.1 - diff --git a/meta/recipes-core/glibc/glibc_2.33.bb b/meta/recipes-core/glibc/glibc_2.33.bb index 57a60cb9d8..a1e9eb3a16 100644 --- a/meta/recipes-core/glibc/glibc_2.33.bb +++ b/meta/recipes-core/glibc/glibc_2.33.bb @@ -56,16 +56,6 @@ SRC_URI = "${GLIBC_GIT_URI};branch=${SRCBRANCH};name=glibc \ file://0028-readlib-Add-OECORE_KNOWN_INTERPRETER_NAMES-to-known-.patch \ file://0029-wordsize.h-Unify-the-header-between-arm-and-aarch64.patch \ file://0030-powerpc-Do-not-ask-compiler-for-finding-arch.patch \ - file://0031-x86-Require-full-ISA-support-for-x86-64-level-marker.patch \ - file://0032-string-Work-around-GCC-PR-98512-in-rawmemchr.patch \ - file://0033-x86-Handle-_SC_LEVEL1_ICACHE_LINESIZE-BZ-27444.patch \ - file://CVE-2021-27645.patch \ - file://0001-nptl-Remove-private-futex-optimization-BZ-27304.patch \ - file://CVE-2021-33574_1.patch \ - file://CVE-2021-33574_2.patch \ - file://CVE-2021-35942.patch \ - file://0001-CVE-2021-38604.patch \ - file://0002-CVE-2021-38604.patch \ " S = "${WORKDIR}/git" B = "${WORKDIR}/build-${TARGET_SYS}" @@ -98,7 +88,7 @@ EXTRA_OECONF = "--enable-kernel=${OLDEST_KERNEL} \ EXTRA_OECONF += "${@get_libc_fpu_setting(bb, d)}" -EXTRA_OECONF_append_x86 = " --enable-cet" +EXTRA_OECONF_append_x86 = " ${@bb.utils.contains_any('TUNE_FEATURES', 'i586 c3', '--disable-cet', '--enable-cet', d)}" EXTRA_OECONF_append_x86-64 = " --enable-cet" PACKAGECONFIG ??= "nscd" diff --git a/meta/recipes-core/ifupdown/ifupdown_0.8.36.bb b/meta/recipes-core/ifupdown/ifupdown_0.8.36.bb index 0daf50acab..afc3196620 100644 --- a/meta/recipes-core/ifupdown/ifupdown_0.8.36.bb +++ b/meta/recipes-core/ifupdown/ifupdown_0.8.36.bb @@ -7,7 +7,7 @@ the file /etc/network/interfaces." 
LICENSE = "GPLv2" LIC_FILES_CHKSUM = "file://COPYING;md5=94d55d512a9ba36caa9b7df079bae19f" -SRC_URI = "git://salsa.debian.org/debian/ifupdown.git;protocol=https \ +SRC_URI = "git://salsa.debian.org/debian/ifupdown.git;protocol=https;branch=master \ file://defn2-c-man-don-t-rely-on-dpkg-architecture-to-set-a.patch \ file://99_network \ file://0001-Define-FNM_EXTMATCH-for-musl.patch \ diff --git a/meta/recipes-core/images/build-appliance-image_15.0.0.bb b/meta/recipes-core/images/build-appliance-image_15.0.0.bb index a13422490f..5631cd8ae6 100644 --- a/meta/recipes-core/images/build-appliance-image_15.0.0.bb +++ b/meta/recipes-core/images/build-appliance-image_15.0.0.bb @@ -24,7 +24,7 @@ IMAGE_FSTYPES = "wic.vmdk wic.vhd wic.vhdx" inherit core-image setuptools3 -SRCREV ?= "45fb2254f6961377ae0ad0c5c00735459fdcb182" +SRCREV ?= "2954fa87a4d325f1a3c722d6fb8bf13b17f9e7a0" SRC_URI = "git://git.yoctoproject.org/poky;branch=hardknott \ file://Yocto_Build_Appliance.vmx \ file://Yocto_Build_Appliance.vmxf \ diff --git a/meta/recipes-core/initrdscripts/initramfs-framework/finish b/meta/recipes-core/initrdscripts/initramfs-framework/finish index 717383ebac..f08a920867 100755 --- a/meta/recipes-core/initrdscripts/initramfs-framework/finish +++ b/meta/recipes-core/initrdscripts/initramfs-framework/finish @@ -12,6 +12,18 @@ finish_run() { fatal "ERROR: There's no '/dev' on rootfs." fi + # Unmount anything that was automounted by busybox via mdev-mount.sh. + # We're about to switch_root, and leaving anything mounted will prevent + # the next rootfs from modifying the block device. Ignore ROOT_DISK, + # if it was set by setup-live, because it'll be mounted over loopback + # to ROOTFS_DIR. + local dev + for dev in /run/media/*; do + if mountpoint -q "${dev}" && [ "${dev##*/}" != "${ROOT_DISK}" ]; then + umount -f "${dev}" || debug "Failed to unmount ${dev}" + fi + done + info "Switching root to '$ROOTFS_DIR'..." debug "Moving /dev, /proc and /sys onto rootfs..." 
diff --git a/meta/recipes-core/initscripts/init-system-helpers_1.60.bb b/meta/recipes-core/initscripts/init-system-helpers_1.60.bb index 33977e66c1..98f45e1355 100644 --- a/meta/recipes-core/initscripts/init-system-helpers_1.60.bb +++ b/meta/recipes-core/initscripts/init-system-helpers_1.60.bb @@ -17,7 +17,7 @@ LICENSE = "BSD-3-Clause & GPLv2" LIC_FILES_CHKSUM = "file://debian/copyright;md5=ee2b1830fcfead84d07bc060ec43e072" SRCREV = "dbd9197569c0935029acd5c9b02b84c68fd937ee" -SRC_URI = "git://salsa.debian.org/debian/init-system-helpers.git;protocol=https" +SRC_URI = "git://salsa.debian.org/debian/init-system-helpers.git;protocol=https;branch=master" S = "${WORKDIR}/git" diff --git a/meta/recipes-core/libxcrypt/libxcrypt.inc b/meta/recipes-core/libxcrypt/libxcrypt.inc index b5ca863d54..8008ba2d38 100644 --- a/meta/recipes-core/libxcrypt/libxcrypt.inc +++ b/meta/recipes-core/libxcrypt/libxcrypt.inc @@ -9,7 +9,7 @@ LIC_FILES_CHKSUM = "file://LICENSING;md5=bd5d9777dfe7076c4f2928f12fed226a \ inherit autotools pkgconfig -SRC_URI = "git://github.com/besser82/libxcrypt.git;branch=${SRCBRANCH} \ +SRC_URI = "git://github.com/besser82/libxcrypt.git;branch=${SRCBRANCH};protocol=https \ file://0001-configure.ac-do-not-use-compute-symver-floor.patch \ " SRCREV = "94d84f92ca123d851586016c4678eb1f21c19029" diff --git a/meta/recipes-core/libxml/libxml2/CVE-2022-23308-fix-regression.patch b/meta/recipes-core/libxml/libxml2/CVE-2022-23308-fix-regression.patch new file mode 100644 index 0000000000..eefecb9adb --- /dev/null +++ b/meta/recipes-core/libxml/libxml2/CVE-2022-23308-fix-regression.patch @@ -0,0 +1,99 @@ +From 646fe48d1c8a74310c409ddf81fe7df6700052af Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer <wellnhofer@aevum.de> +Date: Tue, 22 Feb 2022 11:51:08 +0100 +Subject: [PATCH] Fix --without-valid build + +Regressed in commit 652dd12a. 
+--- + valid.c | 58 ++++++++++++++++++++++++++++----------------------------- + 1 file changed, 29 insertions(+), 29 deletions(-) +--- + +From https://github.com/GNOME/libxml2.git + commit 646fe48d1c8a74310c409ddf81fe7df6700052af + +CVE: CVE-2022-23308 +Upstream-status: Backport + +Signed-off-by: Joe Slater <joe.slater@windriver.com> + + +diff --git a/valid.c b/valid.c +index 8e596f1d..9684683a 100644 +--- a/valid.c ++++ b/valid.c +@@ -479,35 +479,6 @@ nodeVPop(xmlValidCtxtPtr ctxt) + return (ret); + } + +-/** +- * xmlValidNormalizeString: +- * @str: a string +- * +- * Normalize a string in-place. +- */ +-static void +-xmlValidNormalizeString(xmlChar *str) { +- xmlChar *dst; +- const xmlChar *src; +- +- if (str == NULL) +- return; +- src = str; +- dst = str; +- +- while (*src == 0x20) src++; +- while (*src != 0) { +- if (*src == 0x20) { +- while (*src == 0x20) src++; +- if (*src != 0) +- *dst++ = 0x20; +- } else { +- *dst++ = *src++; +- } +- } +- *dst = 0; +-} +- + #ifdef DEBUG_VALID_ALGO + static void + xmlValidPrintNode(xmlNodePtr cur) { +@@ -2636,6 +2607,35 @@ xmlDumpNotationTable(xmlBufferPtr buf, xmlNotationTablePtr table) { + (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ + xmlFree((char *)(str)); + ++/** ++ * xmlValidNormalizeString: ++ * @str: a string ++ * ++ * Normalize a string in-place. 
++ */ ++static void ++xmlValidNormalizeString(xmlChar *str) { ++ xmlChar *dst; ++ const xmlChar *src; ++ ++ if (str == NULL) ++ return; ++ src = str; ++ dst = str; ++ ++ while (*src == 0x20) src++; ++ while (*src != 0) { ++ if (*src == 0x20) { ++ while (*src == 0x20) src++; ++ if (*src != 0) ++ *dst++ = 0x20; ++ } else { ++ *dst++ = *src++; ++ } ++ } ++ *dst = 0; ++} ++ + static int + xmlIsStreaming(xmlValidCtxtPtr ctxt) { + xmlParserCtxtPtr pctxt; +-- +2.35.1 + diff --git a/meta/recipes-core/libxml/libxml2/CVE-2022-23308.patch b/meta/recipes-core/libxml/libxml2/CVE-2022-23308.patch new file mode 100644 index 0000000000..708a98b45a --- /dev/null +++ b/meta/recipes-core/libxml/libxml2/CVE-2022-23308.patch @@ -0,0 +1,209 @@ +From 652dd12a858989b14eed4e84e453059cd3ba340e Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer <wellnhofer@aevum.de> +Date: Tue, 8 Feb 2022 03:29:24 +0100 +Subject: [PATCH] [CVE-2022-23308] Use-after-free of ID and IDREF attributes + +If a document is parsed with XML_PARSE_DTDVALID and without +XML_PARSE_NOENT, the value of ID attributes has to be normalized after +potentially expanding entities in xmlRemoveID. Otherwise, later calls +to xmlGetID can return a pointer to previously freed memory. + +ID attributes which are empty or contain only whitespace after +entity expansion are affected in a similar way. This is fixed by +not storing such attributes in the ID table. + +The test to detect streaming mode when validating against a DTD was +broken. In connection with the defects above, this could result in a +use-after-free when using the xmlReader interface with validation. +Fix detection of streaming mode to avoid similar issues. (This changes +the expected result of a test case. But as far as I can tell, using the +XML reader with XIncludes referencing the root document never worked +properly, anyway.) + +All of these issues can result in denial of service. 
Using xmlReader +with validation could result in disclosure of memory via the error +channel, typically stderr. The security impact of xmlGetID returning +a pointer to freed memory depends on the application. The typical use +case of calling xmlGetID on an unmodified document is not affected. +--- + result/XInclude/ns1.xml.rdr | 2 +- + valid.c | 88 +++++++++++++++++++++++-------------- + 2 files changed, 56 insertions(+), 34 deletions(-) + --- + +From https://github.com/GNOME/libxml2.git + commit 652dd12a858989b14eed4e84e453059cd3ba340e + +Remove patch to ns1.xml.rdr which does not exist in version 2.9.10. + +CVE: CVE-2022-23308 +Upstream-status: Backport + +Signed-off-by: Joe Slater <joe.slater@windriver.com> + + +diff --git a/valid.c b/valid.c +index 5ee391c0..8e596f1d 100644 +--- a/valid.c ++++ b/valid.c +@@ -479,6 +479,35 @@ nodeVPop(xmlValidCtxtPtr ctxt) + return (ret); + } + ++/** ++ * xmlValidNormalizeString: ++ * @str: a string ++ * ++ * Normalize a string in-place. ++ */ ++static void ++xmlValidNormalizeString(xmlChar *str) { ++ xmlChar *dst; ++ const xmlChar *src; ++ ++ if (str == NULL) ++ return; ++ src = str; ++ dst = str; ++ ++ while (*src == 0x20) src++; ++ while (*src != 0) { ++ if (*src == 0x20) { ++ while (*src == 0x20) src++; ++ if (*src != 0) ++ *dst++ = 0x20; ++ } else { ++ *dst++ = *src++; ++ } ++ } ++ *dst = 0; ++} ++ + #ifdef DEBUG_VALID_ALGO + static void + xmlValidPrintNode(xmlNodePtr cur) { +@@ -2607,6 +2636,24 @@ xmlDumpNotationTable(xmlBufferPtr buf, xmlNotationTablePtr table) { + (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \ + xmlFree((char *)(str)); + ++static int ++xmlIsStreaming(xmlValidCtxtPtr ctxt) { ++ xmlParserCtxtPtr pctxt; ++ ++ if (ctxt == NULL) ++ return(0); ++ /* ++ * These magic values are also abused to detect whether we're validating ++ * while parsing a document. In this case, userData points to the parser ++ * context. 
++ */ ++ if ((ctxt->finishDtd != XML_CTXT_FINISH_DTD_0) && ++ (ctxt->finishDtd != XML_CTXT_FINISH_DTD_1)) ++ return(0); ++ pctxt = ctxt->userData; ++ return(pctxt->parseMode == XML_PARSE_READER); ++} ++ + /** + * xmlFreeID: + * @not: A id +@@ -2650,7 +2697,7 @@ xmlAddID(xmlValidCtxtPtr ctxt, xmlDocPtr doc, const xmlChar *value, + if (doc == NULL) { + return(NULL); + } +- if (value == NULL) { ++ if ((value == NULL) || (value[0] == 0)) { + return(NULL); + } + if (attr == NULL) { +@@ -2681,7 +2728,7 @@ xmlAddID(xmlValidCtxtPtr ctxt, xmlDocPtr doc, const xmlChar *value, + */ + ret->value = xmlStrdup(value); + ret->doc = doc; +- if ((ctxt != NULL) && (ctxt->vstateNr != 0)) { ++ if (xmlIsStreaming(ctxt)) { + /* + * Operating in streaming mode, attr is gonna disappear + */ +@@ -2820,6 +2867,7 @@ xmlRemoveID(xmlDocPtr doc, xmlAttrPtr attr) { + ID = xmlNodeListGetString(doc, attr->children, 1); + if (ID == NULL) + return(-1); ++ xmlValidNormalizeString(ID); + + id = xmlHashLookup(table, ID); + if (id == NULL || id->attr != attr) { +@@ -3009,7 +3057,7 @@ xmlAddRef(xmlValidCtxtPtr ctxt, xmlDocPtr doc, const xmlChar *value, + * fill the structure. 
+ */ + ret->value = xmlStrdup(value); +- if ((ctxt != NULL) && (ctxt->vstateNr != 0)) { ++ if (xmlIsStreaming(ctxt)) { + /* + * Operating in streaming mode, attr is gonna disappear + */ +@@ -4028,8 +4076,7 @@ xmlValidateAttributeValue2(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + xmlChar * + xmlValidCtxtNormalizeAttributeValue(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + xmlNodePtr elem, const xmlChar *name, const xmlChar *value) { +- xmlChar *ret, *dst; +- const xmlChar *src; ++ xmlChar *ret; + xmlAttributePtr attrDecl = NULL; + int extsubset = 0; + +@@ -4070,19 +4117,7 @@ xmlValidCtxtNormalizeAttributeValue(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + ret = xmlStrdup(value); + if (ret == NULL) + return(NULL); +- src = value; +- dst = ret; +- while (*src == 0x20) src++; +- while (*src != 0) { +- if (*src == 0x20) { +- while (*src == 0x20) src++; +- if (*src != 0) +- *dst++ = 0x20; +- } else { +- *dst++ = *src++; +- } +- } +- *dst = 0; ++ xmlValidNormalizeString(ret); + if ((doc->standalone) && (extsubset == 1) && (!xmlStrEqual(value, ret))) { + xmlErrValidNode(ctxt, elem, XML_DTD_NOT_STANDALONE, + "standalone: %s on %s value had to be normalized based on external subset declaration\n", +@@ -4114,8 +4149,7 @@ xmlValidCtxtNormalizeAttributeValue(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + xmlChar * + xmlValidNormalizeAttributeValue(xmlDocPtr doc, xmlNodePtr elem, + const xmlChar *name, const xmlChar *value) { +- xmlChar *ret, *dst; +- const xmlChar *src; ++ xmlChar *ret; + xmlAttributePtr attrDecl = NULL; + + if (doc == NULL) return(NULL); +@@ -4145,19 +4179,7 @@ xmlValidNormalizeAttributeValue(xmlDocPtr doc, xmlNodePtr elem, + ret = xmlStrdup(value); + if (ret == NULL) + return(NULL); +- src = value; +- dst = ret; +- while (*src == 0x20) src++; +- while (*src != 0) { +- if (*src == 0x20) { +- while (*src == 0x20) src++; +- if (*src != 0) +- *dst++ = 0x20; +- } else { +- *dst++ = *src++; +- } +- } +- *dst = 0; ++ xmlValidNormalizeString(ret); + return(ret); + } + +-- +2.25.1 + diff --git 
a/meta/recipes-core/libxml/libxml2_2.9.10.bb b/meta/recipes-core/libxml/libxml2_2.9.10.bb index ce4f9a3340..778312f662 100644 --- a/meta/recipes-core/libxml/libxml2_2.9.10.bb +++ b/meta/recipes-core/libxml/libxml2_2.9.10.bb @@ -30,6 +30,8 @@ SRC_URI = "http://www.xmlsoft.org/sources/libxml2-${PV}.tar.gz;name=libtar \ file://CVE-2021-3518-0002.patch \ file://CVE-2021-3537.patch \ file://CVE-2021-3541.patch \ + file://CVE-2022-23308.patch \ + file://CVE-2022-23308-fix-regression.patch \ " SRC_URI[libtar.md5sum] = "10942a1dc23137a8aa07f0639cbfece5" @@ -47,7 +49,7 @@ PACKAGECONFIG[ipv6] = "--enable-ipv6,--disable-ipv6," inherit autotools pkgconfig binconfig-disabled ptest -inherit ${@bb.utils.contains('PACKAGECONFIG', 'python', 'python3native', '', d)} +inherit ${@bb.utils.contains('PACKAGECONFIG', 'python', 'python3targetconfig', '', d)} RDEPENDS_${PN}-ptest += "make ${@bb.utils.contains('PACKAGECONFIG', 'python', 'libgcc python3-core python3-logging python3-shell python3-stringold python3-threading python3-unittest ${PN}-python', '', d)}" diff --git a/meta/recipes-core/musl/libucontext_git.bb b/meta/recipes-core/musl/libucontext_git.bb index 11affebb49..87946b7ec3 100644 --- a/meta/recipes-core/musl/libucontext_git.bb +++ b/meta/recipes-core/musl/libucontext_git.bb @@ -10,7 +10,7 @@ DEPENDS = "" PV = "0.10+${SRCPV}" SRCREV = "19fa1bbfc26efb92147b5e85cc0ca02a0e837561" -SRC_URI = "git://github.com/kaniini/libucontext \ +SRC_URI = "git://github.com/kaniini/libucontext;branch=master;protocol=https \ " S = "${WORKDIR}/git" diff --git a/meta/recipes-core/musl/musl-obstack.bb b/meta/recipes-core/musl/musl-obstack.bb index 3003935fe5..74de48c2cd 100644 --- a/meta/recipes-core/musl/musl-obstack.bb +++ b/meta/recipes-core/musl/musl-obstack.bb @@ -10,7 +10,7 @@ SECTION = "libs" PV = "1.1" SRCREV = "d2ad66b0df44a4b784956f7f7f2717131ddc05f4" -SRC_URI = "git://github.com/pullmoll/musl-obstack" +SRC_URI = "git://github.com/pullmoll/musl-obstack;branch=master;protocol=https" 
UPSTREAM_CHECK_COMMITS = "1" diff --git a/meta/recipes-core/musl/musl-utils.bb b/meta/recipes-core/musl/musl-utils.bb index dd0ce33061..c30509469c 100644 --- a/meta/recipes-core/musl/musl-utils.bb +++ b/meta/recipes-core/musl/musl-utils.bb @@ -11,7 +11,7 @@ SECTION = "utils" PV = "20170421" SRCREV = "fb5630138ccabbbc14a19d372096a04e42573c7d" -SRC_URI = "git://github.com/boltlinux/musl-utils" +SRC_URI = "git://github.com/boltlinux/musl-utils;branch=master;protocol=https" UPSTREAM_CHECK_COMMITS = "1" diff --git a/meta/recipes-core/musl/musl_git.bb b/meta/recipes-core/musl/musl_git.bb index e6f9e2441e..a241a2fbbd 100644 --- a/meta/recipes-core/musl/musl_git.bb +++ b/meta/recipes-core/musl/musl_git.bb @@ -12,7 +12,7 @@ PV = "${BASEVER}+git${SRCPV}" # mirror is at git://github.com/kraj/musl.git -SRC_URI = "git://git.musl-libc.org/musl \ +SRC_URI = "git://git.musl-libc.org/musl;branch=master \ file://0001-Make-dynamic-linker-a-relative-symlink-to-libc.patch \ file://0002-ldso-Use-syslibdir-and-libdir-as-default-pathes-to-l.patch \ " diff --git a/meta/recipes-core/ncurses/files/CVE-2021-39537.patch b/meta/recipes-core/ncurses/files/CVE-2021-39537.patch new file mode 100644 index 0000000000..d63bf57e8d --- /dev/null +++ b/meta/recipes-core/ncurses/files/CVE-2021-39537.patch @@ -0,0 +1,65 @@ +From e83ecbd26252bac163fc4377ef30edbd4acb0bad Mon Sep 17 00:00:00 2001 +From: Sven Joachim <svenjoac@gmx.de> +Date: Mon, 1 Jun 2020 08:03:52 +0200 +Subject: [PATCH] Import upstream patch 20200531 + +20200531 + + correct configure version-check/warnng for g++ to allow for 10.x + + re-enable "bel" in konsole-base (report by Nia Huang) + + add linux-s entry (patch by Alexandre Montaron). + + drop long-obsolete convert_configure.pl + + add test/test_parm.c, for checking tparm changes. + + improve parameter-checking for tparm, adding function _nc_tiparm() to + handle the most-used case, which accepts only numeric parameters + (report/testcase by "puppet-meteor"). 
+ + use a more conservative estimate of the buffer-size in lib_tparm.c's + save_text() and save_number(), in case the sprintf() function + passes-through unexpected characters from a format specifier + (report/testcase by "puppet-meteor"). + + add a check for end-of-string in cvtchar to handle a malformed + string in infotocap (report/testcase by "puppet-meteor"). + +CVE: CVE-2021-39537 + +Upstream-Status: Backport [https://github.com/mirror/ncurses/commit/790a85dbd4a81d5f5d8dd02a44d84f01512ef443] + +Signed-off-by: Mingli Yu <mingli.yu@windriver.com> +--- + ncurses/tinfo/captoinfo.c | 11 +- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/ncurses/tinfo/captoinfo.c b/ncurses/tinfo/captoinfo.c +index 8b3b83d1..9362105a 100644 +--- a/ncurses/tinfo/captoinfo.c ++++ b/ncurses/tinfo/captoinfo.c +@@ -98,7 +98,7 @@ + #include <ctype.h> + #include <tic.h> + +-MODULE_ID("$Id: captoinfo.c,v 1.98 2020/02/02 23:34:34 tom Exp $") ++MODULE_ID("$Id: captoinfo.c,v 1.99 2020/05/25 21:28:29 tom Exp $") + + #if 0 + #define DEBUG_THIS(p) DEBUG(9, p) +@@ -216,12 +216,15 @@ cvtchar(register const char *sp) + } + break; + case '^': ++ len = 2; + c = UChar(*++sp); +- if (c == '?') ++ if (c == '?') { + c = 127; +- else ++ } else if (c == '\0') { ++ len = 1; ++ } else { + c &= 0x1f; +- len = 2; ++ } + break; + default: + c = UChar(*sp); +-- +2.17.1 + diff --git a/meta/recipes-core/ncurses/ncurses.inc b/meta/recipes-core/ncurses/ncurses.inc index ef59bc3b0a..9c74d2ec36 100644 --- a/meta/recipes-core/ncurses/ncurses.inc +++ b/meta/recipes-core/ncurses/ncurses.inc @@ -13,7 +13,7 @@ BINCONFIG = "${bindir}/ncurses5-config ${bindir}/ncursesw5-config \ inherit autotools binconfig-disabled multilib_header pkgconfig # Upstream has useful patches at times at ftp://invisible-island.net/ncurses/ -SRC_URI = "git://salsa.debian.org/debian/ncurses.git;protocol=https" +SRC_URI = "git://salsa.debian.org/debian/ncurses.git;protocol=https;branch=master" EXTRA_AUTORECONF = "-I m4" diff --git 
a/meta/recipes-core/ncurses/ncurses_6.2.bb b/meta/recipes-core/ncurses/ncurses_6.2.bb index e7d7396a20..598c51b00b 100644 --- a/meta/recipes-core/ncurses/ncurses_6.2.bb +++ b/meta/recipes-core/ncurses/ncurses_6.2.bb @@ -3,6 +3,7 @@ require ncurses.inc SRC_URI += "file://0001-tic-hang.patch \ file://0002-configure-reproducible.patch \ file://0003-gen-pkgconfig.in-Do-not-include-LDFLAGS-in-generated.patch \ + file://CVE-2021-39537.patch \ " # commit id corresponds to the revision in package version SRCREV = "a669013cd5e9d6434e5301348ea51baf306c93c4" diff --git a/meta/recipes-core/netbase/netbase_6.2.bb b/meta/recipes-core/netbase/netbase_6.2.bb index c016d32dd3..ad7e9becde 100644 --- a/meta/recipes-core/netbase/netbase_6.2.bb +++ b/meta/recipes-core/netbase/netbase_6.2.bb @@ -6,7 +6,7 @@ LICENSE = "GPLv2" LIC_FILES_CHKSUM = "file://debian/copyright;md5=3dd6192d306f582dee7687da3d8748ab" PE = "1" -SRC_URI = "git://salsa.debian.org/md/netbase.git;protocol=https" +SRC_URI = "git://salsa.debian.org/md/netbase.git;protocol=https;branch=master" SRCREV = "1c892c96a078ef28ec1a94681b3a0da7a3d545f7" inherit allarch diff --git a/meta/recipes-core/os-release/os-release.bb b/meta/recipes-core/os-release/os-release.bb index a29d678125..33f75e39b8 100644 --- a/meta/recipes-core/os-release/os-release.bb +++ b/meta/recipes-core/os-release/os-release.bb @@ -12,7 +12,9 @@ do_configure[noexec] = "1" # Other valid fields: BUILD_ID ID_LIKE ANSI_COLOR CPE_NAME # HOME_URL SUPPORT_URL BUG_REPORT_URL -OS_RELEASE_FIELDS = "ID ID_LIKE NAME VERSION VERSION_ID PRETTY_NAME" +OS_RELEASE_FIELDS = "\ + ID ID_LIKE NAME VERSION VERSION_ID PRETTY_NAME DISTRO_CODENAME \ +" OS_RELEASE_UNQUOTED_FIELDS = "ID VERSION_ID VARIANT_ID" ID = "${DISTRO}" diff --git a/meta/recipes-core/psplash/psplash_git.bb b/meta/recipes-core/psplash/psplash_git.bb index 59e1e3f194..e1236475fc 100644 --- a/meta/recipes-core/psplash/psplash_git.bb +++ b/meta/recipes-core/psplash/psplash_git.bb @@ -10,7 +10,7 @@ SRCREV = 
"0a902f7cd875ccf018456451be369f05fa55f962" PV = "0.1+git${SRCPV}" PR = "r15" -SRC_URI = "git://git.yoctoproject.org/${BPN} \ +SRC_URI = "git://git.yoctoproject.org/${BPN};branch=master \ file://psplash-init \ file://psplash-start.service \ file://psplash-systemd.service \ diff --git a/meta/recipes-core/systemd/systemd.inc b/meta/recipes-core/systemd/systemd.inc index 7d3b3064ba..b11ab112af 100644 --- a/meta/recipes-core/systemd/systemd.inc +++ b/meta/recipes-core/systemd/systemd.inc @@ -16,6 +16,6 @@ LIC_FILES_CHKSUM = "file://LICENSE.GPL2;md5=751419260aa954499f7abaabaa882bbe \ SRCREV = "17472dca0160cbe7b807ca648475fd70d0d62fe5" SRCBRANCH = "v247-stable" -SRC_URI = "git://github.com/systemd/systemd-stable.git;protocol=git;branch=${SRCBRANCH}" +SRC_URI = "git://github.com/systemd/systemd-stable.git;protocol=https;branch=${SRCBRANCH}" S = "${WORKDIR}/git" diff --git a/meta/recipes-core/update-rc.d/update-rc.d_0.8.bb b/meta/recipes-core/update-rc.d/update-rc.d_0.8.bb index da716674c3..daee5c224b 100644 --- a/meta/recipes-core/update-rc.d/update-rc.d_0.8.bb +++ b/meta/recipes-core/update-rc.d/update-rc.d_0.8.bb @@ -6,7 +6,7 @@ SECTION = "base" LICENSE = "GPLv2+" LIC_FILES_CHKSUM = "file://update-rc.d;beginline=5;endline=15;md5=d40a07c27f535425934bb5001f2037d9" -SRC_URI = "git://git.yoctoproject.org/update-rc.d" +SRC_URI = "git://git.yoctoproject.org/update-rc.d;branch=master" SRCREV = "8636cf478d426b568c1be11dbd9346f67e03adac" UPSTREAM_CHECK_COMMITS = "1" diff --git a/meta/recipes-core/util-linux/util-linux_2.36.2.bb b/meta/recipes-core/util-linux/util-linux_2.36.2.bb index c79cf782d1..82cc8a3dbf 100644 --- a/meta/recipes-core/util-linux/util-linux_2.36.2.bb +++ b/meta/recipes-core/util-linux/util-linux_2.36.2.bb @@ -73,7 +73,7 @@ EXTRA_OECONF = "\ \ --disable-bfs --disable-chfn-chsh --disable-login \ --disable-makeinstall-chown --disable-minix --disable-newgrp \ - --disable-use-tty-group --disable-vipw \ + --disable-use-tty-group --disable-vipw --disable-raw \ \ 
--without-udev \ \ diff --git a/meta/recipes-core/zlib/zlib/CVE-2018-25032.patch b/meta/recipes-core/zlib/zlib/CVE-2018-25032.patch new file mode 100644 index 0000000000..5cb6183641 --- /dev/null +++ b/meta/recipes-core/zlib/zlib/CVE-2018-25032.patch @@ -0,0 +1,347 @@ +CVE: CVE-2018-25032 +Upstream-Status: Backport +Signed-off-by: Ross Burton <ross.burton@arm.com> + +From 5c44459c3b28a9bd3283aaceab7c615f8020c531 Mon Sep 17 00:00:00 2001 +From: Mark Adler <madler@alumni.caltech.edu> +Date: Tue, 17 Apr 2018 22:09:22 -0700 +Subject: [PATCH] Fix a bug that can crash deflate on some input when using + Z_FIXED. + +This bug was reported by Danilo Ramos of Eideticom, Inc. It has +lain in wait 13 years before being found! The bug was introduced +in zlib 1.2.2.2, with the addition of the Z_FIXED option. That +option forces the use of fixed Huffman codes. For rare inputs with +a large number of distant matches, the pending buffer into which +the compressed data is written can overwrite the distance symbol +table which it overlays. That results in corrupted output due to +invalid distances, and can result in out-of-bound accesses, +crashing the application. + +The fix here combines the distance buffer and literal/length +buffers into a single symbol buffer. Now three bytes of pending +buffer space are opened up for each literal or length/distance +pair consumed, instead of the previous two bytes. This assures +that the pending buffer cannot overwrite the symbol table, since +the maximum fixed code compressed length/distance is 31 bits, and +since there are four bytes of pending space for every three bytes +of symbol space. 
+--- + deflate.c | 74 ++++++++++++++++++++++++++++++++++++++++--------------- + deflate.h | 25 +++++++++---------- + trees.c | 50 +++++++++++-------------------------- + 3 files changed, 79 insertions(+), 70 deletions(-) + +diff --git a/deflate.c b/deflate.c +index 425babc00..19cba873a 100644 +--- a/deflate.c ++++ b/deflate.c +@@ -255,11 +255,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + int wrap = 1; + static const char my_version[] = ZLIB_VERSION; + +- ushf *overlay; +- /* We overlay pending_buf and d_buf+l_buf. This works since the average +- * output size for (length,distance) codes is <= 24 bits. +- */ +- + if (version == Z_NULL || version[0] != my_version[0] || + stream_size != sizeof(z_stream)) { + return Z_VERSION_ERROR; +@@ -329,9 +324,47 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + +- overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); +- s->pending_buf = (uchf *) overlay; +- s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); ++ /* We overlay pending_buf and sym_buf. This works since the average size ++ * for length/distance pairs over any compressed block is assured to be 31 ++ * bits or less. ++ * ++ * Analysis: The longest fixed codes are a length code of 8 bits plus 5 ++ * extra bits, for lengths 131 to 257. The longest fixed distance codes are ++ * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest ++ * possible fixed-codes length/distance pair is then 31 bits total. ++ * ++ * sym_buf starts one-fourth of the way into pending_buf. So there are ++ * three bytes in sym_buf for every four bytes in pending_buf. Each symbol ++ * in sym_buf is three bytes -- two for the distance and one for the ++ * literal/length. As each symbol is consumed, the pointer to the next ++ * sym_buf value to read moves forward three bytes. 
From that symbol, up to ++ * 31 bits are written to pending_buf. The closest the written pending_buf ++ * bits gets to the next sym_buf symbol to read is just before the last ++ * code is written. At that time, 31*(n-2) bits have been written, just ++ * after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at ++ * 8*n bits into pending_buf. (Note that the symbol buffer fills when n-1 ++ * symbols are written.) The closest the writing gets to what is unread is ++ * then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and ++ * can range from 128 to 32768. ++ * ++ * Therefore, at a minimum, there are 142 bits of space between what is ++ * written and what is read in the overlain buffers, so the symbols cannot ++ * be overwritten by the compressed data. That space is actually 139 bits, ++ * due to the three-bit fixed-code block header. ++ * ++ * That covers the case where either Z_FIXED is specified, forcing fixed ++ * codes, or when the use of fixed codes is chosen, because that choice ++ * results in a smaller compressed block than dynamic codes. That latter ++ * condition then assures that the above analysis also covers all dynamic ++ * blocks. A dynamic-code block will only be chosen to be emitted if it has ++ * fewer bits than a fixed-code block would for the same set of symbols. ++ * Therefore its average symbol length is assured to be less than 31. So ++ * the compressed data for a dynamic block also cannot overwrite the ++ * symbols from which it is being constructed. 
++ */ ++ ++ s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4); ++ s->pending_buf_size = (ulg)s->lit_bufsize * 4; + + if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || + s->pending_buf == Z_NULL) { +@@ -340,8 +373,12 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + deflateEnd (strm); + return Z_MEM_ERROR; + } +- s->d_buf = overlay + s->lit_bufsize/sizeof(ush); +- s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; ++ s->sym_buf = s->pending_buf + s->lit_bufsize; ++ s->sym_end = (s->lit_bufsize - 1) * 3; ++ /* We avoid equality with lit_bufsize*3 because of wraparound at 64K ++ * on 16 bit machines and because stored blocks are restricted to ++ * 64K-1 bytes. ++ */ + + s->level = level; + s->strategy = strategy; +@@ -552,7 +589,7 @@ int ZEXPORT deflatePrime (strm, bits, value) + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; +- if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3)) ++ if (s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) + return Z_BUF_ERROR; + do { + put = Buf_size - s->bi_valid; +@@ -1113,7 +1150,6 @@ int ZEXPORT deflateCopy (dest, source) + #else + deflate_state *ds; + deflate_state *ss; +- ushf *overlay; + + + if (deflateStateCheck(source) || dest == Z_NULL) { +@@ -1133,8 +1169,7 @@ int ZEXPORT deflateCopy (dest, source) + ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); + ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); + ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); +- overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); +- ds->pending_buf = (uchf *) overlay; ++ ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4); + + if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || + ds->pending_buf == Z_NULL) { +@@ -1148,8 +1183,7 @@ int ZEXPORT deflateCopy (dest, source) + zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); + + ds->pending_out = 
ds->pending_buf + (ss->pending_out - ss->pending_buf); +- ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); +- ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; ++ ds->sym_buf = ds->pending_buf + ds->lit_bufsize; + + ds->l_desc.dyn_tree = ds->dyn_ltree; + ds->d_desc.dyn_tree = ds->dyn_dtree; +@@ -1925,7 +1959,7 @@ local block_state deflate_fast(s, flush) + FLUSH_BLOCK(s, 1); + return finish_done; + } +- if (s->last_lit) ++ if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; + } +@@ -2056,7 +2090,7 @@ local block_state deflate_slow(s, flush) + FLUSH_BLOCK(s, 1); + return finish_done; + } +- if (s->last_lit) ++ if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; + } +@@ -2131,7 +2165,7 @@ local block_state deflate_rle(s, flush) + FLUSH_BLOCK(s, 1); + return finish_done; + } +- if (s->last_lit) ++ if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; + } +@@ -2170,7 +2204,7 @@ local block_state deflate_huff(s, flush) + FLUSH_BLOCK(s, 1); + return finish_done; + } +- if (s->last_lit) ++ if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; + } +diff --git a/deflate.h b/deflate.h +index 23ecdd312..d4cf1a98b 100644 +--- a/deflate.h ++++ b/deflate.h +@@ -217,7 +217,7 @@ typedef struct internal_state { + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ + +- uchf *l_buf; /* buffer for literals or lengths */ ++ uchf *sym_buf; /* buffer for distances and literals/lengths */ + + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. There are 4 reasons for +@@ -239,13 +239,8 @@ typedef struct internal_state { + * - I can't count above 4 + */ + +- uInt last_lit; /* running index in l_buf */ +- +- ushf *d_buf; +- /* Buffer for distances. To simplify the code, d_buf and l_buf have +- * the same number of elements. To use different lengths, an extra flag +- * array would be necessary. 
+- */ ++ uInt sym_next; /* running index in sym_buf */ ++ uInt sym_end; /* symbol table full when sym_next reaches this */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ +@@ -325,20 +320,22 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, + + # define _tr_tally_lit(s, c, flush) \ + { uch cc = (c); \ +- s->d_buf[s->last_lit] = 0; \ +- s->l_buf[s->last_lit++] = cc; \ ++ s->sym_buf[s->sym_next++] = 0; \ ++ s->sym_buf[s->sym_next++] = 0; \ ++ s->sym_buf[s->sym_next++] = cc; \ + s->dyn_ltree[cc].Freq++; \ +- flush = (s->last_lit == s->lit_bufsize-1); \ ++ flush = (s->sym_next == s->sym_end); \ + } + # define _tr_tally_dist(s, distance, length, flush) \ + { uch len = (uch)(length); \ + ush dist = (ush)(distance); \ +- s->d_buf[s->last_lit] = dist; \ +- s->l_buf[s->last_lit++] = len; \ ++ s->sym_buf[s->sym_next++] = dist; \ ++ s->sym_buf[s->sym_next++] = dist >> 8; \ ++ s->sym_buf[s->sym_next++] = len; \ + dist--; \ + s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ + s->dyn_dtree[d_code(dist)].Freq++; \ +- flush = (s->last_lit == s->lit_bufsize-1); \ ++ flush = (s->sym_next == s->sym_end); \ + } + #else + # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) +diff --git a/trees.c b/trees.c +index 4f4a65011..decaeb7c3 100644 +--- a/trees.c ++++ b/trees.c +@@ -416,7 +416,7 @@ local void init_block(s) + + s->dyn_ltree[END_BLOCK].Freq = 1; + s->opt_len = s->static_len = 0L; +- s->last_lit = s->matches = 0; ++ s->sym_next = s->matches = 0; + } + + #define SMALLEST 1 +@@ -948,7 +948,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) + + Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, +- s->last_lit)); ++ s->sym_next / 3)); + + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + +@@ -1017,8 +1017,9 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc) + 
unsigned dist; /* distance of matched string */ + unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ + { +- s->d_buf[s->last_lit] = (ush)dist; +- s->l_buf[s->last_lit++] = (uch)lc; ++ s->sym_buf[s->sym_next++] = dist; ++ s->sym_buf[s->sym_next++] = dist >> 8; ++ s->sym_buf[s->sym_next++] = lc; + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; +@@ -1033,30 +1034,7 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc) + s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } +- +-#ifdef TRUNCATE_BLOCK +- /* Try to guess if it is profitable to stop the current block here */ +- if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { +- /* Compute an upper bound for the compressed length */ +- ulg out_length = (ulg)s->last_lit*8L; +- ulg in_length = (ulg)((long)s->strstart - s->block_start); +- int dcode; +- for (dcode = 0; dcode < D_CODES; dcode++) { +- out_length += (ulg)s->dyn_dtree[dcode].Freq * +- (5L+extra_dbits[dcode]); +- } +- out_length >>= 3; +- Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", +- s->last_lit, in_length, out_length, +- 100L - out_length*100L/in_length)); +- if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; +- } +-#endif +- return (s->last_lit == s->lit_bufsize-1); +- /* We avoid equality with lit_bufsize because of wraparound at 64K +- * on 16 bit machines and because stored blocks are restricted to +- * 64K-1 bytes. 
+- */ ++ return (s->sym_next == s->sym_end); + } + + /* =========================================================================== +@@ -1069,13 +1047,14 @@ local void compress_block(s, ltree, dtree) + { + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ +- unsigned lx = 0; /* running index in l_buf */ ++ unsigned sx = 0; /* running index in sym_buf */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + +- if (s->last_lit != 0) do { +- dist = s->d_buf[lx]; +- lc = s->l_buf[lx++]; ++ if (s->sym_next != 0) do { ++ dist = s->sym_buf[sx++] & 0xff; ++ dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; ++ lc = s->sym_buf[sx++]; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); +@@ -1100,11 +1079,10 @@ local void compress_block(s, ltree, dtree) + } + } /* literal or match pair ? */ + +- /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ +- Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, +- "pendingBuf overflow"); ++ /* Check that the overlay between pending_buf and sym_buf is ok: */ ++ Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); + +- } while (lx < s->last_lit); ++ } while (sx < s->sym_next); + + send_code(s, END_BLOCK, ltree); + } diff --git a/meta/recipes-core/zlib/zlib_1.2.11.bb b/meta/recipes-core/zlib/zlib_1.2.11.bb index ef9431ae47..bc42cd64e9 100644 --- a/meta/recipes-core/zlib/zlib_1.2.11.bb +++ b/meta/recipes-core/zlib/zlib_1.2.11.bb @@ -8,6 +8,7 @@ LIC_FILES_CHKSUM = "file://zlib.h;beginline=6;endline=23;md5=5377232268e952e9ef6 SRC_URI = "${SOURCEFORGE_MIRROR}/libpng/${BPN}/${PV}/${BPN}-${PV}.tar.xz \ file://ldflags-tests.patch \ + file://CVE-2018-25032.patch \ file://run-ptest \ " UPSTREAM_CHECK_URI = "http://zlib.net/" |