meta/recipes-core/expat/expat/CVE-2022-25236-2.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232

Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/f178826b]
CVE: CVE-2022-25236

The commit is a merge commit, and this patch is created by:

$ git show -m -p --stat f178826b

Remove changes for expat/Changes and reference.html which fail to be applied.

Signed-off-by: Kai Kang <kai.kang@windriver.com>

commit f178826bb1e9c8ee23202f1be55ad4ac7b649e84 (from c99e0e7f2b15b48848038992ecbb4480f957cfe9)
Merge: c99e0e7f 9579f7ea
Author: Sebastian Pipping <sebastian@pipping.org>
Date:   Fri Mar 4 18:43:39 2022 +0100

    Merge pull request #577 from libexpat/namesep
    
    lib: Relax fix to CVE-2022-25236 with regard to RFC 3986 URI characters (fixes #572)
---
 expat/Changes            |  16 ++++++
 expat/doc/reference.html |   8 +++
 expat/lib/expat.h        |  11 ++++
 expat/lib/xmlparse.c     | 139 ++++++++++++++++++++++++++++++++++++++++++++---
 expat/tests/runtests.c   |   8 ++-
 5 files changed, 171 insertions(+), 11 deletions(-)

diff --git a/lib/expat.h b/lib/expat.h
index 5ab493f7..181fc960 100644
--- a/lib/expat.h
+++ b/lib/expat.h
@@ -239,6 +239,17 @@ XML_ParserCreate(const XML_Char *encoding);
    and the local part will be concatenated without any separator.
    It is a programming error to use the separator '\0' with namespace
    triplets (see XML_SetReturnNSTriplet).
+   If a namespace separator is chosen that can be part of a URI or
+   part of an XML name, splitting an expanded name back into its
+   1, 2 or 3 original parts on application level in the element handler
+   may end up vulnerable, so these are advised against;  sane choices for
+   a namespace separator are e.g. '\n' (line feed) and '|' (pipe).
+
+   Note that Expat does not validate namespace URIs (beyond encoding)
+   against RFC 3986 today (and is not required to do so with regard to
+   the XML 1.0 namespaces specification) but it may start doing that
+   in future releases.  Before that, an application using Expat must
+   be ready to receive namespace URIs containing non-URI characters.
 */
 XMLPARSEAPI(XML_Parser)
 XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index 59da19c8..6fe2cf1e 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -3705,6 +3705,117 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
   return XML_ERROR_NONE;
 }
 
+static XML_Bool
+is_rfc3986_uri_char(XML_Char candidate) {
+  // For the RFC 3986 ANBF grammar see
+  // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
+
+  switch (candidate) {
+  // From rule "ALPHA" (uppercase half)
+  case 'A':
+  case 'B':
+  case 'C':
+  case 'D':
+  case 'E':
+  case 'F':
+  case 'G':
+  case 'H':
+  case 'I':
+  case 'J':
+  case 'K':
+  case 'L':
+  case 'M':
+  case 'N':
+  case 'O':
+  case 'P':
+  case 'Q':
+  case 'R':
+  case 'S':
+  case 'T':
+  case 'U':
+  case 'V':
+  case 'W':
+  case 'X':
+  case 'Y':
+  case 'Z':
+
+  // From rule "ALPHA" (lowercase half)
+  case 'a':
+  case 'b':
+  case 'c':
+  case 'd':
+  case 'e':
+  case 'f':
+  case 'g':
+  case 'h':
+  case 'i':
+  case 'j':
+  case 'k':
+  case 'l':
+  case 'm':
+  case 'n':
+  case 'o':
+  case 'p':
+  case 'q':
+  case 'r':
+  case 's':
+  case 't':
+  case 'u':
+  case 'v':
+  case 'w':
+  case 'x':
+  case 'y':
+  case 'z':
+
+  // From rule "DIGIT"
+  case '0':
+  case '1':
+  case '2':
+  case '3':
+  case '4':
+  case '5':
+  case '6':
+  case '7':
+  case '8':
+  case '9':
+
+  // From rule "pct-encoded"
+  case '%':
+
+  // From rule "unreserved"
+  case '-':
+  case '.':
+  case '_':
+  case '~':
+
+  // From rule "gen-delims"
+  case ':':
+  case '/':
+  case '?':
+  case '#':
+  case '[':
+  case ']':
+  case '@':
+
+  // From rule "sub-delims"
+  case '!':
+  case '$':
+  case '&':
+  case '\'':
+  case '(':
+  case ')':
+  case '*':
+  case '+':
+  case ',':
+  case ';':
+  case '=':
+    return XML_TRUE;
+
+  default:
+    return XML_FALSE;
+  }
+}
+
 /* addBinding() overwrites the value of prefix->binding without checking.
    Therefore one must keep track of the old value outside of addBinding().
 */
@@ -3763,14 +3874,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
       isXMLNS = XML_FALSE;
 
-    // NOTE: While Expat does not validate namespace URIs against RFC 3986,
-    //       we have to at least make sure that the XML processor on top of
-    //       Expat (that is splitting tag names by namespace separator into
-    //       2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
-    //       by an attacker putting additional namespace separator characters
-    //       into namespace declarations.  That would be ambiguous and not to
-    //       be expected.
-    if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
+    // NOTE: While Expat does not validate namespace URIs against RFC 3986
+    //       today (and is not REQUIRED to do so with regard to the XML 1.0
+    //       namespaces specification) we have to at least make sure, that
+    //       the application on top of Expat (that is likely splitting expanded
+    //       element names ("qualified names") of form
+    //       "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
+    //       in its element handler code) cannot be confused by an attacker
+    //       putting additional namespace separator characters into namespace
+    //       declarations.  That would be ambiguous and not to be expected.
+    //
+    //       While the HTML API docs of function XML_ParserCreateNS have been
+    //       advising against use of a namespace separator character that can
+    //       appear in a URI for >20 years now, some widespread applications
+    //       are using URI characters (':' (colon) in particular) for a
+    //       namespace separator, in practice.  To keep these applications
+    //       functional, we only reject namespaces URIs containing the
+    //       application-chosen namespace separator if the chosen separator
+    //       is a non-URI character with regard to RFC 3986.
+    if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
+        && ! is_rfc3986_uri_char(uri[len])) {
       return XML_ERROR_SYNTAX;
     }
   }
diff --git a/tests/runtests.c b/tests/runtests.c
index 60da868e..712706c4 100644
--- a/tests/runtests.c
+++ b/tests/runtests.c
@@ -7406,16 +7406,18 @@ START_TEST(test_ns_separator_in_uri) {
   struct test_case {
     enum XML_Status expectedStatus;
     const char *doc;
+    XML_Char namesep;
   };
   struct test_case cases[] = {
-      {XML_STATUS_OK, "<doc xmlns='one_two' />"},
-      {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />"},
+      {XML_STATUS_OK, "<doc xmlns='one_two' />", XCS('\n')},
+      {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />", XCS('\n')},
+      {XML_STATUS_OK, "<doc xmlns='one:two' />", XCS(':')},
   };
 
   size_t i = 0;
   size_t failCount = 0;
   for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
-    XML_Parser parser = XML_ParserCreateNS(NULL, '\n');
+    XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep);
     XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
     if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),
                   /*isFinal*/ XML_TRUE)