summary refs log tree commit diff stats
path: root/meta/recipes-devtools/gcc/gcc/0004-CVE-2021-42574.patch
blob: 4999c71b641a69c7fd404e0a3e30704d153389c9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
From bef32d4a28595e933f24fef378cf052a30b674a7 Mon Sep 17 00:00:00 2001
From: David Malcolm <dmalcolm@redhat.com>
Date: Tue, 2 Nov 2021 15:45:22 -0400
Subject: [PATCH] libcpp: capture and underline ranges in -Wbidi-chars=
 [PR103026]
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This patch converts the bidi::vec to use a struct so that we can
capture location_t values for the bidirectional control characters.

Before:

  Wbidi-chars-1.c: In function ‘main’:
  Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
      6 |     /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */
        |                                                                           ^
  Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
      9 |     /* end admins only <U+202E> { <U+2066>*/
        |                                            ^

After:

  Wbidi-chars-1.c: In function ‘main’:
  Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control characters detected [-Wbidi-chars=]
      6 |     /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */
        |       ~~~~~~~~                                ~~~~~~~~                    ^
        |       |                                       |                           |
        |       |                                       |                           end of bidirectional context
        |       U+202E (RIGHT-TO-LEFT OVERRIDE)         U+2066 (LEFT-TO-RIGHT ISOLATE)
  Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control characters detected [-Wbidi-chars=]
      9 |     /* end admins only <U+202E> { <U+2066>*/
        |                        ~~~~~~~~   ~~~~~~~~ ^
        |                        |          |        |
        |                        |          |        end of bidirectional context
        |                        |          U+2066 (LEFT-TO-RIGHT ISOLATE)
        |                        U+202E (RIGHT-TO-LEFT OVERRIDE)

Signed-off-by: David Malcolm <dmalcolm@redhat.com>

gcc/testsuite/ChangeLog:
	PR preprocessor/103026
	* c-c++-common/Wbidi-chars-ranges.c: New test.

libcpp/ChangeLog:
	PR preprocessor/103026
	* lex.c (struct bidi::context): New.
	(bidi::vec): Convert to a vec of context rather than unsigned
	char.
	(bidi::ctx_at): Rename to...
	(bidi::pop_kind_at): ...this and reimplement for above change.
	(bidi::current_ctx): Update for change to vec.
	(bidi::current_ctx_ucn_p): Likewise.
	(bidi::current_ctx_loc): New.
	(bidi::on_char): Update for usage of context struct.  Add "loc"
	param and pass it when pushing contexts.
	(get_location_for_byte_range_in_cur_line): New.
	(get_bidi_utf8): Rename to...
	(get_bidi_utf8_1): ...this, reintroducing...
	(get_bidi_utf8): ...as a wrapper, setting *OUT when the result is
	not NONE.
	(get_bidi_ucn): Rename to...
	(get_bidi_ucn_1): ...this, reintroducing...
	(get_bidi_ucn): ...as a wrapper, setting *OUT when the result is
	not NONE.
	(class unpaired_bidi_rich_location): New.
	(maybe_warn_bidi_on_close): Use unpaired_bidi_rich_location when
	reporting on unpaired bidi chars.  Split into singular vs plural
	spellings.
	(maybe_warn_bidi_on_char): Pass in a location_t rather than a
	const uchar * and use it when emitting warnings, and when calling
	bidi::on_char.
	(_cpp_skip_block_comment): Capture location when kind is not NONE
	and pass it to maybe_warn_bidi_on_char.
	(skip_line_comment): Likewise.
	(forms_identifier_p): Likewise.
	(lex_raw_string): Likewise.
	(lex_string): Likewise.

Signed-off-by: David Malcolm <dmalcolm@redhat.com>

CVE: CVE-2021-42574
Upstream-Status: Backport [https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=bef32d4a28595e933f24fef378cf052a30b674a7]
Signed-off-by: Pgowda <pgowda.cve@gmail.com>

---
 .../c-c++-common/Wbidi-chars-ranges.c         |  54 ++++
 libcpp/lex.c                                  | 251 ++++++++++++++----
 2 files changed, 257 insertions(+), 48 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c

diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c b/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c
new file mode 100644
index 00000000000..298750a2a64
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c
@@ -0,0 +1,54 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidi-chars=unpaired -fdiagnostics-show-caret" } */
+/* Verify that we escape and underline pertinent bidirectional
+   control characters when quoting the source.  */
+
+int test_unpaired_bidi () {
+    int isAdmin = 0;
+    /*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
+#if 0
+   { dg-begin-multiline-output "" }
+     /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */
+       ~~~~~~~~                                ~~~~~~~~                    ^
+       |                                       |                           |
+       |                                       |                           end of bidirectional context
+       U+202E (RIGHT-TO-LEFT OVERRIDE)         U+2066 (LEFT-TO-RIGHT ISOLATE)
+   { dg-end-multiline-output "" }
+#endif
+
+        __builtin_printf("You are an admin.\n");
+    /* end admins only ‮ { ⁦*/
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
+#if 0
+   { dg-begin-multiline-output "" }
+     /* end admins only <U+202E> { <U+2066>*/
+                        ~~~~~~~~   ~~~~~~~~ ^
+                        |          |        |
+                        |          |        end of bidirectional context
+                        |          U+2066 (LEFT-TO-RIGHT ISOLATE)
+                        U+202E (RIGHT-TO-LEFT OVERRIDE)
+   { dg-end-multiline-output "" }
+#endif
+
+    return 0;
+}
+
+int LRE_‪_PDF_\u202c;
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
+#if 0
+   { dg-begin-multiline-output "" }
+ int LRE_<U+202A>_PDF_\u202c;
+         ~~~~~~~~     ^~~~~~
+   { dg-end-multiline-output "" }
+#endif
+
+const char *s1 = "LRE_‪_PDF_\u202c";
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
+#if 0
+   { dg-begin-multiline-output "" }
+ const char *s1 = "LRE_<U+202A>_PDF_\u202c";
+                       ~~~~~~~~     ^~~~~~
+   { dg-end-multiline-output "" }
+#endif
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 2421d6c0f40..94c36f0d014 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1172,11 +1172,34 @@ namespace bidi {
   /* All the UTF-8 encodings of bidi characters start with E2.  */
   constexpr uchar utf8_start = 0xe2;
 
+  struct context
+  {
+    context () {}
+    context (location_t loc, kind k, bool pdf, bool ucn)
+    : m_loc (loc), m_kind (k), m_pdf (pdf), m_ucn (ucn)
+    {
+    }
+
+    kind get_pop_kind () const
+    {
+      return m_pdf ? kind::PDF : kind::PDI;
+    }
+    bool ucn_p () const
+    {
+      return m_ucn;
+    }
+
+    location_t m_loc;
+    kind m_kind;
+    unsigned m_pdf : 1;
+    unsigned m_ucn : 1;
+  };
+
   /* A vector holding currently open bidi contexts.  We use a char for
      each context, its LSB is 1 if it represents a PDF context, 0 if it
      represents a PDI context.  The next bit is 1 if this context was open
      by a bidi character written as a UCN, and 0 when it was UTF-8.  */
-  semi_embedded_vec <unsigned char, 16> vec;
+  semi_embedded_vec <context, 16> vec;
 
   /* Close the whole comment/identifier/string literal/character constant
      context.  */
@@ -1193,19 +1216,19 @@ namespace bidi {
     vec.truncate (len - 1);
   }
 
-  /* Return the context of the Ith element.  */
-  kind ctx_at (unsigned int i)
+  /* Return the pop kind of the context of the Ith element.  */
+  kind pop_kind_at (unsigned int i)
   {
-    return (vec[i] & 1) ? kind::PDF : kind::PDI;
+    return vec[i].get_pop_kind ();
   }
 
-  /* Return which context is currently opened.  */
+  /* Return the pop kind of the context that is currently opened.  */
   kind current_ctx ()
   {
     unsigned int len = vec.count ();
     if (len == 0)
       return kind::NONE;
-    return ctx_at (len - 1);
+    return vec[len - 1].get_pop_kind ();
   }
 
   /* Return true if the current context comes from a UCN origin, that is,
@@ -1214,11 +1237,19 @@ namespace bidi {
   {
     unsigned int len = vec.count ();
     gcc_checking_assert (len > 0);
-    return (vec[len - 1] >> 1) & 1;
+    return vec[len - 1].m_ucn;
   }
 
-  /* We've read a bidi char, update the current vector as necessary.  */
-  void on_char (kind k, bool ucn_p)
+  location_t current_ctx_loc ()
+  {
+    unsigned int len = vec.count ();
+    gcc_checking_assert (len > 0);
+    return vec[len - 1].m_loc;
+  }
+
+  /* We've read a bidi char, update the current vector as necessary.
+     LOC is only valid when K is not kind::NONE.  */
+  void on_char (kind k, bool ucn_p, location_t loc)
   {
     switch (k)
       {
@@ -1226,12 +1257,12 @@ namespace bidi {
       case kind::RLE:
       case kind::LRO:
       case kind::RLO:
-	vec.push (ucn_p ? 3u : 1u);
+	vec.push (context (loc, k, true, ucn_p));
 	break;
       case kind::LRI:
       case kind::RLI:
       case kind::FSI:
-	vec.push (ucn_p ? 2u : 0u);
+	vec.push (context (loc, k, false, ucn_p));
 	break;
       /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO
 	 whose scope has not yet been terminated.  */
@@ -1245,7 +1276,7 @@ namespace bidi {
 	 yet been terminated.  */
       case kind::PDI:
 	for (int i = vec.count () - 1; i >= 0; --i)
-	  if (ctx_at (i) == kind::PDI)
+	  if (pop_kind_at (i) == kind::PDI)
 	    {
 	      vec.truncate (i);
 	      break;
@@ -1295,10 +1326,47 @@ namespace bidi {
   }
 }
 
+/* Get location_t for the range of bytes [START, START + NUM_BYTES)
+   within the current line in PFILE, with the caret at START.  */
+
+static location_t
+get_location_for_byte_range_in_cur_line (cpp_reader *pfile,
+					 const unsigned char *const start,
+					 size_t num_bytes)
+{
+  gcc_checking_assert (num_bytes > 0);
+
+  /* CPP_BUF_COLUMN and linemap_position_for_column both refer
+     to offsets in bytes, but CPP_BUF_COLUMN is 0-based,
+     whereas linemap_position_for_column is 1-based.  */
+
+  /* Get 0-based offsets within the line.  */
+  size_t start_offset = CPP_BUF_COLUMN (pfile->buffer, start);
+  size_t end_offset = start_offset + num_bytes - 1;
+
+  /* Now convert to location_t, where "columns" are 1-based byte offsets.  */
+  location_t start_loc = linemap_position_for_column (pfile->line_table,
+						      start_offset + 1);
+  location_t end_loc = linemap_position_for_column (pfile->line_table,
+						     end_offset + 1);
+
+  if (start_loc == end_loc)
+    return start_loc;
+
+  source_range src_range;
+  src_range.m_start = start_loc;
+  src_range.m_finish = end_loc;
+  location_t combined_loc = COMBINE_LOCATION_DATA (pfile->line_table,
+						   start_loc,
+						   src_range,
+						   NULL);
+  return combined_loc;
+}
+
 /* Parse a sequence of 3 bytes starting with P and return its bidi code.  */
 
 static bidi::kind
-get_bidi_utf8 (const unsigned char *const p)
+get_bidi_utf8_1 (const unsigned char *const p)
 {
   gcc_checking_assert (p[0] == bidi::utf8_start);
 
@@ -1340,10 +1408,25 @@ get_bidi_utf8 (const unsigned char *cons
   return bidi::kind::NONE;
 }
 
+/* Parse a sequence of 3 bytes starting with P and return its bidi code.
+   If the kind is not NONE, write the location to *OUT.  */
+
+static bidi::kind
+get_bidi_utf8 (cpp_reader *pfile, const unsigned char *const p, location_t *out)
+{
+  bidi::kind result = get_bidi_utf8_1 (p);
+  if (result != bidi::kind::NONE)
+    {
+      /* We have a sequence of 3 bytes starting at P.  */
+      *out = get_location_for_byte_range_in_cur_line (pfile, p, 3);
+    }
+  return result;
+}
+
 /* Parse a UCN where P points just past \u or \U and return its bidi code.  */
 
 static bidi::kind
-get_bidi_ucn (const unsigned char *p, bool is_U)
+get_bidi_ucn_1 (const unsigned char *p, bool is_U)
 {
   /* 6.4.3 Universal Character Names
       \u hex-quad
@@ -1412,6 +1495,62 @@ get_bidi_ucn (const unsigned char *p, bo
   return bidi::kind::NONE;
 }
 
+/* Parse a UCN where P points just past \u or \U and return its bidi code.
+   If the kind is not NONE, write the location to *OUT.  */
+
+static bidi::kind
+get_bidi_ucn (cpp_reader *pfile,  const unsigned char *p, bool is_U,
+	      location_t *out)
+{
+  bidi::kind result = get_bidi_ucn_1 (p, is_U);
+  if (result != bidi::kind::NONE)
+    {
+      const unsigned char *start = p - 2;
+      size_t num_bytes = 2 + (is_U ? 8 : 4);
+      *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
+    }
+  return result;
+}
+
+/* Subclass of rich_location for reporting on unpaired UTF-8
+   bidirectional control character(s).
+   Escape the source lines on output, and show all unclosed
+   bidi context, labelling everything.  */
+
+class unpaired_bidi_rich_location : public rich_location
+{
+ public:
+  class custom_range_label : public range_label
+  {
+   public:
+     label_text get_text (unsigned range_idx) const FINAL OVERRIDE
+     {
+       /* range 0 is the primary location; each subsequent range i + 1
+	  is for bidi::vec[i].  */
+       if (range_idx > 0)
+	 {
+	   const bidi::context &ctxt (bidi::vec[range_idx - 1]);
+	   return label_text::borrow (bidi::to_str (ctxt.m_kind));
+	 }
+       else
+	 return label_text::borrow (_("end of bidirectional context"));
+     }
+  };
+
+  unpaired_bidi_rich_location (cpp_reader *pfile, location_t loc)
+  : rich_location (pfile->line_table, loc, &m_custom_label)
+  {
+    set_escape_on_output (true);
+    for (unsigned i = 0; i < bidi::vec.count (); i++)
+      add_range (bidi::vec[i].m_loc,
+		 SHOW_RANGE_WITHOUT_CARET,
+		 &m_custom_label);
+  }
+
+ private:
+   custom_range_label m_custom_label;
+};
+
 /* We're closing a bidi context, that is, we've encountered a newline,
    are closing a C-style comment, or are at the end of a string literal,
    character constant, or identifier.  Warn if this context was not
@@ -1427,11 +1566,17 @@ maybe_warn_bidi_on_close (cpp_reader *pf
       const location_t loc
 	= linemap_position_for_column (pfile->line_table,
 				       CPP_BUF_COLUMN (pfile->buffer, p));
-      rich_location rich_loc (pfile->line_table, loc);
-      rich_loc.set_escape_on_output (true);
-      cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
-		      "unpaired UTF-8 bidirectional control character "
-		      "detected");
+      unpaired_bidi_rich_location rich_loc (pfile, loc);
+      /* cpp_callbacks doesn't yet have a way to handle singular vs plural
+	 forms of a diagnostic, so fake it for now.  */
+      if (bidi::vec.count () > 1)
+	cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+			"unpaired UTF-8 bidirectional control characters "
+			"detected");
+      else
+	cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+			"unpaired UTF-8 bidirectional control character "
+			"detected");
     }
   /* We're done with this context.  */
   bidi::on_close ();
@@ -1439,12 +1584,13 @@ maybe_warn_bidi_on_close (cpp_reader *pf
 
 /* We're at the beginning or in the middle of an identifier/comment/string
    literal/character constant.  Warn if we've encountered a bidi character.
-   KIND says which bidi character it was; P points to it in the character
-   stream.  UCN_P is true iff this bidi character was written as a UCN.  */
+   KIND says which bidi control character it was; UCN_P is true iff this bidi
+   control character was written as a UCN.  LOC is the location of the
+   character, but is only valid if KIND != bidi::kind::NONE.  */
 
 static void
-maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
-			 bool ucn_p)
+maybe_warn_bidi_on_char (cpp_reader *pfile, bidi::kind kind,
+			 bool ucn_p, location_t loc)
 {
   if (__builtin_expect (kind == bidi::kind::NONE, 1))
     return;
@@ -1453,9 +1599,6 @@ maybe_warn_bidi_on_char (cpp_reader *pfi
 
   if (warn_bidi != bidirectional_none)
     {
-      const location_t loc
-	= linemap_position_for_column (pfile->line_table,
-				       CPP_BUF_COLUMN (pfile->buffer, p));
       rich_location rich_loc (pfile->line_table, loc);
       rich_loc.set_escape_on_output (true);
 
@@ -1467,9 +1610,12 @@ maybe_warn_bidi_on_char (cpp_reader *pfi
 	{
 	  if (warn_bidi == bidirectional_unpaired
 	      && bidi::current_ctx_ucn_p () != ucn_p)
-	    cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
-			    "UTF-8 vs UCN mismatch when closing "
-			    "a context by \"%s\"", bidi::to_str (kind));
+	    {
+	      rich_loc.add_range (bidi::current_ctx_loc ());
+	      cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+			      "UTF-8 vs UCN mismatch when closing "
+			      "a context by \"%s\"", bidi::to_str (kind));
+	    }
 	}
       else if (warn_bidi == bidirectional_any)
 	{
@@ -1484,7 +1630,7 @@ maybe_warn_bidi_on_char (cpp_reader *pfi
 	}
     }
   /* We're done with this context.  */
-  bidi::on_char (kind, ucn_p);
+  bidi::on_char (kind, ucn_p, loc);
 }
 
 /* Skip a C-style block comment.  We find the end of the comment by
@@ -1552,8 +1698,9 @@ _cpp_skip_block_comment (cpp_reader *pfi
 	 a bidirectional control character.  */
       else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
 	{
-	  bidi::kind kind = get_bidi_utf8 (cur - 1);
-	  maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false);
+	  location_t loc;
+	  bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
+	  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
 	}
     }
 
@@ -1586,9 +1733,9 @@ skip_line_comment (cpp_reader *pfile)
 	    {
 	      if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
 		{
-		  bidi::kind kind = get_bidi_utf8 (buffer->cur);
-		  maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
-					   /*ucn_p=*/false);
+		  location_t loc;
+		  bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc);
+		  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
 		}
 	      buffer->cur++;
 	    }
@@ -1708,9 +1855,9 @@ forms_identifier_p (cpp_reader *pfile, i
 	  if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)
 	      && warn_bidi_p)
 	    {
-	      bidi::kind kind = get_bidi_utf8 (buffer->cur);
-	      maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
-				       /*ucn_p=*/false);
+	      location_t loc;
+	      bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc);
+	      maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
 	    }
 	  if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
 			       state, &s))
@@ -1722,10 +1869,12 @@ forms_identifier_p (cpp_reader *pfile, i
 	  buffer->cur += 2;
 	  if (warn_bidi_p)
 	    {
-	      bidi::kind kind = get_bidi_ucn (buffer->cur,
-					      buffer->cur[-1] == 'U');
-	      maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
-				       /*ucn_p=*/true);
+	      location_t loc;
+	      bidi::kind kind = get_bidi_ucn (pfile,
+					      buffer->cur,
+					      buffer->cur[-1] == 'U',
+					      &loc);
+	      maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc);
 	    }
 	  if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
 			      state, &s, NULL, NULL))
@@ -2336,8 +2485,11 @@ lex_raw_string (cpp_reader *pfile, cpp_t
 	}
       else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
 	       && warn_bidi_p)
-	maybe_warn_bidi_on_char (pfile, pos - 1, get_bidi_utf8 (pos - 1),
-				 /*ucn_p=*/false);
+	{
+	  location_t loc;
+	  bidi::kind kind = get_bidi_utf8 (pfile, pos - 1, &loc);
+	  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
+	}
     }
 
   if (warn_bidi_p)
@@ -2447,8 +2599,10 @@ lex_string (cpp_reader *pfile, cpp_token
 	{
 	  if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p)
 	    {
-	      bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U');
-	      maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true);
+	      location_t loc;
+	      bidi::kind kind = get_bidi_ucn (pfile, cur + 1, cur[0] == 'U',
+					      &loc);
+	      maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc);
 	    }
 	  cur++;
 	}
@@ -2476,8 +2630,9 @@ lex_string (cpp_reader *pfile, cpp_token
 	saw_NUL = true;
       else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
 	{
-	  bidi::kind kind = get_bidi_utf8 (cur - 1);
-	  maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false);
+	  location_t loc;
+	  bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
+	  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
 	}
     }