struct / union in initializer, RFE #901.
[sdcc.git] / sdcc / support / cpp / gcc / input.cc
blobb3970613d812ff571644c3fd944cfeac6d515dac
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2022 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic.h"
25 #include "selftest.h"
26 #include "cpplib.h"
28 #ifndef HAVE_ICONV
29 #define HAVE_ICONV 0
30 #endif
32 /* Input charset configuration. */
/* Fallback charset callback, used when the client supplies none.  It
   ignores its file-name argument and always returns a null pointer.  */

static const char *
default_charset_callback (const char *)
{
  const char *const no_charset = nullptr;
  return no_charset;
}
38 void
39 file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
40 bool should_skip_bom)
42 in_context.ccb = (ccb ? ccb : default_charset_callback);
43 in_context.should_skip_bom = should_skip_bom;
46 /* This is a cache used by get_next_line to store the content of a
47 file to be searched for file lines. */
48 class file_cache_slot
50 public:
51 file_cache_slot ();
52 ~file_cache_slot ();
54 bool read_line_num (size_t line_num,
55 char ** line, ssize_t *line_len);
57 /* Accessors. */
58 const char *get_file_path () const { return m_file_path; }
59 unsigned get_use_count () const { return m_use_count; }
60 bool missing_trailing_newline_p () const
62 return m_missing_trailing_newline;
65 void inc_use_count () { m_use_count++; }
67 bool create (const file_cache::input_context &in_context,
68 const char *file_path, FILE *fp, unsigned highest_use_count);
69 void evict ();
71 private:
72 /* These are information used to store a line boundary. */
73 class line_info
75 public:
76 /* The line number. It starts from 1. */
77 size_t line_num;
79 /* The position (byte count) of the beginning of the line,
80 relative to the file data pointer. This starts at zero. */
81 size_t start_pos;
83 /* The position (byte count) of the last byte of the line. This
84 normally points to the '\n' character, or to one byte after the
85 last byte of the file, if the file doesn't contain a '\n'
86 character. */
87 size_t end_pos;
89 line_info (size_t l, size_t s, size_t e)
90 : line_num (l), start_pos (s), end_pos (e)
93 line_info ()
94 :line_num (0), start_pos (0), end_pos (0)
98 bool needs_read_p () const;
99 bool needs_grow_p () const;
100 void maybe_grow ();
101 bool read_data ();
102 bool maybe_read_data ();
103 bool get_next_line (char **line, ssize_t *line_len);
104 bool read_next_line (char ** line, ssize_t *line_len);
105 bool goto_next_line ();
107 static const size_t buffer_size = 4 * 1024;
108 static const size_t line_record_size = 100;
110 /* The number of time this file has been accessed. This is used
111 to designate which file cache to evict from the cache
112 array. */
113 unsigned m_use_count;
115 /* The file_path is the key for identifying a particular file in
116 the cache.
117 For libcpp-using code, the underlying buffer for this field is
118 owned by the corresponding _cpp_file within the cpp_reader. */
119 const char *m_file_path;
121 FILE *m_fp;
123 /* This points to the content of the file that we've read so
124 far. */
125 char *m_data;
127 /* The allocated buffer to be freed may start a little earlier than DATA,
128 e.g. if a UTF8 BOM was skipped at the beginning. */
129 int m_alloc_offset;
131 /* The size of the DATA array above.*/
132 size_t m_size;
134 /* The number of bytes read from the underlying file so far. This
135 must be less (or equal) than SIZE above. */
136 size_t m_nb_read;
138 /* The index of the beginning of the current line. */
139 size_t m_line_start_idx;
141 /* The number of the previous line read. This starts at 1. Zero
142 means we've read no line so far. */
143 size_t m_line_num;
145 /* This is the total number of lines of the current file. At the
146 moment, we try to get this information from the line map
147 subsystem. Note that this is just a hint. When using the C++
148 front-end, this hint is correct because the input file is then
149 completely tokenized before parsing starts; so the line map knows
150 the number of lines before compilation really starts. For e.g,
151 the C front-end, it can happen that we start emitting diagnostics
152 before the line map has seen the end of the file. */
153 size_t m_total_lines;
155 /* Could this file be missing a trailing newline on its final line?
156 Initially true (to cope with empty files), set to true/false
157 as each line is read. */
158 bool m_missing_trailing_newline;
160 /* This is a record of the beginning and end of the lines we've seen
161 while reading the file. This is useful to avoid walking the data
162 from the beginning when we are asked to read a line that is
163 before LINE_START_IDX above. Note that the maximum size of this
164 record is line_record_size, so that the memory consumption
165 doesn't explode. We thus scale total_lines down to
166 line_record_size. */
167 vec<line_info, va_heap> m_line_record;
169 void offset_buffer (int offset)
171 gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
172 : (size_t) offset <= m_size);
173 gcc_assert (m_data);
174 m_alloc_offset += offset;
175 m_data += offset;
176 m_size -= offset;
181 /* Current position in real source file. */
183 location_t input_location = UNKNOWN_LOCATION;
185 class line_maps *line_table;
187 /* A stashed copy of "line_table" for use by selftest::line_table_test.
188 This needs to be a global so that it can be a GC root, and thus
189 prevent the stashed copy from being garbage-collected if the GC runs
190 during a line_table_test. */
192 class line_maps *saved_line_table;
194 /* Expand the source location LOC into a human readable location. If
195 LOC resolves to a builtin location, the file name of the readable
196 location is set to the string "<built-in>". If EXPANSION_POINT_P is
197 TRUE and LOC is virtual, then it is resolved to the expansion
198 point of the involved macro. Otherwise, it is resolved to the
199 spelling location of the token.
201 When resolving to the spelling location of the token, if the
202 resulting location is for a built-in location (that is, it has no
203 associated line/column) in the context of a macro expansion, the
204 returned location is the first one (while unwinding the macro
205 location towards its expansion point) that is in real source
206 code.
208 ASPECT controls which part of the location to use. */
210 static expanded_location
211 expand_location_1 (location_t loc,
212 bool expansion_point_p,
213 enum location_aspect aspect)
215 expanded_location xloc;
216 const line_map_ordinary *map;
217 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
218 tree block = NULL;
220 if (IS_ADHOC_LOC (loc))
222 block = LOCATION_BLOCK (loc);
223 loc = LOCATION_LOCUS (loc);
226 memset (&xloc, 0, sizeof (xloc));
228 if (loc >= RESERVED_LOCATION_COUNT)
230 if (!expansion_point_p)
232 /* We want to resolve LOC to its spelling location.
234 But if that spelling location is a reserved location that
235 appears in the context of a macro expansion (like for a
236 location for a built-in token), let's consider the first
237 location (toward the expansion point) that is not reserved;
238 that is, the first location that is in real source code. */
239 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
240 loc, NULL);
241 lrk = LRK_SPELLING_LOCATION;
243 loc = linemap_resolve_location (line_table, loc, lrk, &map);
245 /* loc is now either in an ordinary map, or is a reserved location.
246 If it is a compound location, the caret is in a spelling location,
247 but the start/finish might still be a virtual location.
248 Depending of what the caller asked for, we may need to recurse
249 one level in order to resolve any virtual locations in the
250 end-points. */
251 switch (aspect)
253 default:
254 gcc_unreachable ();
255 /* Fall through. */
256 case LOCATION_ASPECT_CARET:
257 break;
258 case LOCATION_ASPECT_START:
260 location_t start = get_start (loc);
261 if (start != loc)
262 return expand_location_1 (start, expansion_point_p, aspect);
264 break;
265 case LOCATION_ASPECT_FINISH:
267 location_t finish = get_finish (loc);
268 if (finish != loc)
269 return expand_location_1 (finish, expansion_point_p, aspect);
271 break;
273 xloc = linemap_expand_location (line_table, map, loc);
276 xloc.data = block;
277 if (loc <= BUILTINS_LOCATION)
278 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
280 return xloc;
283 /* Initialize the set of cache used for files accessed by caret
284 diagnostic. */
286 static void
287 diagnostic_file_cache_init (void)
289 gcc_assert (global_dc);
290 if (global_dc->m_file_cache == NULL)
291 global_dc->m_file_cache = new file_cache ();
294 /* Free the resources used by the set of cache used for files accessed
295 by caret diagnostic. */
297 void
298 diagnostic_file_cache_fini (void)
300 if (global_dc->m_file_cache)
302 delete global_dc->m_file_cache;
303 global_dc->m_file_cache = NULL;
307 /* Return the total lines number that have been read so far by the
308 line map (in the preprocessor) so far. For languages like C++ that
309 entirely preprocess the input file before starting to parse, this
310 equals the actual number of lines of the file. */
312 static size_t
313 total_lines_num (const char *file_path)
315 size_t r = 0;
316 location_t l = 0;
317 if (linemap_get_file_highest_location (line_table, file_path, &l))
319 gcc_assert (l >= RESERVED_LOCATION_COUNT);
320 expanded_location xloc = expand_location (l);
321 r = xloc.line;
323 return r;
326 /* Lookup the cache used for the content of a given file accessed by
327 caret diagnostic. Return the found cached file, or NULL if no
328 cached file was found. */
330 file_cache_slot *
331 file_cache::lookup_file (const char *file_path)
333 gcc_assert (file_path);
335 /* This will contain the found cached file. */
336 file_cache_slot *r = NULL;
337 for (unsigned i = 0; i < num_file_slots; ++i)
339 file_cache_slot *c = &m_file_slots[i];
340 if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
342 c->inc_use_count ();
343 r = c;
347 if (r)
348 r->inc_use_count ();
350 return r;
353 /* Purge any mention of FILENAME from the cache of files used for
354 printing source code. For use in selftests when working
355 with tempfiles. */
357 void
358 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
360 gcc_assert (file_path);
362 if (!global_dc->m_file_cache)
363 return;
365 global_dc->m_file_cache->forcibly_evict_file (file_path);
368 void
369 file_cache::forcibly_evict_file (const char *file_path)
371 gcc_assert (file_path);
373 file_cache_slot *r = lookup_file (file_path);
374 if (!r)
375 /* Not found. */
376 return;
378 r->evict ();
381 void
382 file_cache_slot::evict ()
384 m_file_path = NULL;
385 if (m_fp)
386 fclose (m_fp);
387 m_fp = NULL;
388 m_nb_read = 0;
389 m_line_start_idx = 0;
390 m_line_num = 0;
391 m_line_record.truncate (0);
392 m_use_count = 0;
393 m_total_lines = 0;
394 m_missing_trailing_newline = true;
397 /* Return the file cache that has been less used, recently, or the
398 first empty one. If HIGHEST_USE_COUNT is non-null,
399 *HIGHEST_USE_COUNT is set to the highest use count of the entries
400 in the cache table. */
402 file_cache_slot*
403 file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
405 diagnostic_file_cache_init ();
407 file_cache_slot *to_evict = &m_file_slots[0];
408 unsigned huc = to_evict->get_use_count ();
409 for (unsigned i = 1; i < num_file_slots; ++i)
411 file_cache_slot *c = &m_file_slots[i];
412 bool c_is_empty = (c->get_file_path () == NULL);
414 if (c->get_use_count () < to_evict->get_use_count ()
415 || (to_evict->get_file_path () && c_is_empty))
416 /* We evict C because it's either an entry with a lower use
417 count or one that is empty. */
418 to_evict = c;
420 if (huc < c->get_use_count ())
421 huc = c->get_use_count ();
423 if (c_is_empty)
424 /* We've reached the end of the cache; subsequent elements are
425 all empty. */
426 break;
429 if (highest_use_count)
430 *highest_use_count = huc;
432 return to_evict;
435 /* Create the cache used for the content of a given file to be
436 accessed by caret diagnostic. This cache is added to an array of
437 cache and can be retrieved by lookup_file_in_cache_tab. This
438 function returns the created cache. Note that only the last
439 num_file_slots files are cached. */
441 file_cache_slot*
442 file_cache::add_file (const char *file_path)
445 FILE *fp = fopen (file_path, "r");
446 if (fp == NULL)
447 return NULL;
449 unsigned highest_use_count = 0;
450 file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
451 if (!r->create (in_context, file_path, fp, highest_use_count))
452 return NULL;
453 return r;
456 /* Populate this slot for use on FILE_PATH and FP, dropping any
457 existing cached content within it. */
459 bool
460 file_cache_slot::create (const file_cache::input_context &in_context,
461 const char *file_path, FILE *fp,
462 unsigned highest_use_count)
464 m_file_path = file_path;
465 if (m_fp)
466 fclose (m_fp);
467 m_fp = fp;
468 if (m_alloc_offset)
469 offset_buffer (-m_alloc_offset);
470 m_nb_read = 0;
471 m_line_start_idx = 0;
472 m_line_num = 0;
473 m_line_record.truncate (0);
474 /* Ensure that this cache entry doesn't get evicted next time
475 add_file_to_cache_tab is called. */
476 m_use_count = ++highest_use_count;
477 m_total_lines = total_lines_num (file_path);
478 m_missing_trailing_newline = true;
481 /* Check the input configuration to determine if we need to do any
482 transformations, such as charset conversion or BOM skipping. */
483 if (const char *input_charset = in_context.ccb (file_path))
485 /* Need a full-blown conversion of the input charset. */
486 fclose (m_fp);
487 m_fp = NULL;
488 const cpp_converted_source cs
489 = cpp_get_converted_source (file_path, input_charset);
490 if (!cs.data)
491 return false;
492 if (m_data)
493 XDELETEVEC (m_data);
494 m_data = cs.data;
495 m_nb_read = m_size = cs.len;
496 m_alloc_offset = cs.data - cs.to_free;
498 else if (in_context.should_skip_bom)
500 if (read_data ())
502 const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
503 offset_buffer (offset);
504 m_nb_read -= offset;
508 return true;
511 /* file_cache's ctor. */
513 file_cache::file_cache ()
514 : m_file_slots (new file_cache_slot[num_file_slots])
516 initialize_input_context (nullptr, false);
519 /* file_cache's dtor. */
521 file_cache::~file_cache ()
523 delete[] m_file_slots;
526 /* Lookup the cache used for the content of a given file accessed by
527 caret diagnostic. If no cached file was found, create a new cache
528 for this file, add it to the array of cached file and return
529 it. */
531 file_cache_slot*
532 file_cache::lookup_or_add_file (const char *file_path)
534 file_cache_slot *r = lookup_file (file_path);
535 if (r == NULL)
536 r = add_file (file_path);
537 return r;
540 /* Default constructor for a cache of file used by caret
541 diagnostic. */
543 file_cache_slot::file_cache_slot ()
544 : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
545 m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
546 m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
548 m_line_record.create (0);
551 /* Destructor for a cache of file used by caret diagnostic. */
553 file_cache_slot::~file_cache_slot ()
555 if (m_fp)
557 fclose (m_fp);
558 m_fp = NULL;
560 if (m_data)
562 offset_buffer (-m_alloc_offset);
563 XDELETEVEC (m_data);
564 m_data = 0;
566 m_line_record.release ();
569 /* Returns TRUE iff the cache would need to be filled with data coming
570 from the file. That is, either the cache is empty or full or the
571 current line is empty. Note that if the cache is full, it would
572 need to be extended and filled again. */
574 bool
575 file_cache_slot::needs_read_p () const
577 return m_fp && (m_nb_read == 0
578 || m_nb_read == m_size
579 || (m_line_start_idx >= m_nb_read - 1));
582 /* Return TRUE iff the cache is full and thus needs to be
583 extended. */
585 bool
586 file_cache_slot::needs_grow_p () const
588 return m_nb_read == m_size;
591 /* Grow the cache if it needs to be extended. */
593 void
594 file_cache_slot::maybe_grow ()
596 if (!needs_grow_p ())
597 return;
599 if (!m_data)
601 gcc_assert (m_size == 0 && m_alloc_offset == 0);
602 m_size = buffer_size;
603 m_data = XNEWVEC (char, m_size);
605 else
607 const int offset = m_alloc_offset;
608 offset_buffer (-offset);
609 m_size *= 2;
610 m_data = XRESIZEVEC (char, m_data, m_size);
611 offset_buffer (offset);
615 /* Read more data into the cache. Extends the cache if need be.
616 Returns TRUE iff new data could be read. */
618 bool
619 file_cache_slot::read_data ()
621 if (feof (m_fp) || ferror (m_fp))
622 return false;
624 maybe_grow ();
626 char * from = m_data + m_nb_read;
627 size_t to_read = m_size - m_nb_read;
628 size_t nb_read = fread (from, 1, to_read, m_fp);
630 if (ferror (m_fp))
631 return false;
633 m_nb_read += nb_read;
634 return !!nb_read;
637 /* Read new data iff the cache needs to be filled with more data
638 coming from the file FP. Return TRUE iff the cache was filled with
639 mode data. */
641 bool
642 file_cache_slot::maybe_read_data ()
644 if (!needs_read_p ())
645 return false;
646 return read_data ();
649 /* Read a new line from file FP, using C as a cache for the data
650 coming from the file. Upon successful completion, *LINE is set to
651 the beginning of the line found. *LINE points directly in the
652 line cache and is only valid until the next call of get_next_line.
653 *LINE_LEN is set to the length of the line. Note that the line
654 does not contain any terminal delimiter. This function returns
655 true if some data was read or process from the cache, false
656 otherwise. Note that subsequent calls to get_next_line might
657 make the content of *LINE invalid. */
659 bool
660 file_cache_slot::get_next_line (char **line, ssize_t *line_len)
662 /* Fill the cache with data to process. */
663 maybe_read_data ();
665 size_t remaining_size = m_nb_read - m_line_start_idx;
666 if (remaining_size == 0)
667 /* There is no more data to process. */
668 return false;
670 char *line_start = m_data + m_line_start_idx;
672 char *next_line_start = NULL;
673 size_t len = 0;
674 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
675 if (line_end == NULL)
677 /* We haven't found the end-of-line delimiter in the cache.
678 Fill the cache with more data from the file and look for the
679 '\n'. */
680 while (maybe_read_data ())
682 line_start = m_data + m_line_start_idx;
683 remaining_size = m_nb_read - m_line_start_idx;
684 line_end = (char *) memchr (line_start, '\n', remaining_size);
685 if (line_end != NULL)
687 next_line_start = line_end + 1;
688 break;
691 if (line_end == NULL)
693 /* We've loadded all the file into the cache and still no
694 '\n'. Let's say the line ends up at one byte passed the
695 end of the file. This is to stay consistent with the case
696 of when the line ends up with a '\n' and line_end points to
697 that terminal '\n'. That consistency is useful below in
698 the len calculation. */
699 line_end = m_data + m_nb_read ;
700 m_missing_trailing_newline = true;
702 else
703 m_missing_trailing_newline = false;
705 else
707 next_line_start = line_end + 1;
708 m_missing_trailing_newline = false;
711 if (m_fp && ferror (m_fp))
712 return false;
714 /* At this point, we've found the end of the of line. It either
715 points to the '\n' or to one byte after the last byte of the
716 file. */
717 gcc_assert (line_end != NULL);
719 len = line_end - line_start;
721 if (m_line_start_idx < m_nb_read)
722 *line = line_start;
724 ++m_line_num;
726 /* Before we update our line record, make sure the hint about the
727 total number of lines of the file is correct. If it's not, then
728 we give up recording line boundaries from now on. */
729 bool update_line_record = true;
730 if (m_line_num > m_total_lines)
731 update_line_record = false;
733 /* Now update our line record so that re-reading lines from the
734 before m_line_start_idx is faster. */
735 if (update_line_record
736 && m_line_record.length () < line_record_size)
738 /* If the file lines fits in the line record, we just record all
739 its lines ...*/
740 if (m_total_lines <= line_record_size
741 && m_line_num > m_line_record.length ())
742 m_line_record.safe_push
743 (file_cache_slot::line_info (m_line_num,
744 m_line_start_idx,
745 line_end - m_data));
746 else if (m_total_lines > line_record_size)
748 /* ... otherwise, we just scale total_lines down to
749 (line_record_size lines. */
750 size_t n = (m_line_num * line_record_size) / m_total_lines;
751 if (m_line_record.length () == 0
752 || n >= m_line_record.length ())
753 m_line_record.safe_push
754 (file_cache_slot::line_info (m_line_num,
755 m_line_start_idx,
756 line_end - m_data));
760 /* Update m_line_start_idx so that it points to the next line to be
761 read. */
762 if (next_line_start)
763 m_line_start_idx = next_line_start - m_data;
764 else
765 /* We didn't find any terminal '\n'. Let's consider that the end
766 of line is the end of the data in the cache. The next
767 invocation of get_next_line will either read more data from the
768 underlying file or return false early because we've reached the
769 end of the file. */
770 m_line_start_idx = m_nb_read;
772 *line_len = len;
774 return true;
777 /* Consume the next bytes coming from the cache (or from its
778 underlying file if there are remaining unread bytes in the file)
779 until we reach the next end-of-line (or end-of-file). There is no
780 copying from the cache involved. Return TRUE upon successful
781 completion. */
783 bool
784 file_cache_slot::goto_next_line ()
786 char *l;
787 ssize_t len;
789 return get_next_line (&l, &len);
792 /* Read an arbitrary line number LINE_NUM from the file cached in C.
793 If the line was read successfully, *LINE points to the beginning
794 of the line in the file cache and *LINE_LEN is the length of the
795 line. *LINE is not nul-terminated, but may contain zero bytes.
796 *LINE is only valid until the next call of read_line_num.
797 This function returns bool if a line was read. */
799 bool
800 file_cache_slot::read_line_num (size_t line_num,
801 char ** line, ssize_t *line_len)
803 gcc_assert (line_num > 0);
805 if (line_num <= m_line_num)
807 /* We've been asked to read lines that are before m_line_num.
808 So lets use our line record (if it's not empty) to try to
809 avoid re-reading the file from the beginning again. */
811 if (m_line_record.is_empty ())
813 m_line_start_idx = 0;
814 m_line_num = 0;
816 else
818 file_cache_slot::line_info *i = NULL;
819 if (m_total_lines <= line_record_size)
821 /* In languages where the input file is not totally
822 preprocessed up front, the m_total_lines hint
823 can be smaller than the number of lines of the
824 file. In that case, only the first
825 m_total_lines have been recorded.
827 Otherwise, the first m_total_lines we've read have
828 their start/end recorded here. */
829 i = (line_num <= m_total_lines)
830 ? &m_line_record[line_num - 1]
831 : &m_line_record[m_total_lines - 1];
832 gcc_assert (i->line_num <= line_num);
834 else
836 /* So the file had more lines than our line record
837 size. Thus the number of lines we've recorded has
838 been scaled down to line_record_size. Let's
839 pick the start/end of the recorded line that is
840 closest to line_num. */
841 size_t n = (line_num <= m_total_lines)
842 ? line_num * line_record_size / m_total_lines
843 : m_line_record.length () - 1;
844 if (n < m_line_record.length ())
846 i = &m_line_record[n];
847 gcc_assert (i->line_num <= line_num);
851 if (i && i->line_num == line_num)
853 /* We have the start/end of the line. */
854 *line = m_data + i->start_pos;
855 *line_len = i->end_pos - i->start_pos;
856 return true;
859 if (i)
861 m_line_start_idx = i->start_pos;
862 m_line_num = i->line_num - 1;
864 else
866 m_line_start_idx = 0;
867 m_line_num = 0;
872 /* Let's walk from line m_line_num up to line_num - 1, without
873 copying any line. */
874 while (m_line_num < line_num - 1)
875 if (!goto_next_line ())
876 return false;
878 /* The line we want is the next one. Let's read and copy it back to
879 the caller. */
880 return get_next_line (line, line_len);
883 /* Return the physical source line that corresponds to FILE_PATH/LINE.
884 The line is not nul-terminated. The returned pointer is only
885 valid until the next call of location_get_source_line.
886 Note that the line can contain several null characters,
887 so the returned value's length has the actual length of the line.
888 If the function fails, a NULL char_span is returned. */
890 char_span
891 location_get_source_line (const char *file_path, int line)
893 char *buffer = NULL;
894 ssize_t len;
896 if (line == 0)
897 return char_span (NULL, 0);
899 if (file_path == NULL)
900 return char_span (NULL, 0);
902 diagnostic_file_cache_init ();
904 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
905 if (c == NULL)
906 return char_span (NULL, 0);
908 bool read = c->read_line_num (line, &buffer, &len);
909 if (!read)
910 return char_span (NULL, 0);
912 return char_span (buffer, len);
915 /* Determine if FILE_PATH missing a trailing newline on its final line.
916 Only valid to call once all of the file has been loaded, by
917 requesting a line number beyond the end of the file. */
919 bool
920 location_missing_trailing_newline (const char *file_path)
922 diagnostic_file_cache_init ();
924 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
925 if (c == NULL)
926 return false;
928 return c->missing_trailing_newline_p ();
931 /* Test if the location originates from the spelling location of a
932 builtin-tokens. That is, return TRUE if LOC is a (possibly
933 virtual) location of a built-in token that appears in the expansion
934 list of a macro. Please note that this function also works on
935 tokens that result from built-in tokens. For instance, the
936 function would return true if passed a token "4" that is the result
937 of the expansion of the built-in __LINE__ macro. */
938 bool
939 is_location_from_builtin_token (location_t loc)
941 const line_map_ordinary *map = NULL;
942 loc = linemap_resolve_location (line_table, loc,
943 LRK_SPELLING_LOCATION, &map);
944 return loc == BUILTINS_LOCATION;
947 /* Expand the source location LOC into a human readable location. If
948 LOC is virtual, it resolves to the expansion point of the involved
949 macro. If LOC resolves to a builtin location, the file name of the
950 readable location is set to the string "<built-in>". */
952 expanded_location
953 expand_location (location_t loc)
955 return expand_location_1 (loc, /*expansion_point_p=*/true,
956 LOCATION_ASPECT_CARET);
959 /* Expand the source location LOC into a human readable location. If
960 LOC is virtual, it resolves to the expansion location of the
961 relevant macro. If LOC resolves to a builtin location, the file
962 name of the readable location is set to the string
963 "<built-in>". */
965 expanded_location
966 expand_location_to_spelling_point (location_t loc,
967 enum location_aspect aspect)
969 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
972 /* The rich_location class within libcpp requires a way to expand
973 location_t instances, and relies on the client code
974 providing a symbol named
975 linemap_client_expand_location_to_spelling_point
976 to do this.
978 This is the implementation for libcommon.a (all host binaries),
979 which simply calls into expand_location_1. */
981 expanded_location
982 linemap_client_expand_location_to_spelling_point (location_t loc,
983 enum location_aspect aspect)
985 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
989 /* If LOCATION is in a system header and if it is a virtual location
990 for a token coming from the expansion of a macro, unwind it to
991 the location of the expansion point of the macro. If the expansion
992 point is also in a system header return the original LOCATION.
993 Otherwise, return the location of the expansion point.
995 This is used for instance when we want to emit diagnostics about a
996 token that may be located in a macro that is itself defined in a
997 system header, for example, for the NULL macro. In such a case, if
998 LOCATION were passed directly to diagnostic functions such as
999 warning_at, the diagnostic would be suppressed (unless
1000 -Wsystem-headers). */
1002 location_t
1003 expansion_point_location_if_in_system_header (location_t location)
1005 if (!in_system_header_at (location))
1006 return location;
1008 location_t xloc = linemap_resolve_location (line_table, location,
1009 LRK_MACRO_EXPANSION_POINT,
1010 NULL);
1011 return in_system_header_at (xloc) ? location : xloc;
1014 /* If LOCATION is a virtual location for a token coming from the expansion
1015 of a macro, unwind to the location of the expansion point of the macro. */
1017 location_t
1018 expansion_point_location (location_t location)
1020 return linemap_resolve_location (line_table, location,
1021 LRK_MACRO_EXPANSION_POINT, NULL);
1024 /* Construct a location with caret at CARET, ranging from START to
1025 finish e.g.
1027 11111111112
1028 12345678901234567890
1030 523 return foo + bar;
1031 ~~~~^~~~~
1034 The location's caret is at the "+", line 523 column 15, but starts
1035 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
1036 of "bar" at column 19. */
1038 location_t
1039 make_location (location_t caret, location_t start, location_t finish)
1041 location_t pure_loc = get_pure_location (caret);
1042 source_range src_range;
1043 src_range.m_start = get_start (start);
1044 src_range.m_finish = get_finish (finish);
1045 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
1046 pure_loc,
1047 src_range,
1048 NULL);
1049 return combined_loc;
1052 /* Same as above, but taking a source range rather than two locations. */
1054 location_t
1055 make_location (location_t caret, source_range src_range)
1057 location_t pure_loc = get_pure_location (caret);
1058 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
1061 /* An expanded_location stores the column in byte units. This function
1062 converts that column to display units. That requires reading the associated
1063 source line in order to calculate the display width. If that cannot be done
1064 for any reason, then returns the byte column as a fallback. */
1066 location_compute_display_column (expanded_location exploc,
1067 const cpp_char_column_policy &policy)
1069 if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
1070 return exploc.column;
1071 char_span line = location_get_source_line (exploc.file, exploc.line);
1072 /* If line is NULL, this function returns exploc.column which is the
1073 desired fallback. */
1074 return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
1075 exploc.column, policy);
1078 /* Dump statistics to stderr about the memory usage of the line_table
1079 set of line maps. This also displays some statistics about macro
1080 expansion. */
1082 void
1083 dump_line_table_statistics (void)
1085 struct linemap_stats s;
1086 long total_used_map_size,
1087 macro_maps_size,
1088 total_allocated_map_size;
1090 memset (&s, 0, sizeof (s));
1092 linemap_get_statistics (line_table, &s);
1094 macro_maps_size = s.macro_maps_used_size
1095 + s.macro_maps_locations_size;
1097 total_allocated_map_size = s.ordinary_maps_allocated_size
1098 + s.macro_maps_allocated_size
1099 + s.macro_maps_locations_size;
1101 total_used_map_size = s.ordinary_maps_used_size
1102 + s.macro_maps_used_size
1103 + s.macro_maps_locations_size;
1105 fprintf (stderr, "Number of expanded macros: %5ld\n",
1106 s.num_expanded_macros);
1107 if (s.num_expanded_macros != 0)
1108 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
1109 s.num_macro_tokens / s.num_expanded_macros);
1110 fprintf (stderr,
1111 "\nLine Table allocations during the "
1112 "compilation process\n");
1113 fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
1114 SIZE_AMOUNT (s.num_ordinary_maps_used));
1115 fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
1116 SIZE_AMOUNT (s.ordinary_maps_used_size));
1117 fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
1118 SIZE_AMOUNT (s.num_ordinary_maps_allocated));
1119 fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
1120 SIZE_AMOUNT (s.ordinary_maps_allocated_size));
1121 fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
1122 SIZE_AMOUNT (s.num_macro_maps_used));
1123 fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
1124 SIZE_AMOUNT (s.macro_maps_used_size));
1125 fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
1126 SIZE_AMOUNT (s.macro_maps_locations_size));
1127 fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
1128 SIZE_AMOUNT (macro_maps_size));
1129 fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
1130 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
1131 fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
1132 SIZE_AMOUNT (total_allocated_map_size));
1133 fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
1134 SIZE_AMOUNT (total_used_map_size));
1135 fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
1136 SIZE_AMOUNT (s.adhoc_table_size));
1137 fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
1138 SIZE_AMOUNT (s.adhoc_table_entries_used));
1139 fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
1140 SIZE_AMOUNT (line_table->num_optimized_ranges));
1141 fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
1142 SIZE_AMOUNT (line_table->num_unoptimized_ranges));
1144 fprintf (stderr, "\n");
1147 /* Get location one beyond the final location in ordinary map IDX. */
1149 static location_t
1150 get_end_location (class line_maps *set, unsigned int idx)
1152 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1153 return set->highest_location;
1155 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1156 return MAP_START_LOCATION (next_map);
/* Helper function for write_digit_row.
   Write the units digit of DIGIT to STREAM as one ASCII character.

   DIGIT is reduced modulo 10 as an unsigned quantity.  The caller passes
   a quotient of location_t values, which are unsigned; should such a
   value not fit in an int it arrives here negative, and a signed
   "digit % 10" would then be negative too, yielding a character below
   '0'.  Converting back to unsigned recovers the value that was passed
   and therefore its true units digit.  */

static void
write_digit (FILE *stream, int digit)
{
  const unsigned int units = static_cast <unsigned int> (digit) % 10u;
  fputc ('0' + static_cast <int> (units), stream);
}
1167 /* Helper function for dump_location_info.
1168 Write a row of numbers to STREAM, numbering a source line,
1169 giving the units, tens, hundreds etc of the column number. */
1171 static void
1172 write_digit_row (FILE *stream, int indent,
1173 const line_map_ordinary *map,
1174 location_t loc, int max_col, int divisor)
1176 fprintf (stream, "%*c", indent, ' ');
1177 fprintf (stream, "|");
1178 for (int column = 1; column < max_col; column++)
1180 location_t column_loc = loc + (column << map->m_range_bits);
1181 write_digit (stream, column_loc / divisor);
1183 fprintf (stream, "\n");
1186 /* Write a half-closed (START) / half-open (END) interval of
1187 location_t to STREAM. */
1189 static void
1190 dump_location_range (FILE *stream,
1191 location_t start, location_t end)
1193 fprintf (stream,
1194 " location_t interval: %u <= loc < %u\n",
1195 start, end);
1198 /* Write a labelled description of a half-closed (START) / half-open (END)
1199 interval of location_t to STREAM. */
1201 static void
1202 dump_labelled_location_range (FILE *stream,
1203 const char *name,
1204 location_t start, location_t end)
1206 fprintf (stream, "%s\n", name);
1207 dump_location_range (stream, start, end);
1208 fprintf (stream, "\n");
1211 /* Write a visualization of the locations in the line_table to STREAM. */
1213 void
1214 dump_location_info (FILE *stream)
1216 /* Visualize the reserved locations. */
1217 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1218 0, RESERVED_LOCATION_COUNT);
1220 /* Visualize the ordinary line_map instances, rendering the sources. */
1221 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1223 location_t end_location = get_end_location (line_table, idx);
1224 /* half-closed: doesn't include this one. */
1226 const line_map_ordinary *map
1227 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1228 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1229 dump_location_range (stream,
1230 MAP_START_LOCATION (map), end_location);
1231 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1232 fprintf (stream, " starting at line: %i\n",
1233 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1234 fprintf (stream, " column and range bits: %i\n",
1235 map->m_column_and_range_bits);
1236 fprintf (stream, " column bits: %i\n",
1237 map->m_column_and_range_bits - map->m_range_bits);
1238 fprintf (stream, " range bits: %i\n",
1239 map->m_range_bits);
1240 const char * reason;
1241 switch (map->reason) {
1242 case LC_ENTER:
1243 reason = "LC_ENTER";
1244 break;
1245 case LC_LEAVE:
1246 reason = "LC_LEAVE";
1247 break;
1248 case LC_RENAME:
1249 reason = "LC_RENAME";
1250 break;
1251 case LC_RENAME_VERBATIM:
1252 reason = "LC_RENAME_VERBATIM";
1253 break;
1254 case LC_ENTER_MACRO:
1255 reason = "LC_RENAME_MACRO";
1256 break;
1257 default:
1258 reason = "Unknown";
1260 fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
1262 const line_map_ordinary *includer_map
1263 = linemap_included_from_linemap (line_table, map);
1264 fprintf (stream, " included from location: %d",
1265 linemap_included_from (map));
1266 if (includer_map) {
1267 fprintf (stream, " (in ordinary map %d)",
1268 int (includer_map - line_table->info_ordinary.maps));
1270 fprintf (stream, "\n");
1272 /* Render the span of source lines that this "map" covers. */
1273 for (location_t loc = MAP_START_LOCATION (map);
1274 loc < end_location;
1275 loc += (1 << map->m_range_bits) )
1277 gcc_assert (pure_location_p (line_table, loc) );
1279 expanded_location exploc
1280 = linemap_expand_location (line_table, map, loc);
1282 if (exploc.column == 0)
1284 /* Beginning of a new source line: draw the line. */
1286 char_span line_text = location_get_source_line (exploc.file,
1287 exploc.line);
1288 if (!line_text)
1289 break;
1290 fprintf (stream,
1291 "%s:%3i|loc:%5i|%.*s\n",
1292 exploc.file, exploc.line,
1293 loc,
1294 (int)line_text.length (), line_text.get_buffer ());
1296 /* "loc" is at column 0, which means "the whole line".
1297 Render the locations *within* the line, by underlining
1298 it, showing the location_t numeric values
1299 at each column. */
1300 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1301 if (max_col > line_text.length ())
1302 max_col = line_text.length () + 1;
1304 int len_lnum = num_digits (exploc.line);
1305 if (len_lnum < 3)
1306 len_lnum = 3;
1307 int len_loc = num_digits (loc);
1308 if (len_loc < 5)
1309 len_loc = 5;
1311 int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1313 /* Thousands. */
1314 if (end_location > 999)
1315 write_digit_row (stream, indent, map, loc, max_col, 1000);
1317 /* Hundreds. */
1318 if (end_location > 99)
1319 write_digit_row (stream, indent, map, loc, max_col, 100);
1321 /* Tens. */
1322 write_digit_row (stream, indent, map, loc, max_col, 10);
1324 /* Units. */
1325 write_digit_row (stream, indent, map, loc, max_col, 1);
1328 fprintf (stream, "\n");
1331 /* Visualize unallocated values. */
1332 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1333 line_table->highest_location,
1334 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1336 /* Visualize the macro line_map instances, rendering the sources. */
1337 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1339 /* Each macro map that is allocated owns location_t values
1340 that are *lower* that the one before them.
1341 Hence it's meaningful to view them either in order of ascending
1342 source locations, or in order of ascending macro map index. */
1343 const bool ascending_location_ts = true;
1344 unsigned int idx = (ascending_location_ts
1345 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1346 : i);
1347 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1348 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1349 idx,
1350 linemap_map_get_macro_name (map),
1351 MACRO_MAP_NUM_MACRO_TOKENS (map));
1352 dump_location_range (stream,
1353 map->start_location,
1354 (map->start_location
1355 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1356 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1357 "expansion point is location %i",
1358 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1359 fprintf (stream, " map->start_location: %u\n",
1360 map->start_location);
1362 fprintf (stream, " macro_locations:\n");
1363 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1365 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1366 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1368 /* linemap_add_macro_token encodes token numbers in an expansion
1369 by putting them after MAP_START_LOCATION. */
1371 /* I'm typically seeing 4 uninitialized entries at the end of
1372 0xafafafaf.
1373 This appears to be due to macro.cc:replace_args
1374 adding 2 extra args for padding tokens; presumably there may
1375 be a leading and/or trailing padding token injected,
1376 each for 2 more location slots.
1377 This would explain there being up to 4 location_ts slots
1378 that may be uninitialized. */
1380 fprintf (stream, " %u: %u, %u\n",
1384 if (x == y)
1386 if (x < MAP_START_LOCATION (map))
1387 inform (x, "token %u has %<x-location == y-location == %u%>",
1388 i, x);
1389 else
1390 fprintf (stream,
1391 "x-location == y-location == %u encodes token # %u\n",
1392 x, x - MAP_START_LOCATION (map));
1394 else
1396 inform (x, "token %u has %<x-location == %u%>", i, x);
1397 inform (x, "token %u has %<y-location == %u%>", i, y);
1400 fprintf (stream, "\n");
1403 /* It appears that MAX_LOCATION_T itself is never assigned to a
1404 macro map, presumably due to an off-by-one error somewhere
1405 between the logic in linemap_enter_macro and
1406 LINEMAPS_MACRO_LOWEST_LOCATION. */
1407 dump_labelled_location_range (stream, "MAX_LOCATION_T",
1408 MAX_LOCATION_T,
1409 MAX_LOCATION_T + 1);
1411 /* Visualize ad-hoc values. */
1412 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1413 MAX_LOCATION_T + 1, UINT_MAX);
1416 /* string_concat's constructor. */
1418 string_concat::string_concat (int num, location_t *locs)
1419 : m_num (num)
1421 m_locs = ggc_vec_alloc <location_t> (num);
1422 for (int i = 0; i < num; i++)
1423 m_locs[i] = locs[i];
1426 /* string_concat_db's constructor. */
1428 string_concat_db::string_concat_db ()
1430 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1433 /* Record that a string concatenation occurred, covering NUM
1434 string literal tokens. LOCS is an array of size NUM, containing the
1435 locations of the tokens. A copy of LOCS is taken. */
1437 void
1438 string_concat_db::record_string_concatenation (int num, location_t *locs)
1440 gcc_assert (num > 1);
1441 gcc_assert (locs);
1443 location_t key_loc = get_key_loc (locs[0]);
1444 /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
1445 any data now recorded under key 'key_loc' would be overwritten by a
1446 subsequent call with the same key 'key_loc'. */
1447 if (RESERVED_LOCATION_P (key_loc))
1448 return;
1450 string_concat *concat
1451 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1452 m_table->put (key_loc, concat);
1455 /* Determine if LOC was the location of the initial token of a
1456 concatenation of string literal tokens.
1457 If so, *OUT_NUM is written to with the number of tokens, and
1458 *OUT_LOCS with the location of an array of locations of the
1459 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1460 storage owned by the string_concat_db.
1461 Otherwise, return false. */
1463 bool
1464 string_concat_db::get_string_concatenation (location_t loc,
1465 int *out_num,
1466 location_t **out_locs)
1468 gcc_assert (out_num);
1469 gcc_assert (out_locs);
1471 location_t key_loc = get_key_loc (loc);
1472 /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see
1473 discussion in 'string_concat_db::record_string_concatenation'. */
1474 if (RESERVED_LOCATION_P (key_loc))
1475 return false;
1477 string_concat **concat = m_table->get (key_loc);
1478 if (!concat)
1479 return false;
1481 *out_num = (*concat)->m_num;
1482 *out_locs =(*concat)->m_locs;
1483 return true;
1486 /* Internal function. Canonicalize LOC into a form suitable for
1487 use as a key within the database, stripping away macro expansion,
1488 ad-hoc information, and range information, using the location of
1489 the start of LOC within an ordinary linemap. */
1491 location_t
1492 string_concat_db::get_key_loc (location_t loc)
1494 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1495 NULL);
1497 loc = get_range_from_loc (line_table, loc).m_start;
1499 return loc;
1502 /* Helper class for use within get_substring_ranges_for_loc.
1503 An vec of cpp_string with responsibility for releasing all of the
1504 str->text for each str in the vector. */
1506 class auto_cpp_string_vec : public auto_vec <cpp_string>
1508 public:
1509 auto_cpp_string_vec (int alloc)
1510 : auto_vec <cpp_string> (alloc) {}
1512 ~auto_cpp_string_vec ()
1514 /* Clean up the copies within this vec. */
1515 int i;
1516 cpp_string *str;
1517 FOR_EACH_VEC_ELT (*this, i, str)
1518 free (const_cast <unsigned char *> (str->text));
1522 /* Attempt to populate RANGES with source location information on the
1523 individual characters within the string literal found at STRLOC.
1524 If CONCATS is non-NULL, then any string literals that the token at
1525 STRLOC was concatenated with are also added to RANGES.
1527 Return NULL if successful, or an error message if any errors occurred (in
1528 which case RANGES may be only partially populated and should not
1529 be used).
1531 This is implemented by re-parsing the relevant source line(s). */
/* PFILE must be non-NULL (asserted).  TYPE is not inspected here; it is
   passed through to cpp_interpret_string_ranges.  Every error message
   produced directly by this function is a string literal; an error
   returned by cpp_interpret_string_ranges is forwarded unchanged.  */
1533 static const char *
1534 get_substring_ranges_for_loc (cpp_reader *pfile,
1535 string_concat_db *concats,
1536 location_t strloc,
1537 enum cpp_ttype type,
1538 cpp_substring_ranges &ranges)
1540 gcc_assert (pfile);
1542 if (strloc == UNKNOWN_LOCATION)
1543 return "unknown location";
1545 /* Reparsing the strings requires accurate location information.
1546 If -ftrack-macro-expansion has been overridden from its default
1547 of 2, then we might have a location of a macro expansion point,
1548 rather than the location of the literal itself.
1549 Avoid this by requiring that we have full macro expansion tracking
1550 for substring locations to be available. */
1551 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1552 return "track_macro_expansion != 2";
1554 /* If #line or # 44 "file"-style directives are present, then there's
1555 no guarantee that the line numbers we have can be used to locate
1556 the strings. For example, we might have a .i file with # directives
1557 pointing back to lines within a .c file, but the .c file might
1558 have been edited since the .i file was created.
1559 In such a case, the safest course is to disable on-demand substring
1560 locations. */
1561 if (line_table->seen_line_directive)
1562 return "seen line directive";
1564 /* If string concatenation has occurred at STRLOC, get the locations
1565 of all of the literal tokens making up the compound string.
1566 Otherwise, just use STRLOC. */
1567 int num_locs = 1;
1568 location_t *strlocs = &strloc;
/* The result of the lookup is not checked: when it finds nothing,
   get_string_concatenation returns before writing to its out-parameters,
   so the single-token defaults set up above stay in effect.  */
1569 if (concats)
1570 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
/* strs owns the copies of the literal text made in the loop below; its
   destructor frees them, which also covers the early error returns
   inside the loop.  */
1572 auto_cpp_string_vec strs (num_locs);
1573 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1574 for (int i = 0; i < num_locs; i++)
1576 /* Get range of strloc. We will use it to locate the start and finish
1577 of the literal token within the line. */
1578 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1580 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1582 /* If the string token was within a macro expansion, then we can
1583 cope with it for the simple case where we have a single token.
1584 Otherwise, bail out. */
1585 if (src_range.m_start != src_range.m_finish)
1586 return "macro expansion";
1588 else
1590 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1591 /* If so, we can't reliably determine where the token started within
1592 its line. */
1593 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1595 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1596 /* If so, we can't reliably determine where the token finished
1597 within its line. */
1598 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
/* Expand both endpoints to file/line/column and insist that they describe
   a forward span within a single line of a single file.  */
1601 expanded_location start
1602 = expand_location_to_spelling_point (src_range.m_start,
1603 LOCATION_ASPECT_START);
1604 expanded_location finish
1605 = expand_location_to_spelling_point (src_range.m_finish,
1606 LOCATION_ASPECT_FINISH);
1607 if (start.file != finish.file)
1608 return "range endpoints are in different files";
1609 if (start.line != finish.line)
1610 return "range endpoints are on different lines";
1611 if (start.column > finish.column)
1612 return "range endpoints are reversed";
1614 char_span line = location_get_source_line (start.file, start.line);
1615 if (!line)
1616 return "unable to read source line";
1618 /* Determine the location of the literal (including quotes
1619 and leading prefix chars, such as the 'u' in a u""
1620 token). */
/* Both endpoint columns are part of the literal, hence the "+ 1".  */
1621 size_t literal_length = finish.column - start.column + 1;
1623 /* Ensure that we don't crash if we got the wrong location. */
1624 if (start.column < 1)
1625 return "zero start column";
1626 if (line.length () < (start.column - 1 + literal_length))
1627 return "line is not wide enough";
/* Columns are used as 1-based here; the span offset is 0-based.  */
1629 char_span literal = line.subspan (start.column - 1, literal_length);
1631 cpp_string from;
1632 from.len = literal_length;
1633 /* Make a copy of the literal, to avoid having to rely on
1634 the lifetime of the copy of the line within the cache.
1635 This will be released by the auto_cpp_string_vec dtor. */
1636 from.text = (unsigned char *)literal.xstrdup ();
1637 strs.safe_push (from);
1639 /* For very long lines, a new linemap could have started
1640 halfway through the token.
1641 Ensure that the loc_reader uses the linemap of the
1642 *end* of the token for its start location. */
1643 const line_map_ordinary *start_ord_map;
1644 linemap_resolve_location (line_table, src_range.m_start,
1645 LRK_SPELLING_LOCATION, &start_ord_map);
1646 const line_map_ordinary *final_ord_map;
1647 linemap_resolve_location (line_table, src_range.m_finish,
1648 LRK_SPELLING_LOCATION, &final_ord_map);
1649 if (start_ord_map == NULL || final_ord_map == NULL)
1650 return "failed to get ordinary maps";
1651 /* Bulletproofing. We ought to only have different ordinary maps
1652 for start vs finish due to line-length jumps. */
1653 if (start_ord_map != final_ord_map
1654 && start_ord_map->to_file != final_ord_map->to_file)
1655 return "start and finish are spelled in different ordinary maps";
1656 /* The file from linemap_resolve_location ought to match that from
1657 expand_location_to_spelling_point. */
1658 if (start_ord_map->to_file != start.file)
1659 return "mismatching file after resolving linemap";
/* Re-derive the start location from its line and column, but within
   final_ord_map, i.e. the map that holds the end of the token.  */
1661 location_t start_loc
1662 = linemap_position_for_line_and_column (line_table, final_ord_map,
1663 start.line, start.column);
1665 cpp_string_location_reader loc_reader (start_loc, line_table);
1666 loc_readers.safe_push (loc_reader);
1669 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1670 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1671 loc_readers.address (),
1672 num_locs, &ranges, type);
1673 if (err)
1674 return err;
1676 /* Success: "ranges" should now contain information on the string. */
1677 return NULL;
1680 /* Attempt to populate *OUT_LOC with source location information on the
1681 given characters within the string literal found at STRLOC.
1682 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1683 character set.
1685 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1686 and string literal "012345\n789"
1687 *OUT_LOC is written to with:
1688 "012345\n789"
1689 ~^~~~~
1691 If CONCATS is non-NULL, then any string literals that the token at
1692 STRLOC was concatenated with are also considered.
1694 This is implemented by re-parsing the relevant source line(s).
1696 Return NULL if successful, or an error message if any errors occurred.
1697 Error messages are intended for GCC developers (to help debugging) rather
1698 than for end-users. */
1700 const char *
1701 get_location_within_string (cpp_reader *pfile,
1702 string_concat_db *concats,
1703 location_t strloc,
1704 enum cpp_ttype type,
1705 int caret_idx, int start_idx, int end_idx,
1706 location_t *out_loc)
1708 gcc_checking_assert (caret_idx >= 0);
1709 gcc_checking_assert (start_idx >= 0);
1710 gcc_checking_assert (end_idx >= 0);
1711 gcc_assert (out_loc);
1713 cpp_substring_ranges ranges;
1714 const char *err
1715 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1716 if (err)
1717 return err;
1719 if (caret_idx >= ranges.get_num_ranges ())
1720 return "caret_idx out of range";
1721 if (start_idx >= ranges.get_num_ranges ())
1722 return "start_idx out of range";
1723 if (end_idx >= ranges.get_num_ranges ())
1724 return "end_idx out of range";
1726 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1727 ranges.get_range (start_idx).m_start,
1728 ranges.get_range (end_idx).m_finish);
1729 return NULL;
1732 #if CHECKING_P
1734 namespace selftest {
1736 /* Selftests of location handling. */
1738 /* Attempt to populate *OUT_RANGE with source location information on the
1739 given character within the string literal found at STRLOC.
1740 CHAR_IDX refers to an offset within the execution character set.
1741 If CONCATS is non-NULL, then any string literals that the token at
1742 STRLOC was concatenated with are also considered.
1744 This is implemented by re-parsing the relevant source line(s).
1746 Return NULL if successful, or an error message if any errors occurred.
1747 Error messages are intended for GCC developers (to help debugging) rather
1748 than for end-users. */
1750 static const char *
1751 get_source_range_for_char (cpp_reader *pfile,
1752 string_concat_db *concats,
1753 location_t strloc,
1754 enum cpp_ttype type,
1755 int char_idx,
1756 source_range *out_range)
1758 gcc_checking_assert (char_idx >= 0);
1759 gcc_assert (out_range);
1761 cpp_substring_ranges ranges;
1762 const char *err
1763 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1764 if (err)
1765 return err;
1767 if (char_idx >= ranges.get_num_ranges ())
1768 return "char_idx out of range";
1770 *out_range = ranges.get_range (char_idx);
1771 return NULL;
1774 /* As get_source_range_for_char, but write to *OUT the number
1775 of ranges that are available. */
1777 static const char *
1778 get_num_source_ranges_for_substring (cpp_reader *pfile,
1779 string_concat_db *concats,
1780 location_t strloc,
1781 enum cpp_ttype type,
1782 int *out)
1784 gcc_assert (out);
1786 cpp_substring_ranges ranges;
1787 const char *err
1788 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1790 if (err)
1791 return err;
1793 *out = ranges.get_num_ranges ();
1794 return NULL;
1797 /* Selftests of location handling. */
1799 /* Verify that compare() on linenum_type handles comparisons over the full
1800 range of the type. */
1802 static void
1803 test_linenum_comparisons ()
1805 linenum_type min_line (0);
1806 linenum_type max_line (0xffffffff);
1807 ASSERT_EQ (0, compare (min_line, min_line));
1808 ASSERT_EQ (0, compare (max_line, max_line));
1810 ASSERT_GT (compare (max_line, min_line), 0);
1811 ASSERT_LT (compare (min_line, max_line), 0);
1814 /* Helper function for verifying location data: when location_t
1815 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1816 as having column 0. */
1818 static bool
1819 should_have_column_data_p (location_t loc)
1821 if (IS_ADHOC_LOC (loc))
1822 loc = get_location_from_adhoc_loc (line_table, loc);
1823 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1824 return false;
1825 return true;
1828 /* Selftest for should_have_column_data_p. */
1830 static void
1831 test_should_have_column_data_p ()
1833 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1834 ASSERT_TRUE
1835 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1836 ASSERT_FALSE
1837 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1840 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1841 on LOC. */
1843 static void
1844 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1845 location_t loc)
1847 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1848 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1849 /* If location_t values are sufficiently high, then column numbers
1850 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1851 When close to the threshold, column numbers *may* be present: if
1852 the final linemap before the threshold contains a line that straddles
1853 the threshold, locations in that line have column information. */
1854 if (should_have_column_data_p (loc))
1855 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
/* Several selftests build a line table and populate it with one or more
   line maps.

   To get good coverage these tests are run under a range of conditions:
   - line_table->default_range_bits: some frontends use a non-zero value,
     others use zero
   - the fallback modes within line-map.cc: there are several threshold
     values of location_t beyond which line-map.cc changes behavior
     (the range-packing optimization is disabled, column tracking is
     disabled).  These are exercised by starting the line_table at
     interesting values at or near those thresholds.

   An instance of the class below describes one such combination, i.e.
   one cell of the test matrix.  */

class line_table_case
{
public:
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* Value to install as line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the location value at which the line_table starts.  */
  int m_base_location;
};
1886 /* Constructor. Store the old value of line_table, and create a new
1887 one, using sane defaults. */
1889 line_table_test::line_table_test ()
1891 gcc_assert (saved_line_table == NULL);
1892 saved_line_table = line_table;
1893 line_table = ggc_alloc<line_maps> ();
1894 linemap_init (line_table, BUILTINS_LOCATION);
1895 gcc_assert (saved_line_table->reallocator);
1896 line_table->reallocator = saved_line_table->reallocator;
1897 gcc_assert (saved_line_table->round_alloc_size);
1898 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1899 line_table->default_range_bits = 0;
1902 /* Constructor. Store the old value of line_table, and create a new
1903 one, using the sitation described in CASE_. */
1905 line_table_test::line_table_test (const line_table_case &case_)
1907 gcc_assert (saved_line_table == NULL);
1908 saved_line_table = line_table;
1909 line_table = ggc_alloc<line_maps> ();
1910 linemap_init (line_table, BUILTINS_LOCATION);
1911 gcc_assert (saved_line_table->reallocator);
1912 line_table->reallocator = saved_line_table->reallocator;
1913 gcc_assert (saved_line_table->round_alloc_size);
1914 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1915 line_table->default_range_bits = case_.m_default_range_bits;
1916 if (case_.m_base_location)
1918 line_table->highest_location = case_.m_base_location;
1919 line_table->highest_line = case_.m_base_location;
1923 /* Destructor. Restore the old value of line_table. */
1925 line_table_test::~line_table_test ()
1927 gcc_assert (saved_line_table != NULL);
1928 line_table = saved_line_table;
1929 saved_line_table = NULL;
1932 /* Verify basic operation of ordinary linemaps. */
/* CASE_ selects the default range bits and the base location of the
   temporary line_table the test runs against.  The test first creates
   locations in two files ("foo.c" and "bar.c"), then checks that file,
   line and column can be recovered from them, and finally round-trips a
   range through make_location.  */
1934 static void
1935 test_accessing_ordinary_linemaps (const line_table_case &case_)
/* Swap in a fresh line_table configured per CASE_; the destructor of ltt
   restores the previous one on exit from this function.  */
1937 line_table_test ltt (case_);
1939 /* Build a simple linemap describing some locations. */
1940 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1942 linemap_line_start (line_table, 1, 100);
1943 location_t loc_a = linemap_position_for_column (line_table, 1);
1944 location_t loc_b = linemap_position_for_column (line_table, 23);
1946 linemap_line_start (line_table, 2, 100);
1947 location_t loc_c = linemap_position_for_column (line_table, 1);
1948 location_t loc_d = linemap_position_for_column (line_table, 17);
1950 /* Example of a very long line. */
1951 linemap_line_start (line_table, 3, 2000);
1952 location_t loc_e = linemap_position_for_column (line_table, 700);
1954 /* Transitioning back to a short line. */
1955 linemap_line_start (line_table, 4, 0);
1956 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
/* The check below only makes sense while columns are still tracked at
   this location value.  */
1958 if (should_have_column_data_p (loc_back_to_short))
1960 /* Verify that we switched to short lines in the linemap. */
1961 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1962 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1965 /* Example of a line that will eventually be seen to be longer
1966 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1967 below that. */
1968 linemap_line_start (line_table, 5, 2000);
/* NOTE(review): 4097 and 4098 are presumably the first two columns past
   LINE_MAP_MAX_COLUMN_NUMBER -- confirm against line-map.h.  */
1970 location_t loc_start_of_very_long_line
1971 = linemap_position_for_column (line_table, 2000);
1972 location_t loc_too_wide
1973 = linemap_position_for_column (line_table, 4097);
1974 location_t loc_too_wide_2
1975 = linemap_position_for_column (line_table, 4098);
1977 /* ...and back to a sane line length. */
1978 linemap_line_start (line_table, 6, 100);
1979 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1981 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1983 /* Multiple files. */
1984 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1985 linemap_line_start (line_table, 1, 200);
1986 location_t loc_f = linemap_position_for_column (line_table, 150);
1987 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1989 /* Verify that we can recover the location info. */
1990 assert_loceq ("foo.c", 1, 1, loc_a);
1991 assert_loceq ("foo.c", 1, 23, loc_b);
1992 assert_loceq ("foo.c", 2, 1, loc_c);
1993 assert_loceq ("foo.c", 2, 17, loc_d);
1994 assert_loceq ("foo.c", 3, 700, loc_e);
1995 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1997 /* In the very wide line, the initial location should be fully tracked. */
1998 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1999 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
2000 be disabled. */
2001 assert_loceq ("foo.c", 5, 0, loc_too_wide);
2002 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
2003 /*...and column-tracking should be re-enabled for subsequent lines. */
2004 assert_loceq ("foo.c", 6, 10, loc_sane_again);
2006 assert_loceq ("bar.c", 1, 150, loc_f);
2008 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
2009 ASSERT_TRUE (pure_location_p (line_table, loc_a));
2011 /* Verify using make_location to build a range, and extracting data
2012 back from it. */
/* Caret at loc_c, range from loc_b to loc_d: the caret deliberately
   differs from both endpoints of the range.  */
2013 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
2014 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
2015 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
2016 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
2017 ASSERT_EQ (loc_b, src_range.m_start);
2018 ASSERT_EQ (loc_d, src_range.m_finish);
2021 /* Verify various properties of UNKNOWN_LOCATION. */
2023 static void
2024 test_unknown_location ()
2026 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
2027 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
2028 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
2031 /* Verify various properties of BUILTINS_LOCATION. */
2033 static void
2034 test_builtins ()
2036 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
2037 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
2040 /* Regression test for make_location.
2041 Ensure that we use pure locations for the start/finish of the range,
2042 rather than storing a packed or ad-hoc range as the start/finish. */
2044 static void
2045 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
2047 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
2048 with C++ frontend.
2049 ....................0000000001111111111222.
2050 ....................1234567890123456789012. */
2051 const char *content = " r += !aaa == bbb;\n";
2052 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
2053 line_table_test ltt (case_);
2054 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
2056 const location_t c11 = linemap_position_for_column (line_table, 11);
2057 const location_t c12 = linemap_position_for_column (line_table, 12);
2058 const location_t c13 = linemap_position_for_column (line_table, 13);
2059 const location_t c14 = linemap_position_for_column (line_table, 14);
2060 const location_t c21 = linemap_position_for_column (line_table, 21);
2062 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
2063 return;
2065 /* Use column 13 for the caret location, arbitrarily, to verify that we
2066 handle start != caret. */
2067 const location_t aaa = make_location (c13, c12, c14);
2068 ASSERT_EQ (c13, get_pure_location (aaa));
2069 ASSERT_EQ (c12, get_start (aaa));
2070 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
2071 ASSERT_EQ (c14, get_finish (aaa));
2072 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
2074 /* Make a location using a location with a range as the start-point. */
2075 const location_t not_aaa = make_location (c11, aaa, c14);
2076 ASSERT_EQ (c11, get_pure_location (not_aaa));
2077 /* It should use the start location of the range, not store the range
2078 itself. */
2079 ASSERT_EQ (c12, get_start (not_aaa));
2080 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
2081 ASSERT_EQ (c14, get_finish (not_aaa));
2082 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
2084 /* Similarly, make a location with a range as the end-point. */
2085 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
2086 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
2087 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
2088 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2089 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2090 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2091 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
2092 /* It should use the finish location of the range, not store the range
2093 itself. */
2094 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2095 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2096 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2097 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2098 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2101 /* Verify reading of input files (e.g. for caret-based diagnostics). */
2103 static void
2104 test_reading_source_line ()
2106 /* Create a tempfile and write some text to it. */
2107 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2108 "01234567890123456789\n"
2109 "This is the test text\n"
2110 "This is the 3rd line");
2112 /* Read back a specific line from the tempfile. */
2113 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
2114 ASSERT_TRUE (source_line);
2115 ASSERT_TRUE (source_line.get_buffer () != NULL);
2116 ASSERT_EQ (20, source_line.length ());
2117 ASSERT_TRUE (!strncmp ("This is the 3rd line",
2118 source_line.get_buffer (), source_line.length ()));
2120 source_line = location_get_source_line (tmp.get_filename (), 2);
2121 ASSERT_TRUE (source_line);
2122 ASSERT_TRUE (source_line.get_buffer () != NULL);
2123 ASSERT_EQ (21, source_line.length ());
2124 ASSERT_TRUE (!strncmp ("This is the test text",
2125 source_line.get_buffer (), source_line.length ()));
2127 source_line = location_get_source_line (tmp.get_filename (), 4);
2128 ASSERT_FALSE (source_line);
2129 ASSERT_TRUE (source_line.get_buffer () == NULL);
2132 /* Tests of lexing. */
/* Spell token TOKEN from READER with cpp_token_as_text and assert that
   the resulting text is string-equal to WANT_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(READER, TOKEN, WANT_TEXT) \
  SELFTEST_BEGIN_STMT \
    unsigned char *spelled = cpp_token_as_text ((READER), (TOKEN)); \
    ASSERT_STREQ ((WANT_TEXT), (const char *)spelled); \
  SELFTEST_END_STMT
2143 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
2144 and ranges from EXP_START_COL to EXP_FINISH_COL.
2145 Use LOC as the effective location of the selftest. */
2147 static void
2148 assert_token_loc_eq (const location &loc,
2149 const cpp_token *tok,
2150 const char *exp_filename, int exp_linenum,
2151 int exp_start_col, int exp_finish_col)
2153 location_t tok_loc = tok->src_loc;
2154 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2155 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2157 /* If location_t values are sufficiently high, then column numbers
2158 will be unavailable. */
2159 if (!should_have_column_data_p (tok_loc))
2160 return;
2162 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2163 source_range tok_range = get_range_from_loc (line_table, tok_loc);
2164 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2165 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
/* Convenience wrapper around assert_token_loc_eq that checks
   TOKEN->src_loc and reports failures at SELFTEST_LOCATION, i.e. at the
   point where the macro is used.  */

#define ASSERT_TOKEN_LOC_EQ(TOKEN, WANT_FILE, WANT_LINE, \
                            WANT_FIRST_COL, WANT_LAST_COL) \
  assert_token_loc_eq (SELFTEST_LOCATION, (TOKEN), (WANT_FILE), \
                       (WANT_LINE), (WANT_FIRST_COL), (WANT_LAST_COL))
2176 /* Test of lexing a file using libcpp, verifying tokens and their
2177 location information. */
2179 static void
2180 test_lexer (const line_table_case &case_)
2182 /* Create a tempfile and write some text to it. */
2183 const char *content =
2184 /*00000000011111111112222222222333333.3333444444444.455555555556
2185 12345678901234567890123456789012345.6789012345678.901234567890. */
2186 ("test_name /* c-style comment */\n"
2187 " \"test literal\"\n"
2188 " // test c++-style comment\n"
2189 " 42\n");
2190 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2192 line_table_test ltt (case_);
2194 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2196 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2197 ASSERT_NE (fname, NULL);
2199 /* Verify that we get the expected tokens back, with the correct
2200 location information. */
2202 location_t loc;
2203 const cpp_token *tok;
2204 tok = cpp_get_token_with_location (parser, &loc);
2205 ASSERT_NE (tok, NULL);
2206 ASSERT_EQ (tok->type, CPP_NAME);
2207 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2208 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2210 tok = cpp_get_token_with_location (parser, &loc);
2211 ASSERT_NE (tok, NULL);
2212 ASSERT_EQ (tok->type, CPP_STRING);
2213 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2214 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2216 tok = cpp_get_token_with_location (parser, &loc);
2217 ASSERT_NE (tok, NULL);
2218 ASSERT_EQ (tok->type, CPP_NUMBER);
2219 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2220 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2222 tok = cpp_get_token_with_location (parser, &loc);
2223 ASSERT_NE (tok, NULL);
2224 ASSERT_EQ (tok->type, CPP_EOF);
2226 cpp_finish (parser, NULL);
2227 cpp_destroy (parser);
/* Forward declarations: lexer_test and lexer_test_options refer to
   each other.  */

class lexer_test;
class lexer_test_options;

/* Abstract hook for customizing a lexer_test.
   The lexer_test constructor invokes the "apply" vfunc on the options
   object it is given, passing itself as the argument.  */

class lexer_test_options
{
 public:
  virtual void apply (lexer_test &test) = 0;
};
2244 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2245 in its dtor.
2247 This is needed by struct lexer_test to ensure that the cleanup of the
2248 cpp_reader happens *after* the cleanup of the temp_source_file. */
2250 class cpp_reader_ptr
2252 public:
2253 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2255 ~cpp_reader_ptr ()
2257 cpp_finish (m_ptr, NULL);
2258 cpp_destroy (m_ptr);
2261 operator cpp_reader * () const { return m_ptr; }
2263 private:
2264 cpp_reader *m_ptr;
2267 /* A struct for writing lexer tests. */
2269 class lexer_test
2271 public:
2272 lexer_test (const line_table_case &case_, const char *content,
2273 lexer_test_options *options);
2274 ~lexer_test ();
2276 const cpp_token *get_token ();
2278 /* The ordering of these fields matters.
2279 The line_table_test must be first, since the cpp_reader_ptr
2280 uses it.
2281 The cpp_reader must be cleaned up *after* the temp_source_file
2282 since the filenames in input.cc's input cache are owned by the
2283 cpp_reader; in particular, when ~temp_source_file evicts the
2284 filename the filenames must still be alive. */
2285 line_table_test m_ltt;
2286 cpp_reader_ptr m_parser;
2287 temp_source_file m_tempfile;
2288 string_concat_db m_concats;
2289 bool m_implicitly_expect_EOF;
2292 /* Use an EBCDIC encoding for the execution charset, specifically
2293 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2295 This exercises iconv integration within libcpp.
2296 Not every build of iconv supports the given charset,
2297 so we need to flag this error and handle it gracefully. */
2299 class ebcdic_execution_charset : public lexer_test_options
2301 public:
2302 ebcdic_execution_charset () : m_num_iconv_errors (0)
2304 gcc_assert (s_singleton == NULL);
2305 s_singleton = this;
2307 ~ebcdic_execution_charset ()
2309 gcc_assert (s_singleton == this);
2310 s_singleton = NULL;
2313 void apply (lexer_test &test) FINAL OVERRIDE
2315 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2316 cpp_opts->narrow_charset = "IBM1047";
2318 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2319 callbacks->diagnostic = on_diagnostic;
2322 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2323 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2324 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2325 rich_location *richloc ATTRIBUTE_UNUSED,
2326 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2327 ATTRIBUTE_FPTR_PRINTF(5,0)
2329 gcc_assert (s_singleton);
2330 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2331 const char *msg = "conversion from %s to %s not supported by iconv";
2332 #ifdef ENABLE_NLS
2333 msg = dgettext ("cpplib", msg);
2334 #endif
2335 /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc
2336 when the local iconv build doesn't support the conversion. */
2337 if (strcmp (msgid, msg) == 0)
2339 s_singleton->m_num_iconv_errors++;
2340 return true;
2343 /* Otherwise, we have an unexpected error. */
2344 abort ();
2347 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2349 private:
2350 static ebcdic_execution_charset *s_singleton;
2351 int m_num_iconv_errors;
2354 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2356 /* A lexer_test_options subclass that records a list of diagnostic
2357 messages emitted by the lexer. */
2359 class lexer_diagnostic_sink : public lexer_test_options
2361 public:
2362 lexer_diagnostic_sink ()
2364 gcc_assert (s_singleton == NULL);
2365 s_singleton = this;
2367 ~lexer_diagnostic_sink ()
2369 gcc_assert (s_singleton == this);
2370 s_singleton = NULL;
2372 int i;
2373 char *str;
2374 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2375 free (str);
2378 void apply (lexer_test &test) FINAL OVERRIDE
2380 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2381 callbacks->diagnostic = on_diagnostic;
2384 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2385 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2386 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2387 rich_location *richloc ATTRIBUTE_UNUSED,
2388 const char *msgid, va_list *ap)
2389 ATTRIBUTE_FPTR_PRINTF(5,0)
2391 char *msg = xvasprintf (msgid, *ap);
2392 s_singleton->m_diagnostics.safe_push (msg);
2393 return true;
2396 auto_vec<char *> m_diagnostics;
2398 private:
2399 static lexer_diagnostic_sink *s_singleton;
2402 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2404 /* Constructor. Override line_table with a new instance based on CASE_,
2405 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2406 start parsing the tempfile. */
2408 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2409 lexer_test_options *options)
2410 : m_ltt (case_),
2411 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2412 /* Create a tempfile and write the text to it. */
2413 m_tempfile (SELFTEST_LOCATION, ".c", content),
2414 m_concats (),
2415 m_implicitly_expect_EOF (true)
2417 if (options)
2418 options->apply (*this);
2420 cpp_init_iconv (m_parser);
2422 /* Parse the file. */
2423 const char *fname = cpp_read_main_file (m_parser,
2424 m_tempfile.get_filename ());
2425 ASSERT_NE (fname, NULL);
2428 /* Destructor. By default, verify that the next token in m_parser is EOF. */
2430 lexer_test::~lexer_test ()
2432 location_t loc;
2433 const cpp_token *tok;
2435 if (m_implicitly_expect_EOF)
2437 tok = cpp_get_token_with_location (m_parser, &loc);
2438 ASSERT_NE (tok, NULL);
2439 ASSERT_EQ (tok->type, CPP_EOF);
2443 /* Get the next token from m_parser. */
2445 const cpp_token *
2446 lexer_test::get_token ()
2448 location_t loc;
2449 const cpp_token *tok;
2451 tok = cpp_get_token_with_location (m_parser, &loc);
2452 ASSERT_NE (tok, NULL);
2453 return tok;
2456 /* Verify that locations within string literals are correctly handled. */
2458 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2459 using the string concatenation database for TEST.
2461 Assert that the character at index IDX is on EXPECTED_LINE,
2462 and that it begins at column EXPECTED_START_COL and ends at
2463 EXPECTED_FINISH_COL (unless the locations are beyond
2464 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2465 columns). */
2467 static void
2468 assert_char_at_range (const location &loc,
2469 lexer_test& test,
2470 location_t strloc, enum cpp_ttype type, int idx,
2471 int expected_line, int expected_start_col,
2472 int expected_finish_col)
2474 cpp_reader *pfile = test.m_parser;
2475 string_concat_db *concats = &test.m_concats;
2477 source_range actual_range = source_range();
2478 const char *err
2479 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2480 &actual_range);
2481 if (should_have_column_data_p (strloc))
2482 ASSERT_EQ_AT (loc, NULL, err);
2483 else
2485 ASSERT_STREQ_AT (loc,
2486 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2487 err);
2488 return;
2491 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2492 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2493 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2494 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2496 if (should_have_column_data_p (actual_range.m_start))
2498 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2499 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2501 if (should_have_column_data_p (actual_range.m_finish))
2503 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2504 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
/* Call assert_char_at_range with SELFTEST_LOCATION as the effective
   location, so that failures point at the use of this macro.  */

#define ASSERT_CHAR_AT_RANGE(FIXTURE, STR_LOC, TOK_TYPE, CHAR_IDX, \
                             WANT_LINE, WANT_FIRST_COL, WANT_LAST_COL) \
  assert_char_at_range (SELFTEST_LOCATION, (FIXTURE), (STR_LOC), \
                        (TOK_TYPE), (CHAR_IDX), (WANT_LINE), \
                        (WANT_FIRST_COL), (WANT_LAST_COL))
2517 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2518 using the string concatenation database for TEST.
2520 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2522 static void
2523 assert_num_substring_ranges (const location &loc,
2524 lexer_test& test,
2525 location_t strloc,
2526 enum cpp_ttype type,
2527 int expected_num_ranges)
2529 cpp_reader *pfile = test.m_parser;
2530 string_concat_db *concats = &test.m_concats;
2532 int actual_num_ranges = -1;
2533 const char *err
2534 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2535 &actual_num_ranges);
2536 if (should_have_column_data_p (strloc))
2537 ASSERT_EQ_AT (loc, NULL, err);
2538 else
2540 ASSERT_STREQ_AT (loc,
2541 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2542 err);
2543 return;
2545 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
/* Call assert_num_substring_ranges with SELFTEST_LOCATION as the
   effective location, so that failures point at the use of this
   macro.  */

#define ASSERT_NUM_SUBSTRING_RANGES(FIXTURE, STR_LOC, TOK_TYPE, \
                                    WANT_NUM_RANGES) \
  assert_num_substring_ranges (SELFTEST_LOCATION, (FIXTURE), (STR_LOC), \
                               (TOK_TYPE), (WANT_NUM_RANGES))
2557 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2558 returns an error (using the string concatenation database for TEST). */
2560 static void
2561 assert_has_no_substring_ranges (const location &loc,
2562 lexer_test& test,
2563 location_t strloc,
2564 enum cpp_ttype type,
2565 const char *expected_err)
2567 cpp_reader *pfile = test.m_parser;
2568 string_concat_db *concats = &test.m_concats;
2569 cpp_substring_ranges ranges;
2570 const char *actual_err
2571 = get_substring_ranges_for_loc (pfile, concats, strloc,
2572 type, ranges);
2573 if (should_have_column_data_p (strloc))
2574 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2575 else
2576 ASSERT_STREQ_AT (loc,
2577 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2578 actual_err);
/* Call assert_has_no_substring_ranges with SELFTEST_LOCATION as the
   effective location, so that failures point at the use of this
   macro.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(FIXTURE, STR_LOC, TOK_TYPE, WANT_ERR) \
  assert_has_no_substring_ranges (SELFTEST_LOCATION, (FIXTURE), \
                                  (STR_LOC), (TOK_TYPE), (WANT_ERR))
2585 /* Lex a simple string literal. Verify the substring location data, before
2586 and after running cpp_interpret_string on it. */
2588 static void
2589 test_lexer_string_locations_simple (const line_table_case &case_)
2591 /* Digits 0-9 (with 0 at column 10), the simple way.
2592 ....................000000000.11111111112.2222222223333333333
2593 ....................123456789.01234567890.1234567890123456789
2594 We add a trailing comment to ensure that we correctly locate
2595 the end of the string literal token. */
2596 const char *content = " \"0123456789\" /* not a string */\n";
2597 lexer_test test (case_, content, NULL);
2599 /* Verify that we get the expected token back, with the correct
2600 location information. */
2601 const cpp_token *tok = test.get_token ();
2602 ASSERT_EQ (tok->type, CPP_STRING);
2603 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2604 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2606 /* At this point in lexing, the quote characters are treated as part of
2607 the string (they are stripped off by cpp_interpret_string). */
2609 ASSERT_EQ (tok->val.str.len, 12);
2611 /* Verify that cpp_interpret_string works. */
2612 cpp_string dst_string;
2613 const enum cpp_ttype type = CPP_STRING;
2614 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2615 &dst_string, type);
2616 ASSERT_TRUE (result);
2617 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2618 free (const_cast <unsigned char *> (dst_string.text));
2620 /* Verify ranges of individual characters. This no longer includes the
2621 opening quote, but does include the closing quote. */
2622 for (int i = 0; i <= 10; i++)
2623 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2624 10 + i, 10 + i);
2626 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2629 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2630 encoding. */
2632 static void
2633 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2635 /* EBCDIC support requires iconv. */
2636 if (!HAVE_ICONV)
2637 return;
2639 /* Digits 0-9 (with 0 at column 10), the simple way.
2640 ....................000000000.11111111112.2222222223333333333
2641 ....................123456789.01234567890.1234567890123456789
2642 We add a trailing comment to ensure that we correctly locate
2643 the end of the string literal token. */
2644 const char *content = " \"0123456789\" /* not a string */\n";
2645 ebcdic_execution_charset use_ebcdic;
2646 lexer_test test (case_, content, &use_ebcdic);
2648 /* Verify that we get the expected token back, with the correct
2649 location information. */
2650 const cpp_token *tok = test.get_token ();
2651 ASSERT_EQ (tok->type, CPP_STRING);
2652 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2653 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2655 /* At this point in lexing, the quote characters are treated as part of
2656 the string (they are stripped off by cpp_interpret_string). */
2658 ASSERT_EQ (tok->val.str.len, 12);
2660 /* The remainder of the test requires an iconv implementation that
2661 can convert from UTF-8 to the EBCDIC encoding requested above. */
2662 if (use_ebcdic.iconv_errors_occurred_p ())
2663 return;
2665 /* Verify that cpp_interpret_string works. */
2666 cpp_string dst_string;
2667 const enum cpp_ttype type = CPP_STRING;
2668 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2669 &dst_string, type);
2670 ASSERT_TRUE (result);
2671 /* We should now have EBCDIC-encoded text, specifically
2672 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2673 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2674 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2675 (const char *)dst_string.text);
2676 free (const_cast <unsigned char *> (dst_string.text));
2678 /* Verify that we don't attempt to record substring location information
2679 for such cases. */
2680 ASSERT_HAS_NO_SUBSTRING_RANGES
2681 (test, tok->src_loc, type,
2682 "execution character set != source character set");
2685 /* Lex a string literal containing a hex-escaped character.
2686 Verify the substring location data, before and after running
2687 cpp_interpret_string on it. */
2689 static void
2690 test_lexer_string_locations_hex (const line_table_case &case_)
2692 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2693 and with a space in place of digit 6, to terminate the escaped
2694 hex code.
2695 ....................000000000.111111.11112222.
2696 ....................123456789.012345.67890123. */
2697 const char *content = " \"01234\\x35 789\"\n";
2698 lexer_test test (case_, content, NULL);
2700 /* Verify that we get the expected token back, with the correct
2701 location information. */
2702 const cpp_token *tok = test.get_token ();
2703 ASSERT_EQ (tok->type, CPP_STRING);
2704 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2705 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2707 /* At this point in lexing, the quote characters are treated as part of
2708 the string (they are stripped off by cpp_interpret_string). */
2709 ASSERT_EQ (tok->val.str.len, 15);
2711 /* Verify that cpp_interpret_string works. */
2712 cpp_string dst_string;
2713 const enum cpp_ttype type = CPP_STRING;
2714 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2715 &dst_string, type);
2716 ASSERT_TRUE (result);
2717 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2718 free (const_cast <unsigned char *> (dst_string.text));
2720 /* Verify ranges of individual characters. This no longer includes the
2721 opening quote, but does include the closing quote. */
2722 for (int i = 0; i <= 4; i++)
2723 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2724 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2725 for (int i = 6; i <= 10; i++)
2726 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2728 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2731 /* Lex a string literal containing an octal-escaped character.
2732 Verify the substring location data after running cpp_interpret_string
2733 on it. */
2735 static void
2736 test_lexer_string_locations_oct (const line_table_case &case_)
2738 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2739 and with a space in place of digit 6, to terminate the escaped
2740 octal code.
2741 ....................000000000.111111.11112222.2222223333333333444
2742 ....................123456789.012345.67890123.4567890123456789012 */
2743 const char *content = " \"01234\\065 789\" /* not a string */\n";
2744 lexer_test test (case_, content, NULL);
2746 /* Verify that we get the expected token back, with the correct
2747 location information. */
2748 const cpp_token *tok = test.get_token ();
2749 ASSERT_EQ (tok->type, CPP_STRING);
2750 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2752 /* Verify that cpp_interpret_string works. */
2753 cpp_string dst_string;
2754 const enum cpp_ttype type = CPP_STRING;
2755 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2756 &dst_string, type);
2757 ASSERT_TRUE (result);
2758 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2759 free (const_cast <unsigned char *> (dst_string.text));
2761 /* Verify ranges of individual characters. This no longer includes the
2762 opening quote, but does include the closing quote. */
2763 for (int i = 0; i < 5; i++)
2764 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2765 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2766 for (int i = 6; i <= 10; i++)
2767 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2769 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2772 /* Test of string literal containing letter escapes. */
2774 static void
2775 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2777 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2778 .....................000000000.1.11111.1.1.11222.22222223333333
2779 .....................123456789.0.12345.6.7.89012.34567890123456. */
2780 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2781 lexer_test test (case_, content, NULL);
2783 /* Verify that we get the expected tokens back. */
2784 const cpp_token *tok = test.get_token ();
2785 ASSERT_EQ (tok->type, CPP_STRING);
2786 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2788 /* Verify ranges of individual characters. */
2789 /* "\t". */
2790 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2791 0, 1, 10, 11);
2792 /* "foo". */
2793 for (int i = 1; i <= 3; i++)
2794 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2795 i, 1, 11 + i, 11 + i);
2796 /* "\\" and "\n". */
2797 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2798 4, 1, 15, 16);
2799 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2800 5, 1, 17, 18);
2802 /* "bar" and closing quote for nul-terminator. */
2803 for (int i = 6; i <= 9; i++)
2804 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2805 i, 1, 13 + i, 13 + i);
2807 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2810 /* Another test of a string literal containing a letter escape.
2811 Based on string seen in
2812 printf ("%-%\n");
2813 in gcc.dg/format/c90-printf-1.c. */
2815 static void
2816 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2818 /* .....................000000000.1111.11.1111.22222222223.
2819 .....................123456789.0123.45.6789.01234567890. */
2820 const char *content = (" \"%-%\\n\" /* non-str */\n");
2821 lexer_test test (case_, content, NULL);
2823 /* Verify that we get the expected tokens back. */
2824 const cpp_token *tok = test.get_token ();
2825 ASSERT_EQ (tok->type, CPP_STRING);
2826 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2828 /* Verify ranges of individual characters. */
2829 /* "%-%". */
2830 for (int i = 0; i < 3; i++)
2831 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2832 i, 1, 10 + i, 10 + i);
2833 /* "\n". */
2834 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2835 3, 1, 13, 14);
2837 /* Closing quote for nul-terminator. */
2838 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2839 4, 1, 15, 15);
2841 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2844 /* Lex a string literal containing UCN 4 characters.
2845 Verify the substring location data after running cpp_interpret_string
2846 on it. */
2848 static void
2849 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2851 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2852 as UCN 4.
2853 ....................000000000.111111.111122.222222223.33333333344444
2854 ....................123456789.012345.678901.234567890.12345678901234 */
2855 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2856 lexer_test test (case_, content, NULL);
2858 /* Verify that we get the expected token back, with the correct
2859 location information. */
2860 const cpp_token *tok = test.get_token ();
2861 ASSERT_EQ (tok->type, CPP_STRING);
2862 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2864 /* Verify that cpp_interpret_string works.
2865 The string should be encoded in the execution character
2866 set. Assuming that is UTF-8, we should have the following:
2867 ----------- ---- ----- ------- ----------------
2868 Byte offset Byte Octal Unicode Source Column(s)
2869 ----------- ---- ----- ------- ----------------
2870 0 0x30 '0' 10
2871 1 0x31 '1' 11
2872 2 0x32 '2' 12
2873 3 0x33 '3' 13
2874 4 0x34 '4' 14
2875 5 0xE2 \342 U+2174 15-20
2876 6 0x85 \205 (cont) 15-20
2877 7 0xB4 \264 (cont) 15-20
2878 8 0xE2 \342 U+2175 21-26
2879 9 0x85 \205 (cont) 21-26
2880 10 0xB5 \265 (cont) 21-26
2881 11 0x37 '7' 27
2882 12 0x38 '8' 28
2883 13 0x39 '9' 29
2884 14 0x00 30 (closing quote)
2885 ----------- ---- ----- ------- ---------------. */
2887 cpp_string dst_string;
2888 const enum cpp_ttype type = CPP_STRING;
2889 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2890 &dst_string, type);
2891 ASSERT_TRUE (result);
2892 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2893 (const char *)dst_string.text);
2894 free (const_cast <unsigned char *> (dst_string.text));
2896 /* Verify ranges of individual characters. This no longer includes the
2897 opening quote, but does include the closing quote.
2898 '01234'. */
2899 for (int i = 0; i <= 4; i++)
2900 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2901 /* U+2174. */
2902 for (int i = 5; i <= 7; i++)
2903 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2904 /* U+2175. */
2905 for (int i = 8; i <= 10; i++)
2906 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2907 /* '789' and nul terminator */
2908 for (int i = 11; i <= 14; i++)
2909 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2911 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2914 /* Lex a string literal containing UCN 8 characters.
2915 Verify the substring location data after running cpp_interpret_string
2916 on it. */
2918 static void
2919 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2921 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2922 ....................000000000.111111.1111222222.2222333333333.344444
2923 ....................123456789.012345.6789012345.6789012345678.901234 */
2924 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2925 lexer_test test (case_, content, NULL);
2927 /* Verify that we get the expected token back, with the correct
2928 location information. */
2929 const cpp_token *tok = test.get_token ();
2930 ASSERT_EQ (tok->type, CPP_STRING);
2931 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2932 "\"01234\\U00002174\\U00002175789\"");
2934 /* Verify that cpp_interpret_string works.
2935 The UTF-8 encoding of the string is identical to that from
2936 the ucn4 testcase above; the only difference is the column
2937 locations. */
2938 cpp_string dst_string;
2939 const enum cpp_ttype type = CPP_STRING;
2940 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2941 &dst_string, type);
2942 ASSERT_TRUE (result);
2943 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2944 (const char *)dst_string.text);
2945 free (const_cast <unsigned char *> (dst_string.text));
2947 /* Verify ranges of individual characters. This no longer includes the
2948 opening quote, but does include the closing quote.
2949 '01234'. */
2950 for (int i = 0; i <= 4; i++)
2951 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2952 /* U+2174. */
2953 for (int i = 5; i <= 7; i++)
2954 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2955 /* U+2175. */
2956 for (int i = 8; i <= 10; i++)
2957 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2958 /* '789' at columns 35-37 */
2959 for (int i = 11; i <= 13; i++)
2960 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2961 /* Closing quote/nul-terminator at column 38. */
2962 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2964 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
/* Read the four bytes at PTR_BE_VALUE as a big-endian (most significant
   byte first) 32-bit quantity and return it as a host-order value.  The
   bytes are read one at a time, so the result does not depend on the
   endianness of the host.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t host_value = 0;
  /* Shift in each byte, starting from the most significant one.  */
  for (int idx = 0; idx < 4; idx++)
    host_value = (host_value << 8) | bytes[idx];
  return host_value;
}
2979 /* Lex a wide string literal and verify that attempts to read substring
2980 location data from it fail gracefully. */
2982 static void
2983 test_lexer_string_locations_wide_string (const line_table_case &case_)
2985 /* Digits 0-9.
2986 ....................000000000.11111111112.22222222233333
2987 ....................123456789.01234567890.12345678901234 */
2988 const char *content = " L\"0123456789\" /* non-str */\n";
2989 lexer_test test (case_, content, NULL);
2991 /* Verify that we get the expected token back, with the correct
2992 location information. */
2993 const cpp_token *tok = test.get_token ();
2994 ASSERT_EQ (tok->type, CPP_WSTRING);
2995 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2997 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2998 cpp_string dst_string;
2999 const enum cpp_ttype type = CPP_WSTRING;
3000 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3001 &dst_string, type);
3002 ASSERT_TRUE (result);
3003 /* The cpp_reader defaults to big-endian with
3004 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
3005 now be encoded as UTF-32BE. */
3006 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3007 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3008 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3009 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3010 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3011 free (const_cast <unsigned char *> (dst_string.text));
3013 /* We don't yet support generating substring location information
3014 for L"" strings. */
3015 ASSERT_HAS_NO_SUBSTRING_RANGES
3016 (test, tok->src_loc, type,
3017 "execution character set != source character set");
/* Read the two bytes at PTR_BE_VALUE as a big-endian (most significant
   byte first) 16-bit quantity and return it as a host-order value.  The
   bytes are read one at a time, so the result does not depend on the
   endianness of the host.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high_byte = bytes[0];
  const unsigned int low_byte = bytes[1];
  return (uint16_t) ((high_byte << 8) | low_byte);
}
3029 /* Lex a u"" string literal and verify that attempts to read substring
3030 location data from it fail gracefully. */
3032 static void
3033 test_lexer_string_locations_string16 (const line_table_case &case_)
3035 /* Digits 0-9.
3036 ....................000000000.11111111112.22222222233333
3037 ....................123456789.01234567890.12345678901234 */
3038 const char *content = " u\"0123456789\" /* non-str */\n";
3039 lexer_test test (case_, content, NULL);
3041 /* Verify that we get the expected token back, with the correct
3042 location information. */
3043 const cpp_token *tok = test.get_token ();
3044 ASSERT_EQ (tok->type, CPP_STRING16);
3045 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
3047 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
3048 cpp_string dst_string;
3049 const enum cpp_ttype type = CPP_STRING16;
3050 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3051 &dst_string, type);
3052 ASSERT_TRUE (result);
3054 /* The cpp_reader defaults to big-endian, so dst_string should
3055 now be encoded as UTF-16BE. */
3056 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
3057 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
3058 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
3059 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
3060 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
3061 free (const_cast <unsigned char *> (dst_string.text));
3063 /* We don't yet support generating substring location information
3064 for L"" strings. */
3065 ASSERT_HAS_NO_SUBSTRING_RANGES
3066 (test, tok->src_loc, type,
3067 "execution character set != source character set");
3070 /* Lex a U"" string literal and verify that attempts to read substring
3071 location data from it fail gracefully. */
3073 static void
3074 test_lexer_string_locations_string32 (const line_table_case &case_)
3076 /* Digits 0-9.
3077 ....................000000000.11111111112.22222222233333
3078 ....................123456789.01234567890.12345678901234 */
3079 const char *content = " U\"0123456789\" /* non-str */\n";
3080 lexer_test test (case_, content, NULL);
3082 /* Verify that we get the expected token back, with the correct
3083 location information. */
3084 const cpp_token *tok = test.get_token ();
3085 ASSERT_EQ (tok->type, CPP_STRING32);
3086 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
3088 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
3089 cpp_string dst_string;
3090 const enum cpp_ttype type = CPP_STRING32;
3091 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3092 &dst_string, type);
3093 ASSERT_TRUE (result);
3095 /* The cpp_reader defaults to big-endian, so dst_string should
3096 now be encoded as UTF-32BE. */
3097 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3098 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3099 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3100 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3101 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3102 free (const_cast <unsigned char *> (dst_string.text));
3104 /* We don't yet support generating substring location information
3105 for L"" strings. */
3106 ASSERT_HAS_NO_SUBSTRING_RANGES
3107 (test, tok->src_loc, type,
3108 "execution character set != source character set");
3111 /* Lex a u8-string literal.
3112 Verify the substring location data after running cpp_interpret_string
3113 on it. */
3115 static void
3116 test_lexer_string_locations_u8 (const line_table_case &case_)
3118 /* Digits 0-9.
3119 ....................000000000.11111111112.22222222233333
3120 ....................123456789.01234567890.12345678901234 */
3121 const char *content = " u8\"0123456789\" /* non-str */\n";
3122 lexer_test test (case_, content, NULL);
3124 /* Verify that we get the expected token back, with the correct
3125 location information. */
3126 const cpp_token *tok = test.get_token ();
3127 ASSERT_EQ (tok->type, CPP_UTF8STRING);
3128 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3130 /* Verify that cpp_interpret_string works. */
3131 cpp_string dst_string;
3132 const enum cpp_ttype type = CPP_STRING;
3133 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3134 &dst_string, type);
3135 ASSERT_TRUE (result);
3136 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3137 free (const_cast <unsigned char *> (dst_string.text));
3139 /* Verify ranges of individual characters. This no longer includes the
3140 opening quote, but does include the closing quote. */
3141 for (int i = 0; i <= 10; i++)
3142 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3145 /* Lex a string literal containing UTF-8 source characters.
3146 Verify the substring location data after running cpp_interpret_string
3147 on it. */
3149 static void
3150 test_lexer_string_locations_utf8_source (const line_table_case &case_)
3152 /* This string literal is written out to the source file as UTF-8,
3153 and is of the form "before mojibake after", where "mojibake"
3154 is written as the following four unicode code points:
3155 U+6587 CJK UNIFIED IDEOGRAPH-6587
3156 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3157 U+5316 CJK UNIFIED IDEOGRAPH-5316
3158 U+3051 HIRAGANA LETTER KE.
3159 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3160 "before" and "after" are 1 byte per unicode character.
3162 The numbering shown are "columns", which are *byte* numbers within
3163 the line, rather than unicode character numbers.
3165 .................... 000000000.1111111.
3166 .................... 123456789.0123456. */
3167 const char *content = (" \"before "
3168 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3169 UTF-8: 0xE6 0x96 0x87
3170 C octal escaped UTF-8: \346\226\207
3171 "column" numbers: 17-19. */
3172 "\346\226\207"
3174 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3175 UTF-8: 0xE5 0xAD 0x97
3176 C octal escaped UTF-8: \345\255\227
3177 "column" numbers: 20-22. */
3178 "\345\255\227"
3180 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3181 UTF-8: 0xE5 0x8C 0x96
3182 C octal escaped UTF-8: \345\214\226
3183 "column" numbers: 23-25. */
3184 "\345\214\226"
3186 /* U+3051 HIRAGANA LETTER KE
3187 UTF-8: 0xE3 0x81 0x91
3188 C octal escaped UTF-8: \343\201\221
3189 "column" numbers: 26-28. */
3190 "\343\201\221"
3192 /* column numbers 29 onwards
3193 2333333.33334444444444
3194 9012345.67890123456789. */
3195 " after\" /* non-str */\n");
3196 lexer_test test (case_, content, NULL);
3198 /* Verify that we get the expected token back, with the correct
3199 location information. */
3200 const cpp_token *tok = test.get_token ();
3201 ASSERT_EQ (tok->type, CPP_STRING);
3202 ASSERT_TOKEN_AS_TEXT_EQ
3203 (test.m_parser, tok,
3204 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3206 /* Verify that cpp_interpret_string works. */
3207 cpp_string dst_string;
3208 const enum cpp_ttype type = CPP_STRING;
3209 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3210 &dst_string, type);
3211 ASSERT_TRUE (result);
3212 ASSERT_STREQ
3213 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3214 (const char *)dst_string.text);
3215 free (const_cast <unsigned char *> (dst_string.text));
3217 /* Verify ranges of individual characters. This no longer includes the
3218 opening quote, but does include the closing quote.
3219 Assuming that both source and execution encodings are UTF-8, we have
3220 a run of 25 octets in each, plus the NUL terminator. */
3221 for (int i = 0; i < 25; i++)
3222 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3223 /* NUL-terminator should use the closing quote at column 35. */
3224 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3226 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3229 /* Test of string literal concatenation. */
3231 static void
3232 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3234 /* Digits 0-9.
3235 .....................000000000.111111.11112222222222
3236 .....................123456789.012345.67890123456789. */
3237 const char *content = (" \"01234\" /* non-str */\n"
3238 " \"56789\" /* non-str */\n");
3239 lexer_test test (case_, content, NULL);
3241 location_t input_locs[2];
3243 /* Verify that we get the expected tokens back. */
3244 auto_vec <cpp_string> input_strings;
3245 const cpp_token *tok_a = test.get_token ();
3246 ASSERT_EQ (tok_a->type, CPP_STRING);
3247 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3248 input_strings.safe_push (tok_a->val.str);
3249 input_locs[0] = tok_a->src_loc;
3251 const cpp_token *tok_b = test.get_token ();
3252 ASSERT_EQ (tok_b->type, CPP_STRING);
3253 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3254 input_strings.safe_push (tok_b->val.str);
3255 input_locs[1] = tok_b->src_loc;
3257 /* Verify that cpp_interpret_string works. */
3258 cpp_string dst_string;
3259 const enum cpp_ttype type = CPP_STRING;
3260 bool result = cpp_interpret_string (test.m_parser,
3261 input_strings.address (), 2,
3262 &dst_string, type);
3263 ASSERT_TRUE (result);
3264 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3265 free (const_cast <unsigned char *> (dst_string.text));
3267 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3268 test.m_concats.record_string_concatenation (2, input_locs);
3270 location_t initial_loc = input_locs[0];
3272 /* "01234" on line 1. */
3273 for (int i = 0; i <= 4; i++)
3274 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3275 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3276 for (int i = 5; i <= 10; i++)
3277 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3279 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3282 /* Another test of string literal concatenation. */
3284 static void
3285 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3287 /* Digits 0-9.
3288 .....................000000000.111.11111112222222
3289 .....................123456789.012.34567890123456. */
3290 const char *content = (" \"01\" /* non-str */\n"
3291 " \"23\" /* non-str */\n"
3292 " \"45\" /* non-str */\n"
3293 " \"67\" /* non-str */\n"
3294 " \"89\" /* non-str */\n");
3295 lexer_test test (case_, content, NULL);
3297 auto_vec <cpp_string> input_strings;
3298 location_t input_locs[5];
3300 /* Verify that we get the expected tokens back. */
3301 for (int i = 0; i < 5; i++)
3303 const cpp_token *tok = test.get_token ();
3304 ASSERT_EQ (tok->type, CPP_STRING);
3305 input_strings.safe_push (tok->val.str);
3306 input_locs[i] = tok->src_loc;
3309 /* Verify that cpp_interpret_string works. */
3310 cpp_string dst_string;
3311 const enum cpp_ttype type = CPP_STRING;
3312 bool result = cpp_interpret_string (test.m_parser,
3313 input_strings.address (), 5,
3314 &dst_string, type);
3315 ASSERT_TRUE (result);
3316 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3317 free (const_cast <unsigned char *> (dst_string.text));
3319 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3320 test.m_concats.record_string_concatenation (5, input_locs);
3322 location_t initial_loc = input_locs[0];
3324 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3325 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3326 and expect get_source_range_for_substring to fail.
3327 However, for a string concatenation test, we can have a case
3328 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3329 but subsequent strings can be after it.
3330 Attempting to detect this within assert_char_at_range
3331 would overcomplicate the logic for the common test cases, so
3332 we detect it here. */
3333 if (should_have_column_data_p (input_locs[0])
3334 && !should_have_column_data_p (input_locs[4]))
3336 /* Verify that get_source_range_for_substring gracefully rejects
3337 this case. */
3338 source_range actual_range;
3339 const char *err
3340 = get_source_range_for_char (test.m_parser, &test.m_concats,
3341 initial_loc, type, 0, &actual_range);
3342 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3343 return;
3346 for (int i = 0; i < 5; i++)
3347 for (int j = 0; j < 2; j++)
3348 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3349 i + 1, 10 + j, 10 + j);
3351 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3352 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3354 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3357 /* Another test of string literal concatenation, this time combined with
3358 various kinds of escaped characters. */
3360 static void
3361 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3363 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3364 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3365 const char *content
3366 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3367 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3368 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3369 lexer_test test (case_, content, NULL);
3371 auto_vec <cpp_string> input_strings;
3372 location_t input_locs[4];
3374 /* Verify that we get the expected tokens back. */
3375 for (int i = 0; i < 4; i++)
3377 const cpp_token *tok = test.get_token ();
3378 ASSERT_EQ (tok->type, CPP_STRING);
3379 input_strings.safe_push (tok->val.str);
3380 input_locs[i] = tok->src_loc;
3383 /* Verify that cpp_interpret_string works. */
3384 cpp_string dst_string;
3385 const enum cpp_ttype type = CPP_STRING;
3386 bool result = cpp_interpret_string (test.m_parser,
3387 input_strings.address (), 4,
3388 &dst_string, type);
3389 ASSERT_TRUE (result);
3390 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3391 free (const_cast <unsigned char *> (dst_string.text));
3393 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3394 test.m_concats.record_string_concatenation (4, input_locs);
3396 location_t initial_loc = input_locs[0];
3398 for (int i = 0; i <= 4; i++)
3399 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3400 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3401 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3402 for (int i = 7; i <= 9; i++)
3403 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3405 /* NUL-terminator should use the location of the final closing quote. */
3406 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3408 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3411 /* Test of string literal in a macro. */
3413 static void
3414 test_lexer_string_locations_macro (const line_table_case &case_)
3416 /* Digits 0-9.
3417 .....................0000000001111111111.22222222223.
3418 .....................1234567890123456789.01234567890. */
3419 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3420 " MACRO");
3421 lexer_test test (case_, content, NULL);
3423 /* Verify that we get the expected tokens back. */
3424 const cpp_token *tok = test.get_token ();
3425 ASSERT_EQ (tok->type, CPP_PADDING);
3427 tok = test.get_token ();
3428 ASSERT_EQ (tok->type, CPP_STRING);
3429 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3431 /* Verify ranges of individual characters. We ought to
3432 see columns within the macro definition. */
3433 for (int i = 0; i <= 10; i++)
3434 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3435 i, 1, 20 + i, 20 + i);
3437 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3439 tok = test.get_token ();
3440 ASSERT_EQ (tok->type, CPP_PADDING);
3443 /* Test of stringification of a macro argument. */
3445 static void
3446 test_lexer_string_locations_stringified_macro_argument
3447 (const line_table_case &case_)
3449 /* .....................000000000111111111122222222223.
3450 .....................123456789012345678901234567890. */
3451 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3452 "MACRO(foo)\n");
3453 lexer_test test (case_, content, NULL);
3455 /* Verify that we get the expected token back. */
3456 const cpp_token *tok = test.get_token ();
3457 ASSERT_EQ (tok->type, CPP_PADDING);
3459 tok = test.get_token ();
3460 ASSERT_EQ (tok->type, CPP_STRING);
3461 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3463 /* We don't support getting the location of a stringified macro
3464 argument. Verify that it fails gracefully. */
3465 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3466 "cpp_interpret_string_1 failed");
3468 tok = test.get_token ();
3469 ASSERT_EQ (tok->type, CPP_PADDING);
3471 tok = test.get_token ();
3472 ASSERT_EQ (tok->type, CPP_PADDING);
3475 /* Ensure that we are fail gracefully if something attempts to pass
3476 in a location that isn't a string literal token. Seen on this code:
3478 const char a[] = " %d ";
3479 __builtin_printf (a, 0.5);
3482 when c-format.cc erroneously used the indicated one-character
3483 location as the format string location, leading to a read past the
3484 end of a string buffer in cpp_interpret_string_1. */
3486 static void
3487 test_lexer_string_locations_non_string (const line_table_case &case_)
3489 /* .....................000000000111111111122222222223.
3490 .....................123456789012345678901234567890. */
3491 const char *content = (" a\n");
3492 lexer_test test (case_, content, NULL);
3494 /* Verify that we get the expected token back. */
3495 const cpp_token *tok = test.get_token ();
3496 ASSERT_EQ (tok->type, CPP_NAME);
3497 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3499 /* At this point, libcpp is attempting to interpret the name as a
3500 string literal, despite it not starting with a quote. We don't detect
3501 that, but we should at least fail gracefully. */
3502 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3503 "cpp_interpret_string_1 failed");
3506 /* Ensure that we can read substring information for a token which
3507 starts in one linemap and ends in another . Adapted from
3508 gcc.dg/cpp/pr69985.c. */
3510 static void
3511 test_lexer_string_locations_long_line (const line_table_case &case_)
3513 /* .....................000000.000111111111
3514 .....................123456.789012346789. */
3515 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3516 " \"0123456789012345678901234567890123456789"
3517 "0123456789012345678901234567890123456789"
3518 "0123456789012345678901234567890123456789"
3519 "0123456789\"\n");
3521 lexer_test test (case_, content, NULL);
3523 /* Verify that we get the expected token back. */
3524 const cpp_token *tok = test.get_token ();
3525 ASSERT_EQ (tok->type, CPP_STRING);
3527 if (!should_have_column_data_p (line_table->highest_location))
3528 return;
3530 /* Verify ranges of individual characters. */
3531 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3532 for (int i = 0; i < 131; i++)
3533 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3534 i, 2, 7 + i, 7 + i);
3537 /* Test of locations within a raw string that doesn't contain a newline. */
3539 static void
3540 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3542 /* .....................00.0000000111111111122.
3543 .....................12.3456789012345678901. */
3544 const char *content = ("R\"foo(0123456789)foo\"\n");
3545 lexer_test test (case_, content, NULL);
3547 /* Verify that we get the expected token back. */
3548 const cpp_token *tok = test.get_token ();
3549 ASSERT_EQ (tok->type, CPP_STRING);
3551 /* Verify that cpp_interpret_string works. */
3552 cpp_string dst_string;
3553 const enum cpp_ttype type = CPP_STRING;
3554 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3555 &dst_string, type);
3556 ASSERT_TRUE (result);
3557 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3558 free (const_cast <unsigned char *> (dst_string.text));
3560 if (!should_have_column_data_p (line_table->highest_location))
3561 return;
3563 /* 0-9, plus the nil terminator. */
3564 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3565 for (int i = 0; i < 11; i++)
3566 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3567 i, 1, 7 + i, 7 + i);
3570 /* Test of locations within a raw string that contains a newline. */
3572 static void
3573 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3575 /* .....................00.0000.
3576 .....................12.3456. */
3577 const char *content = ("R\"foo(\n"
3578 /* .....................00000.
3579 .....................12345. */
3580 "hello\n"
3581 "world\n"
3582 /* .....................00000.
3583 .....................12345. */
3584 ")foo\"\n");
3585 lexer_test test (case_, content, NULL);
3587 /* Verify that we get the expected token back. */
3588 const cpp_token *tok = test.get_token ();
3589 ASSERT_EQ (tok->type, CPP_STRING);
3591 /* Verify that cpp_interpret_string works. */
3592 cpp_string dst_string;
3593 const enum cpp_ttype type = CPP_STRING;
3594 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3595 &dst_string, type);
3596 ASSERT_TRUE (result);
3597 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3598 free (const_cast <unsigned char *> (dst_string.text));
3600 if (!should_have_column_data_p (line_table->highest_location))
3601 return;
3603 /* Currently we don't support locations within raw strings that
3604 contain newlines. */
3605 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3606 "range endpoints are on different lines");
3609 /* Test of parsing an unterminated raw string. */
3611 static void
3612 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3614 const char *content = "R\"ouch()ouCh\" /* etc */";
3616 lexer_diagnostic_sink diagnostics;
3617 lexer_test test (case_, content, &diagnostics);
3618 test.m_implicitly_expect_EOF = false;
3620 /* Attempt to parse the raw string. */
3621 const cpp_token *tok = test.get_token ();
3622 ASSERT_EQ (tok->type, CPP_EOF);
3624 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3625 /* We expect the message "unterminated raw string"
3626 in the "cpplib" translation domain.
3627 It's not clear that dgettext is available on all supported hosts,
3628 so this assertion is commented-out for now.
3629 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3630 diagnostics.m_diagnostics[0]);
3634 /* Test of lexing char constants. */
3636 static void
3637 test_lexer_char_constants (const line_table_case &case_)
3639 /* Various char constants.
3640 .....................0000000001111111111.22222222223.
3641 .....................1234567890123456789.01234567890. */
3642 const char *content = (" 'a'\n"
3643 " u'a'\n"
3644 " U'a'\n"
3645 " L'a'\n"
3646 " 'abc'\n");
3647 lexer_test test (case_, content, NULL);
3649 /* Verify that we get the expected tokens back. */
3650 /* 'a'. */
3651 const cpp_token *tok = test.get_token ();
3652 ASSERT_EQ (tok->type, CPP_CHAR);
3653 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3655 unsigned int chars_seen;
3656 int unsignedp;
3657 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3658 &chars_seen, &unsignedp);
3659 ASSERT_EQ (cc, 'a');
3660 ASSERT_EQ (chars_seen, 1);
3662 /* u'a'. */
3663 tok = test.get_token ();
3664 ASSERT_EQ (tok->type, CPP_CHAR16);
3665 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3667 /* U'a'. */
3668 tok = test.get_token ();
3669 ASSERT_EQ (tok->type, CPP_CHAR32);
3670 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3672 /* L'a'. */
3673 tok = test.get_token ();
3674 ASSERT_EQ (tok->type, CPP_WCHAR);
3675 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3677 /* 'abc' (c-char-sequence). */
3678 tok = test.get_token ();
3679 ASSERT_EQ (tok->type, CPP_CHAR);
3680 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3682 /* A table of interesting location_t values, giving one axis of our test
3683 matrix. */
3685 static const location_t boundary_locations[] = {
3686 /* Zero means "don't override the default values for a new line_table". */
3689 /* An arbitrary non-zero value that isn't close to one of
3690 the boundary values below. */
3691 0x10000,
3693 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3694 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3695 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3696 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3697 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3698 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3700 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3701 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3702 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3703 LINE_MAP_MAX_LOCATION_WITH_COLS,
3704 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3705 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3708 /* Run TESTCASE multiple times, once for each case in our test matrix. */
3710 void
3711 for_each_line_table_case (void (*testcase) (const line_table_case &))
3713 /* As noted above in the description of struct line_table_case,
3714 we want to explore a test matrix of interesting line_table
3715 situations, running various selftests for each case within the
3716 matrix. */
3718 /* Run all tests with:
3719 (a) line_table->default_range_bits == 0, and
3720 (b) line_table->default_range_bits == 5. */
3721 int num_cases_tested = 0;
3722 for (int default_range_bits = 0; default_range_bits <= 5;
3723 default_range_bits += 5)
3725 /* ...and use each of the "interesting" location values as
3726 the starting location within line_table. */
3727 const int num_boundary_locations
3728 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3729 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3731 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3733 testcase (c);
3735 num_cases_tested++;
3739 /* Verify that we fully covered the test matrix. */
3740 ASSERT_EQ (num_cases_tested, 2 * 12);
3743 /* Verify that when presented with a consecutive pair of locations with
3744 a very large line offset, we don't attempt to consolidate them into
3745 a single ordinary linemap where the line offsets within the line map
3746 would lead to overflow (PR lto/88147). */
3748 static void
3749 test_line_offset_overflow ()
3751 line_table_test ltt (line_table_case (5, 0));
3753 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3754 linemap_line_start (line_table, 1, 100);
3755 location_t loc_a = linemap_line_start (line_table, 2578, 255);
3756 assert_loceq ("foo.c", 2578, 0, loc_a);
3758 const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3759 ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3760 ASSERT_EQ (ordmap_a->m_range_bits, 5);
3762 location_t loc_b = linemap_line_start (line_table, 404198, 512);
3763 assert_loceq ("foo.c", 404198, 0, loc_b);
3765 /* We should have started a new linemap, rather than attempting to store
3766 a very large line offset. */
3767 const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3768 ASSERT_NE (ordmap_a, ordmap_b);
3771 void test_cpp_utf8 ()
3773 const int def_tabstop = 8;
3774 cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
3776 /* Verify that wcwidth of invalid UTF-8 or control bytes is 1. */
3778 int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy);
3779 ASSERT_EQ (8, w_bad);
3780 int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, policy);
3781 ASSERT_EQ (5, w_ctrl);
3784 /* Verify that wcwidth of valid UTF-8 is as expected. */
3786 const int w_pi = cpp_display_width ("\xcf\x80", 2, policy);
3787 ASSERT_EQ (1, w_pi);
3788 const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, policy);
3789 ASSERT_EQ (2, w_emoji);
3790 const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
3791 policy);
3792 ASSERT_EQ (1, w_umlaut_precomposed);
3793 const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
3794 policy);
3795 ASSERT_EQ (1, w_umlaut_combining);
3796 const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, policy);
3797 ASSERT_EQ (2, w_han);
3798 const int w_ascii = cpp_display_width ("GCC", 3, policy);
3799 ASSERT_EQ (3, w_ascii);
3800 const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
3801 "\x9f! \xe4\xb8\xba y\xcc\x88",
3802 24, policy);
3803 ASSERT_EQ (18, w_mixed);
3806 /* Verify that display width properly expands tabs. */
3808 const char *tstr = "\tabc\td";
3809 ASSERT_EQ (6, cpp_display_width (tstr, 6,
3810 cpp_char_column_policy (1, cpp_wcwidth)));
3811 ASSERT_EQ (10, cpp_display_width (tstr, 6,
3812 cpp_char_column_policy (3, cpp_wcwidth)));
3813 ASSERT_EQ (17, cpp_display_width (tstr, 6,
3814 cpp_char_column_policy (8, cpp_wcwidth)));
3815 ASSERT_EQ (1,
3816 cpp_display_column_to_byte_column
3817 (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth)));
3820 /* Verify that cpp_byte_column_to_display_column can go past the end,
3821 and similar edge cases. */
3823 const char *str
3824 /* Display columns.
3825 111111112345 */
3826 = "\xcf\x80 abc";
3827 /* 111122223456
3828 Byte columns. */
3830 ASSERT_EQ (5, cpp_display_width (str, 6, policy));
3831 ASSERT_EQ (105,
3832 cpp_byte_column_to_display_column (str, 6, 106, policy));
3833 ASSERT_EQ (10000,
3834 cpp_byte_column_to_display_column (NULL, 0, 10000, policy));
3835 ASSERT_EQ (0,
3836 cpp_byte_column_to_display_column (NULL, 10000, 0, policy));
3839 /* Verify that cpp_display_column_to_byte_column can go past the end,
3840 and similar edge cases, and check invertibility. */
3842 const char *str
3843 /* Display columns.
3844 000000000000000000000000000000000000011
3845 111111112222222234444444455555555678901 */
3846 = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
3847 /* 000000000000000000000000000000000111111
3848 111122223333444456666777788889999012345
3849 Byte columns. */
3850 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy));
3851 ASSERT_EQ (15,
3852 cpp_display_column_to_byte_column (str, 15, 11, policy));
3853 ASSERT_EQ (115,
3854 cpp_display_column_to_byte_column (str, 15, 111, policy));
3855 ASSERT_EQ (10000,
3856 cpp_display_column_to_byte_column (NULL, 0, 10000, policy));
3857 ASSERT_EQ (0,
3858 cpp_display_column_to_byte_column (NULL, 10000, 0, policy));
3860 /* Verify that we do not interrupt a UTF-8 sequence. */
3861 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy));
3863 for (int byte_col = 1; byte_col <= 15; ++byte_col)
3865 const int disp_col
3866 = cpp_byte_column_to_display_column (str, 15, byte_col, policy);
3867 const int byte_col2
3868 = cpp_display_column_to_byte_column (str, 15, disp_col, policy);
3870 /* If we ask for the display column in the middle of a UTF-8
3871 sequence, it will return the length of the partial sequence,
3872 matching the behavior of GCC before display column support.
3873 Otherwise check the round trip was successful. */
3874 if (byte_col < 4)
3875 ASSERT_EQ (byte_col, disp_col);
3876 else if (byte_col >= 6 && byte_col < 9)
3877 ASSERT_EQ (3 + (byte_col - 5), disp_col);
3878 else
3879 ASSERT_EQ (byte_col2, byte_col);
3885 /* Run all of the selftests within this file. */
3887 void
3888 input_cc_tests ()
3890 test_linenum_comparisons ();
3891 test_should_have_column_data_p ();
3892 test_unknown_location ();
3893 test_builtins ();
3894 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3896 for_each_line_table_case (test_accessing_ordinary_linemaps);
3897 for_each_line_table_case (test_lexer);
3898 for_each_line_table_case (test_lexer_string_locations_simple);
3899 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3900 for_each_line_table_case (test_lexer_string_locations_hex);
3901 for_each_line_table_case (test_lexer_string_locations_oct);
3902 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3903 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3904 for_each_line_table_case (test_lexer_string_locations_ucn4);
3905 for_each_line_table_case (test_lexer_string_locations_ucn8);
3906 for_each_line_table_case (test_lexer_string_locations_wide_string);
3907 for_each_line_table_case (test_lexer_string_locations_string16);
3908 for_each_line_table_case (test_lexer_string_locations_string32);
3909 for_each_line_table_case (test_lexer_string_locations_u8);
3910 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3911 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3912 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3913 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3914 for_each_line_table_case (test_lexer_string_locations_macro);
3915 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3916 for_each_line_table_case (test_lexer_string_locations_non_string);
3917 for_each_line_table_case (test_lexer_string_locations_long_line);
3918 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3919 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3920 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3921 for_each_line_table_case (test_lexer_char_constants);
3923 test_reading_source_line ();
3925 test_line_offset_overflow ();
3927 test_cpp_utf8 ();
3930 } // namespace selftest
3932 #endif /* CHECKING_P */