1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2025 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
24 #include "diagnostic.h"
33 special_fname_builtin ()
35 return _("<built-in>");
38 /* Input charset configuration. */
39 static const char *default_charset_callback (const char *)
45 file_cache::initialize_input_context (diagnostic_input_charset_callback ccb
,
48 m_input_context
.ccb
= (ccb
? ccb
: default_charset_callback
);
49 m_input_context
.should_skip_bom
= should_skip_bom
;
52 /* This is a cache used by get_next_line to store the content of a
53 file to be searched for file lines. */
60 void dump (FILE *out
, int indent
) const;
61 void DEBUG_FUNCTION
dump () const { dump (stderr
, 0); }
63 bool read_line_num (size_t line_num
,
64 char ** line
, ssize_t
*line_len
);
67 const char *get_file_path () const { return m_file_path
; }
68 unsigned get_use_count () const { return m_use_count
; }
69 bool missing_trailing_newline_p () const
71 return m_missing_trailing_newline
;
73 char_span
get_full_file_content ();
75 void inc_use_count () { m_use_count
++; }
77 bool create (const file_cache::input_context
&in_context
,
78 const char *file_path
, FILE *fp
, unsigned highest_use_count
);
80 void set_content (const char *buf
, size_t sz
);
83 /* This is the information used to store a line boundary. */
87 /* The line number. It starts from 1. */
90 /* The position (byte count) of the beginning of the line,
91 relative to the file data pointer. This starts at zero. */
94 /* The position (byte count) of the last byte of the line. This
95 normally points to the '\n' character, or to one byte after the
96 last byte of the file, if the file doesn't contain a '\n'
100 line_info (size_t l
, size_t s
, size_t e
)
101 : line_num (l
), start_pos (s
), end_pos (e
)
105 :line_num (0), start_pos (0), end_pos (0)
109 bool needs_read_p () const;
110 bool needs_grow_p () const;
113 bool maybe_read_data ();
114 bool get_next_line (char **line
, ssize_t
*line_len
);
115 bool read_next_line (char ** line
, ssize_t
*line_len
);
116 bool goto_next_line ();
118 static const size_t buffer_size
= 4 * 1024;
119 static const size_t line_record_size
= 100;
121 /* The number of times this file has been accessed. This is used
122 to designate which file cache to evict from the cache
124 unsigned m_use_count
;
126 /* The file_path is the key for identifying a particular file in
128 For libcpp-using code, the underlying buffer for this field is
129 owned by the corresponding _cpp_file within the cpp_reader. */
130 const char *m_file_path
;
134 /* True when a read error happened. */
137 /* This points to the content of the file that we've read so
141 /* The allocated buffer to be freed may start a little earlier than DATA,
142 e.g. if a UTF8 BOM was skipped at the beginning. */
145 /* The size of the DATA array above.*/
148 /* The number of bytes read from the underlying file so far. This
149 must be less than or equal to SIZE above. */
152 /* The index of the beginning of the current line. */
153 size_t m_line_start_idx
;
155 /* The number of the previous line read. This starts at 1. Zero
156 means we've read no line so far. */
159 /* This is the total number of lines of the current file. At the
160 moment, we try to get this information from the line map
161 subsystem. Note that this is just a hint. When using the C++
162 front-end, this hint is correct because the input file is then
163 completely tokenized before parsing starts; so the line map knows
164 the number of lines before compilation really starts. For e.g,
165 the C front-end, it can happen that we start emitting diagnostics
166 before the line map has seen the end of the file. */
167 size_t m_total_lines
;
169 /* Could this file be missing a trailing newline on its final line?
170 Initially true (to cope with empty files), set to true/false
171 as each line is read. */
172 bool m_missing_trailing_newline
;
174 /* This is a record of the beginning and end of the lines we've seen
175 while reading the file. This is useful to avoid walking the data
176 from the beginning when we are asked to read a line that is
177 before LINE_START_IDX above. Note that the maximum size of this
178 record is line_record_size, so that the memory consumption
179 doesn't explode. We thus scale total_lines down to
181 vec
<line_info
, va_heap
> m_line_record
;
183 void offset_buffer (int offset
)
185 gcc_assert (offset
< 0 ? m_alloc_offset
+ offset
>= 0
186 : (size_t) offset
<= m_size
);
188 m_alloc_offset
+= offset
;
196 find_end_of_line (const char *s
, size_t len
);
198 /* Current position in real source file. */
200 location_t input_location
= UNKNOWN_LOCATION
;
202 class line_maps
*line_table
;
204 /* A stashed copy of "line_table" for use by selftest::line_table_test.
205 This needs to be a global so that it can be a GC root, and thus
206 prevent the stashed copy from being garbage-collected if the GC runs
207 during a line_table_test. */
209 class line_maps
*saved_line_table
;
211 /* Expand the source location LOC into a human readable location. If
212 LOC resolves to a builtin location, the file name of the readable
213 location is set to the string "<built-in>". If EXPANSION_POINT_P is
214 TRUE and LOC is virtual, then it is resolved to the expansion
215 point of the involved macro. Otherwise, it is resolved to the
216 spelling location of the token.
218 When resolving to the spelling location of the token, if the
219 resulting location is for a built-in location (that is, it has no
220 associated line/column) in the context of a macro expansion, the
221 returned location is the first one (while unwinding the macro
222 location towards its expansion point) that is in real source
225 ASPECT controls which part of the location to use. */
227 static expanded_location
228 expand_location_1 (const line_maps
*set
,
230 bool expansion_point_p
,
231 enum location_aspect aspect
)
233 expanded_location xloc
;
234 const line_map_ordinary
*map
;
235 enum location_resolution_kind lrk
= LRK_MACRO_EXPANSION_POINT
;
238 if (IS_ADHOC_LOC (loc
))
240 block
= LOCATION_BLOCK (loc
);
241 loc
= LOCATION_LOCUS (loc
);
244 memset (&xloc
, 0, sizeof (xloc
));
246 if (loc
>= RESERVED_LOCATION_COUNT
)
248 if (!expansion_point_p
)
250 /* We want to resolve LOC to its spelling location.
252 But if that spelling location is a reserved location that
253 appears in the context of a macro expansion (like for a
254 location for a built-in token), let's consider the first
255 location (toward the expansion point) that is not reserved;
256 that is, the first location that is in real source code. */
257 loc
= linemap_unwind_to_first_non_reserved_loc (set
,
259 lrk
= LRK_SPELLING_LOCATION
;
261 loc
= linemap_resolve_location (set
, loc
, lrk
, &map
);
263 /* loc is now either in an ordinary map, or is a reserved location.
264 If it is a compound location, the caret is in a spelling location,
265 but the start/finish might still be a virtual location.
266 Depending on what the caller asked for, we may need to recurse
267 one level in order to resolve any virtual locations in the
274 case LOCATION_ASPECT_CARET
:
276 case LOCATION_ASPECT_START
:
278 location_t start
= get_start (loc
);
280 return expand_location_1 (set
, start
, expansion_point_p
, aspect
);
283 case LOCATION_ASPECT_FINISH
:
285 location_t finish
= get_finish (loc
);
287 return expand_location_1 (set
, finish
, expansion_point_p
, aspect
);
291 xloc
= linemap_expand_location (set
, map
, loc
);
295 if (loc
<= BUILTINS_LOCATION
)
296 xloc
.file
= loc
== UNKNOWN_LOCATION
? NULL
: special_fname_builtin ();
301 /* Return the total number of lines that have been read so far by the
302 line map (in the preprocessor). For languages like C++ that
303 entirely preprocess the input file before starting to parse, this
304 equals the actual number of lines of the file. */
307 total_lines_num (const char *file_path
)
311 if (linemap_get_file_highest_location (line_table
, file_path
, &l
))
313 gcc_assert (l
>= RESERVED_LOCATION_COUNT
);
314 expanded_location xloc
= expand_location (l
);
320 /* Lookup the cache used for the content of a given file accessed by
321 caret diagnostic. Return the found cached file, or NULL if no
322 cached file was found. */
325 file_cache::lookup_file (const char *file_path
)
327 gcc_assert (file_path
);
329 /* This will contain the found cached file. */
330 file_cache_slot
*r
= NULL
;
331 for (unsigned i
= 0; i
< num_file_slots
; ++i
)
333 file_cache_slot
*c
= &m_file_slots
[i
];
334 if (c
->get_file_path () && !strcmp (c
->get_file_path (), file_path
))
347 /* Purge any mention of FILENAME from the cache of files used for
348 printing source code. For use in selftests when working
352 file_cache::forcibly_evict_file (const char *file_path
)
354 gcc_assert (file_path
);
356 file_cache_slot
*r
= lookup_file (file_path
);
364 /* Determine if FILE_PATH is missing a trailing newline on its final line.
365 Only valid to call once all of the file has been loaded, by
366 requesting a line number beyond the end of the file. */
369 file_cache::missing_trailing_newline_p (const char *file_path
)
371 gcc_assert (file_path
);
373 file_cache_slot
*r
= lookup_or_add_file (file_path
);
374 return r
->missing_trailing_newline_p ();
378 file_cache::add_buffered_content (const char *file_path
,
382 gcc_assert (file_path
);
384 file_cache_slot
*r
= lookup_file (file_path
);
387 unsigned highest_use_count
= 0;
388 r
= evicted_cache_tab_entry (&highest_use_count
);
389 if (!r
->create (m_input_context
, file_path
, nullptr, highest_use_count
))
393 r
->set_content (buffer
, sz
);
397 file_cache_slot::evict ()
405 m_line_start_idx
= 0;
407 m_line_record
.truncate (0);
410 m_missing_trailing_newline
= true;
413 /* Return the file cache that has been used the least, or the
414 first empty one. If HIGHEST_USE_COUNT is non-null,
415 *HIGHEST_USE_COUNT is set to the highest use count of the entries
416 in the cache table. */
419 file_cache::evicted_cache_tab_entry (unsigned *highest_use_count
)
421 file_cache_slot
*to_evict
= &m_file_slots
[0];
422 unsigned huc
= to_evict
->get_use_count ();
423 for (unsigned i
= 1; i
< num_file_slots
; ++i
)
425 file_cache_slot
*c
= &m_file_slots
[i
];
426 bool c_is_empty
= (c
->get_file_path () == NULL
);
428 if (c
->get_use_count () < to_evict
->get_use_count ()
429 || (to_evict
->get_file_path () && c_is_empty
))
430 /* We evict C because it's either an entry with a lower use
431 count or one that is empty. */
434 if (huc
< c
->get_use_count ())
435 huc
= c
->get_use_count ();
438 /* We've reached the end of the cache; subsequent elements are
443 if (highest_use_count
)
444 *highest_use_count
= huc
;
449 /* Create the cache used for the content of a given file to be
450 accessed by caret diagnostic. This cache is added to an array of
451 caches and can be retrieved by lookup_file. This
452 function returns the created cache. Note that only the last
453 num_file_slots files are cached.
455 This can return nullptr if the FILE_PATH can't be opened for
456 reading, or if the content can't be converted to the input_charset. */
459 file_cache::add_file (const char *file_path
)
462 FILE *fp
= fopen (file_path
, "r");
466 unsigned highest_use_count
= 0;
467 file_cache_slot
*r
= evicted_cache_tab_entry (&highest_use_count
);
468 if (!r
->create (m_input_context
, file_path
, fp
, highest_use_count
))
473 /* Get a borrowed char_span to the full content of this file
474 as decoded according to the input charset, encoded as UTF-8. */
477 file_cache_slot::get_full_file_content ()
481 while (get_next_line (&line
, &line_len
))
484 return char_span (m_data
, m_nb_read
);
487 /* Populate this slot for use on FILE_PATH and FP, dropping any
488 existing cached content within it. */
491 file_cache_slot::create (const file_cache::input_context
&in_context
,
492 const char *file_path
, FILE *fp
,
493 unsigned highest_use_count
)
495 m_file_path
= file_path
;
501 offset_buffer (-m_alloc_offset
);
503 m_line_start_idx
= 0;
505 m_line_record
.truncate (0);
506 /* Ensure that this cache entry doesn't get evicted next time
507 evicted_cache_tab_entry is called. */
508 m_use_count
= ++highest_use_count
;
509 m_total_lines
= total_lines_num (file_path
);
510 m_missing_trailing_newline
= true;
513 /* Check the input configuration to determine if we need to do any
514 transformations, such as charset conversion or BOM skipping. */
515 if (const char *input_charset
= in_context
.ccb (file_path
))
517 /* Need a full-blown conversion of the input charset. */
520 const cpp_converted_source cs
521 = cpp_get_converted_source (file_path
, input_charset
);
527 m_nb_read
= m_size
= cs
.len
;
528 m_alloc_offset
= cs
.data
- cs
.to_free
;
530 else if (in_context
.should_skip_bom
)
534 const int offset
= cpp_check_utf8_bom (m_data
, m_nb_read
);
535 offset_buffer (offset
);
544 file_cache_slot::set_content (const char *buf
, size_t sz
)
546 m_data
= (char *)xmalloc (sz
);
547 memcpy (m_data
, buf
, sz
);
548 m_nb_read
= m_size
= sz
;
557 /* Compute m_total_lines based on content of buffer. */
559 const char *line_start
= m_data
;
560 size_t remaining_size
= sz
;
561 while (const char *line_end
= find_end_of_line (line_start
, remaining_size
))
564 remaining_size
-= line_end
+ 1 - line_start
;
565 line_start
= line_end
+ 1;
569 /* file_cache's ctor. */
571 file_cache::file_cache ()
572 : m_file_slots (new file_cache_slot
[num_file_slots
])
574 initialize_input_context (nullptr, false);
577 /* file_cache's dtor. */
579 file_cache::~file_cache ()
581 delete[] m_file_slots
;
585 file_cache::dump (FILE *out
, int indent
) const
587 for (size_t i
= 0; i
< num_file_slots
; ++i
)
589 fprintf (out
, "%*sslot[%i]:\n", indent
, "", (int)i
);
590 m_file_slots
[i
].dump (out
, indent
+ 2);
595 file_cache::dump () const
600 /* Lookup the cache used for the content of a given file accessed by
601 caret diagnostic. If no cached file was found, create a new cache
602 for this file, add it to the array of cached files and return
605 This can return nullptr on a cache miss if FILE_PATH can't be opened for
606 reading, or if the content can't be converted to the input_charset. */
609 file_cache::lookup_or_add_file (const char *file_path
)
611 file_cache_slot
*r
= lookup_file (file_path
);
613 r
= add_file (file_path
);
617 /* Default constructor for a cache of file used by caret
620 file_cache_slot::file_cache_slot ()
621 : m_use_count (0), m_file_path (NULL
), m_fp (NULL
), m_error (false), m_data (0),
622 m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
623 m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
625 m_line_record
.create (0);
628 /* Destructor for a cache of file used by caret diagnostic. */
630 file_cache_slot::~file_cache_slot ()
639 offset_buffer (-m_alloc_offset
);
643 m_line_record
.release ();
647 file_cache_slot::dump (FILE *out
, int indent
) const
651 fprintf (out
, "%*s(unused)\n", indent
, "");
654 fprintf (out
, "%*sfile_path: %s\n", indent
, "", m_file_path
);
655 fprintf (out
, "%*sfp: %p\n", indent
, "", (void *)m_fp
);
656 fprintf (out
, "%*sneeds_read_p: %i\n", indent
, "", (int)needs_read_p ());
657 fprintf (out
, "%*sneeds_grow_p: %i\n", indent
, "", (int)needs_grow_p ());
658 fprintf (out
, "%*suse_count: %i\n", indent
, "", m_use_count
);
659 fprintf (out
, "%*ssize: %zi\n", indent
, "", m_size
);
660 fprintf (out
, "%*snb_read: %zi\n", indent
, "", m_nb_read
);
661 fprintf (out
, "%*sstart_line_idx: %zi\n", indent
, "", m_line_start_idx
);
662 fprintf (out
, "%*sline_num: %zi\n", indent
, "", m_line_num
);
663 fprintf (out
, "%*stotal_lines: %zi\n", indent
, "", m_total_lines
);
664 fprintf (out
, "%*smissing_trailing_newline: %i\n",
665 indent
, "", (int)m_missing_trailing_newline
);
666 fprintf (out
, "%*sline records (%i):\n",
667 indent
, "", m_line_record
.length ());
669 for (auto &line
: m_line_record
)
670 fprintf (out
, "%*s[%i]: line %zi: byte offsets: %zi-%zi\n",
672 idx
++, line
.line_num
, line
.start_pos
, line
.end_pos
);
675 /* Returns TRUE iff the cache would need to be filled with data coming
676 from the file. That is, either the cache is empty or full or the
677 current line is empty. Note that if the cache is full, it would
678 need to be extended and filled again. */
681 file_cache_slot::needs_read_p () const
683 return m_fp
&& (m_nb_read
== 0
684 || m_nb_read
== m_size
685 || (m_line_start_idx
>= m_nb_read
- 1));
688 /* Return TRUE iff the cache is full and thus needs to be
692 file_cache_slot::needs_grow_p () const
694 return m_nb_read
== m_size
;
697 /* Grow the cache if it needs to be extended. */
700 file_cache_slot::maybe_grow ()
702 if (!needs_grow_p ())
707 gcc_assert (m_size
== 0 && m_alloc_offset
== 0);
708 m_size
= buffer_size
;
709 m_data
= XNEWVEC (char, m_size
);
713 const int offset
= m_alloc_offset
;
714 offset_buffer (-offset
);
716 m_data
= XRESIZEVEC (char, m_data
, m_size
);
717 offset_buffer (offset
);
721 /* Read more data into the cache. Extends the cache if need be.
722 Returns TRUE iff new data could be read. */
725 file_cache_slot::read_data ()
727 if (feof (m_fp
) || ferror (m_fp
))
732 char * from
= m_data
+ m_nb_read
;
733 size_t to_read
= m_size
- m_nb_read
;
734 size_t nb_read
= fread (from
, 1, to_read
, m_fp
);
742 m_nb_read
+= nb_read
;
746 /* Read new data iff the cache needs to be filled with more data
747 coming from the file FP. Return TRUE iff the cache was filled with
751 file_cache_slot::maybe_read_data ()
753 if (!needs_read_p ())
758 /* Helper function for file_cache_slot::get_next_line (), to find the end of
759 the next line. Returns with the memchr convention, i.e. nullptr if a line
760 terminator was not found. We need to determine line endings in the same
761 manner that libcpp does: any of \n, \r\n, or \r is a line ending. */
764 find_end_of_line (const char *s
, size_t len
)
766 for (const auto end
= s
+ len
; s
!= end
; ++s
)
772 const auto next
= s
+ 1;
775 /* Don't find the line ending if \r is the very last character
776 in the buffer; we do not know if it's the end of the file or
777 just the end of what has been read so far, and we wouldn't
778 want to break in the middle of what's actually a \r\n
779 sequence. Instead, we will handle the case of a file ending
783 return (*next
== '\n' ? next
: s
);
789 /* Read a new line from file FP, using C as a cache for the data
790 coming from the file. Upon successful completion, *LINE is set to
791 the beginning of the line found. *LINE points directly in the
792 line cache and is only valid until the next call of get_next_line.
793 *LINE_LEN is set to the length of the line. Note that the line
794 does not contain any terminal delimiter. This function returns
795 true if some data was read or processed from the cache, false
796 otherwise. Note that subsequent calls to get_next_line might
797 make the content of *LINE invalid. */
800 file_cache_slot::get_next_line (char **line
, ssize_t
*line_len
)
802 /* Fill the cache with data to process. */
805 size_t remaining_size
= m_nb_read
- m_line_start_idx
;
806 if (remaining_size
== 0)
807 /* There is no more data to process. */
810 const char *line_start
= m_data
+ m_line_start_idx
;
812 const char *next_line_start
= NULL
;
814 const char *line_end
= find_end_of_line (line_start
, remaining_size
);
815 if (line_end
== NULL
)
817 /* We haven't found an end-of-line delimiter in the cache.
818 Fill the cache with more data from the file and look again. */
819 while (maybe_read_data ())
821 line_start
= m_data
+ m_line_start_idx
;
822 remaining_size
= m_nb_read
- m_line_start_idx
;
823 line_end
= find_end_of_line (line_start
, remaining_size
);
824 if (line_end
!= NULL
)
826 next_line_start
= line_end
+ 1;
830 if (line_end
== NULL
)
832 /* We've loaded all the file into the cache and still no
833 terminator. Let's say the line ends up at one byte past the
834 end of the file. This is to stay consistent with the case
835 of when the line ends up with a terminator and line_end points to
836 that. That consistency is useful below in the len calculation.
838 If the file ends in a \r, we didn't identify it as a line
839 terminator above, so do that now instead. */
840 line_end
= m_data
+ m_nb_read
;
841 if (m_nb_read
&& line_end
[-1] == '\r')
844 m_missing_trailing_newline
= false;
847 m_missing_trailing_newline
= true;
850 m_missing_trailing_newline
= false;
854 next_line_start
= line_end
+ 1;
855 m_missing_trailing_newline
= false;
861 /* At this point, we've found the end of the line. It either points to
862 the line terminator or to one byte after the last byte of the file. */
863 gcc_assert (line_end
!= NULL
);
865 len
= line_end
- line_start
;
867 if (m_line_start_idx
< m_nb_read
)
868 *line
= const_cast<char *> (line_start
);
872 /* Before we update our line record, make sure the hint about the
873 total number of lines of the file is correct. If it's not, then
874 we give up recording line boundaries from now on. */
875 bool update_line_record
= true;
876 if (m_line_num
> m_total_lines
)
877 update_line_record
= false;
879 /* Now update our line record so that re-reading lines from
880 before m_line_start_idx is faster. */
881 if (update_line_record
882 && m_line_record
.length () < line_record_size
)
884 /* If the file's lines fit in the line record, we just record all
886 if (m_total_lines
<= line_record_size
887 && m_line_num
> m_line_record
.length ())
888 m_line_record
.safe_push
889 (file_cache_slot::line_info (m_line_num
,
892 else if (m_total_lines
> line_record_size
)
894 /* ... otherwise, we just scale total_lines down to
895 line_record_size lines. */
896 size_t n
= (m_line_num
* line_record_size
) / m_total_lines
;
897 if (m_line_record
.length () == 0
898 || n
>= m_line_record
.length ())
899 m_line_record
.safe_push
900 (file_cache_slot::line_info (m_line_num
,
906 /* Update m_line_start_idx so that it points to the next line to be
909 m_line_start_idx
= next_line_start
- m_data
;
911 /* We didn't find any terminal '\n'. Let's consider that the end
912 of line is the end of the data in the cache. The next
913 invocation of get_next_line will either read more data from the
914 underlying file or return false early because we've reached the
916 m_line_start_idx
= m_nb_read
;
923 /* Consume the next bytes coming from the cache (or from its
924 underlying file if there are remaining unread bytes in the file)
925 until we reach the next end-of-line (or end-of-file). There is no
926 copying from the cache involved. Return TRUE upon successful
930 file_cache_slot::goto_next_line ()
935 return get_next_line (&l
, &len
);
938 /* Read an arbitrary line number LINE_NUM from the file cached in C.
939 If the line was read successfully, *LINE points to the beginning
940 of the line in the file cache and *LINE_LEN is the length of the
941 line. *LINE is not nul-terminated, but may contain zero bytes.
942 *LINE is only valid until the next call of read_line_num.
943 This function returns true if a line was read. */
946 file_cache_slot::read_line_num (size_t line_num
,
947 char ** line
, ssize_t
*line_len
)
949 gcc_assert (line_num
> 0);
951 if (line_num
<= m_line_num
)
953 /* We've been asked to read lines that are before m_line_num.
954 So let's use our line record (if it's not empty) to try to
955 avoid re-reading the file from the beginning again. */
957 if (m_line_record
.is_empty ())
959 m_line_start_idx
= 0;
964 file_cache_slot::line_info
*i
= NULL
;
965 if (m_total_lines
<= line_record_size
)
967 /* In languages where the input file is not totally
968 preprocessed up front, the m_total_lines hint
969 can be smaller than the number of lines of the
970 file. In that case, only the first
971 m_total_lines have been recorded.
973 Otherwise, the first m_total_lines we've read have
974 their start/end recorded here. */
975 i
= (line_num
<= m_total_lines
)
976 ? &m_line_record
[line_num
- 1]
977 : &m_line_record
[m_total_lines
- 1];
978 gcc_assert (i
->line_num
<= line_num
);
982 /* So the file had more lines than our line record
983 size. Thus the number of lines we've recorded has
984 been scaled down to line_record_size. Let's
985 pick the start/end of the recorded line that is
986 closest to line_num. */
987 size_t n
= (line_num
<= m_total_lines
)
988 ? line_num
* line_record_size
/ m_total_lines
989 : m_line_record
.length () - 1;
990 if (n
< m_line_record
.length ())
992 i
= &m_line_record
[n
];
993 gcc_assert (i
->line_num
<= line_num
);
997 if (i
&& i
->line_num
== line_num
)
999 /* We have the start/end of the line. */
1000 *line
= m_data
+ i
->start_pos
;
1001 *line_len
= i
->end_pos
- i
->start_pos
;
1007 m_line_start_idx
= i
->start_pos
;
1008 m_line_num
= i
->line_num
- 1;
1012 m_line_start_idx
= 0;
1018 /* Let's walk from line m_line_num up to line_num - 1, without
1019 copying any line. */
1020 while (m_line_num
< line_num
- 1)
1021 if (!goto_next_line ())
1024 /* The line we want is the next one. Let's read and copy it back to
1026 return get_next_line (line
, line_len
);
1029 /* Return the physical source line that corresponds to FILE_PATH/LINE.
1030 The line is not nul-terminated. The returned pointer is only
1031 valid until the next call of location_get_source_line.
1032 Note that the line can contain several null characters,
1033 so the returned value's length has the actual length of the line.
1034 If the function fails, a NULL char_span is returned. */
1037 file_cache::get_source_line (const char *file_path
, int line
)
1039 char *buffer
= NULL
;
1043 return char_span (NULL
, 0);
1045 if (file_path
== NULL
)
1046 return char_span (NULL
, 0);
1048 file_cache_slot
*c
= lookup_or_add_file (file_path
);
1050 return char_span (NULL
, 0);
1052 bool read
= c
->read_line_num (line
, &buffer
, &len
);
1054 return char_span (NULL
, 0);
1056 return char_span (buffer
, len
);
1059 /* Return a NUL-terminated copy of the source text between two locations, or
1060 NULL if the arguments are invalid. The caller is responsible for freeing
1061 the return value. */
1064 get_source_text_between (file_cache
&fc
, location_t start
, location_t end
)
1066 expanded_location expstart
=
1067 expand_location_to_spelling_point (start
, LOCATION_ASPECT_START
);
1068 expanded_location expend
=
1069 expand_location_to_spelling_point (end
, LOCATION_ASPECT_FINISH
);
1071 /* If the locations are in different files or the end comes before the
1072 start, give up and return nothing. */
1073 if (!expstart
.file
|| !expend
.file
)
1075 if (strcmp (expstart
.file
, expend
.file
) != 0)
1077 if (expstart
.line
> expend
.line
)
1079 if (expstart
.line
== expend
.line
1080 && expstart
.column
> expend
.column
)
1082 /* These aren't real column numbers, give up. */
1083 if (expstart
.column
== 0 || expend
.column
== 0)
1086 /* For a single line we need to trim both edges. */
1087 if (expstart
.line
== expend
.line
)
1089 char_span line
= fc
.get_source_line (expstart
.file
, expstart
.line
);
1090 if (line
.length () < 1)
1092 int s
= expstart
.column
- 1;
1093 int len
= expend
.column
- s
;
1094 if (line
.length () < (size_t)expend
.column
)
1096 return line
.subspan (s
, len
).xstrdup ();
1099 struct obstack buf_obstack
;
1100 obstack_init (&buf_obstack
);
1102 /* Loop through all lines in the range and append each to buf; may trim
1103 parts of the start and end lines off depending on column values. */
1104 for (int lnum
= expstart
.line
; lnum
<= expend
.line
; ++lnum
)
1106 char_span line
= fc
.get_source_line (expstart
.file
, lnum
);
1107 if (line
.length () < 1 && (lnum
!= expstart
.line
&& lnum
!= expend
.line
))
1110 /* For the first line in the range, only start at expstart.column */
1111 if (lnum
== expstart
.line
)
1113 unsigned off
= expstart
.column
- 1;
1114 if (line
.length () < off
)
1116 line
= line
.subspan (off
, line
.length() - off
);
1118 /* For the last line, don't go past expend.column */
1119 else if (lnum
== expend
.line
)
1121 if (line
.length () < (size_t)expend
.column
)
1123 line
= line
.subspan (0, expend
.column
);
1126 /* Combine spaces at the beginning of later lines. */
1127 if (lnum
> expstart
.line
)
1130 for (off
= 0; off
< line
.length(); ++off
)
1131 if (line
[off
] != ' ' && line
[off
] != '\t')
1135 obstack_1grow (&buf_obstack
, ' ');
1136 line
= line
.subspan (off
, line
.length() - off
);
1140 /* This does not include any trailing newlines. */
1141 obstack_grow (&buf_obstack
, line
.get_buffer (), line
.length ());
1144 /* NUL-terminate and finish the buf obstack. */
1145 obstack_1grow (&buf_obstack
, 0);
1146 const char *buf
= (const char *) obstack_finish (&buf_obstack
);
1148 return xstrdup (buf
);
1153 file_cache::get_source_file_content (const char *file_path
)
1155 file_cache_slot
*c
= lookup_or_add_file (file_path
);
1157 return char_span (nullptr, 0);
1158 return c
->get_full_file_content ();
1161 /* Test if the location originates from the spelling location of a
1162 built-in token. That is, return TRUE if LOC is a (possibly
1163 virtual) location of a built-in token that appears in the expansion
1164 list of a macro. Please note that this function also works on
1165 tokens that result from built-in tokens. For instance, the
1166 function would return true if passed a token "4" that is the result
1167 of the expansion of the built-in __LINE__ macro. */
1169 is_location_from_builtin_token (location_t loc
)
1171 const line_map_ordinary
*map
= NULL
;
1172 loc
= linemap_resolve_location (line_table
, loc
,
1173 LRK_SPELLING_LOCATION
, &map
);
1174 return loc
== BUILTINS_LOCATION
;
1177 /* Expand the source location LOC into a human readable location. If
1178 LOC is virtual, it resolves to the expansion point of the involved
1179 macro. If LOC resolves to a builtin location, the file name of the
1180 readable location is set to the string "<built-in>". */
1183 expand_location (location_t loc
)
1185 return expand_location_1 (line_table
, loc
, /*expansion_point_p=*/true,
1186 LOCATION_ASPECT_CARET
);
1189 /* Expand the source location LOC into a human readable location. If
1190 LOC is virtual, it resolves to the spelling location of the
1191 token. If LOC resolves to a builtin location, the file
1192 name of the readable location is set to the string
1196 expand_location_to_spelling_point (location_t loc
,
1197 enum location_aspect aspect
)
1199 return expand_location_1 (line_table
, loc
, /*expansion_point_p=*/false,
1203 /* The rich_location class within libcpp requires a way to expand
1204 location_t instances, and relies on the client code
1205 providing a symbol named
1206 linemap_client_expand_location_to_spelling_point
1209 This is the implementation for libcommon.a (all host binaries),
1210 which simply calls into expand_location_1. */
1213 linemap_client_expand_location_to_spelling_point (const line_maps
*set
,
1215 enum location_aspect aspect
)
1217 return expand_location_1 (set
, loc
, /*expansion_point_p=*/false, aspect
);
1221 /* If LOCATION is in a system header and if it is a virtual location
1222 for a token coming from the expansion of a macro, unwind it to
1223 the location of the expansion point of the macro. If the expansion
1224 point is also in a system header return the original LOCATION.
1225 Otherwise, return the location of the expansion point.
1227 This is used for instance when we want to emit diagnostics about a
1228 token that may be located in a macro that is itself defined in a
1229 system header, for example, for the NULL macro. In such a case, if
1230 LOCATION were passed directly to diagnostic functions such as
1231 warning_at, the diagnostic would be suppressed (unless
1232 -Wsystem-headers). */
1235 expansion_point_location_if_in_system_header (location_t location
)
1237 if (!in_system_header_at (location
))
1240 location_t xloc
= linemap_resolve_location (line_table
, location
,
1241 LRK_MACRO_EXPANSION_POINT
,
1243 return in_system_header_at (xloc
) ? location
: xloc
;
1246 /* If LOCATION is a virtual location for a token coming from the expansion
1247 of a macro, unwind to the location of the expansion point of the macro. */
1250 expansion_point_location (location_t location
)
1252 return linemap_resolve_location (line_table
, location
,
1253 LRK_MACRO_EXPANSION_POINT
, NULL
);
1256 /* Construct a location with caret at CARET, ranging from START to
1259 For example, consider:
1262 12345678901234567890
1264 523 return foo + bar;
1268 The location's caret is at the "+", line 523 column 15, but starts
1269 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
1270 of "bar" at column 19. */
1273 make_location (location_t caret
, location_t start
, location_t finish
)
1275 return line_table
->make_location (caret
, start
, finish
);
1278 /* Same as above, but taking a source range rather than two locations. */
1281 make_location (location_t caret
, source_range src_range
)
1283 location_t pure_loc
= get_pure_location (caret
);
1284 return line_table
->get_or_create_combined_loc (pure_loc
, src_range
,
1288 /* An expanded_location stores the column in byte units. This function
1289 converts that column to display units. That requires reading the associated
1290 source line in order to calculate the display width. If that cannot be done
1291 for any reason, then returns the byte column as a fallback. */
1293 location_compute_display_column (file_cache
&fc
,
1294 expanded_location exploc
,
1295 const cpp_char_column_policy
&policy
)
1297 if (!(exploc
.file
&& *exploc
.file
&& exploc
.line
&& exploc
.column
))
1298 return exploc
.column
;
1299 char_span line
= fc
.get_source_line (exploc
.file
, exploc
.line
);
1300 /* If line is NULL, this function returns exploc.column which is the
1301 desired fallback. */
1302 return cpp_byte_column_to_display_column (line
.get_buffer (), line
.length (),
1303 exploc
.column
, policy
);
1306 /* Dump statistics to stderr about the memory usage of the line_table
1307 set of line maps. This also displays some statistics about macro
1311 dump_line_table_statistics (void)
1313 struct linemap_stats s
;
1314 long total_used_map_size
,
1316 total_allocated_map_size
;
1318 memset (&s
, 0, sizeof (s
));
1320 linemap_get_statistics (line_table
, &s
);
1322 macro_maps_size
= s
.macro_maps_used_size
1323 + s
.macro_maps_locations_size
;
1325 total_allocated_map_size
= s
.ordinary_maps_allocated_size
1326 + s
.macro_maps_allocated_size
1327 + s
.macro_maps_locations_size
;
1329 total_used_map_size
= s
.ordinary_maps_used_size
1330 + s
.macro_maps_used_size
1331 + s
.macro_maps_locations_size
;
1333 fprintf (stderr
, "Number of expanded macros: %5ld\n",
1334 s
.num_expanded_macros
);
1335 if (s
.num_expanded_macros
!= 0)
1336 fprintf (stderr
, "Average number of tokens per macro expansion: %5ld\n",
1337 s
.num_macro_tokens
/ s
.num_expanded_macros
);
1339 "\nLine Table allocations during the "
1340 "compilation process\n");
1341 fprintf (stderr
, "Number of ordinary maps used: " PRsa (5) "\n",
1342 SIZE_AMOUNT (s
.num_ordinary_maps_used
));
1343 fprintf (stderr
, "Ordinary map used size: " PRsa (5) "\n",
1344 SIZE_AMOUNT (s
.ordinary_maps_used_size
));
1345 fprintf (stderr
, "Number of ordinary maps allocated: " PRsa (5) "\n",
1346 SIZE_AMOUNT (s
.num_ordinary_maps_allocated
));
1347 fprintf (stderr
, "Ordinary maps allocated size: " PRsa (5) "\n",
1348 SIZE_AMOUNT (s
.ordinary_maps_allocated_size
));
1349 fprintf (stderr
, "Number of macro maps used: " PRsa (5) "\n",
1350 SIZE_AMOUNT (s
.num_macro_maps_used
));
1351 fprintf (stderr
, "Macro maps used size: " PRsa (5) "\n",
1352 SIZE_AMOUNT (s
.macro_maps_used_size
));
1353 fprintf (stderr
, "Macro maps locations size: " PRsa (5) "\n",
1354 SIZE_AMOUNT (s
.macro_maps_locations_size
));
1355 fprintf (stderr
, "Macro maps size: " PRsa (5) "\n",
1356 SIZE_AMOUNT (macro_maps_size
));
1357 fprintf (stderr
, "Duplicated maps locations size: " PRsa (5) "\n",
1358 SIZE_AMOUNT (s
.duplicated_macro_maps_locations_size
));
1359 fprintf (stderr
, "Total allocated maps size: " PRsa (5) "\n",
1360 SIZE_AMOUNT (total_allocated_map_size
));
1361 fprintf (stderr
, "Total used maps size: " PRsa (5) "\n",
1362 SIZE_AMOUNT (total_used_map_size
));
1363 fprintf (stderr
, "Ad-hoc table size: " PRsa (5) "\n",
1364 SIZE_AMOUNT (s
.adhoc_table_size
));
1365 fprintf (stderr
, "Ad-hoc table entries used: " PRsa (5) "\n",
1366 SIZE_AMOUNT (s
.adhoc_table_entries_used
));
1367 fprintf (stderr
, "optimized_ranges: " PRsa (5) "\n",
1368 SIZE_AMOUNT (line_table
->m_num_optimized_ranges
));
1369 fprintf (stderr
, "unoptimized_ranges: " PRsa (5) "\n",
1370 SIZE_AMOUNT (line_table
->m_num_unoptimized_ranges
));
1372 fprintf (stderr
, "\n");
1375 /* Get location one beyond the final location in ordinary map IDX. */
1378 get_end_location (class line_maps
*set
, line_map_uint_t idx
)
1380 if (idx
== LINEMAPS_ORDINARY_USED (set
) - 1)
1381 return set
->highest_location
;
1383 struct line_map
*next_map
= LINEMAPS_ORDINARY_MAP_AT (set
, idx
+ 1);
1384 return MAP_START_LOCATION (next_map
);
/* Helper function for write_digit_row.
   Write the character for the units digit of DIGIT to STREAM.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1395 /* Helper function for dump_location_info.
1396 Write a row of numbers to STREAM, numbering a source line,
1397 giving the units, tens, hundreds etc of the column number. */
1400 write_digit_row (FILE *stream
, int indent
,
1401 const line_map_ordinary
*map
,
1402 location_t loc
, int max_col
, int divisor
)
1404 fprintf (stream
, "%*c", indent
, ' ');
1405 fprintf (stream
, "|");
1406 for (int column
= 1; column
< max_col
; column
++)
1408 location_t column_loc
= loc
+ (location_t (column
) << map
->m_range_bits
);
1409 write_digit (stream
, column_loc
/ divisor
);
1411 fprintf (stream
, "\n");
1414 /* Write a half-closed (START) / half-open (END) interval of
1415 location_t to STREAM. */
1418 dump_location_range (FILE *stream
,
1419 location_t start
, location_t end
)
1422 " location_t interval: %llu <= loc < %llu\n",
1423 (unsigned long long) start
, (unsigned long long) end
);
1426 /* Write a labelled description of a half-closed (START) / half-open (END)
1427 interval of location_t to STREAM. */
1430 dump_labelled_location_range (FILE *stream
,
1432 location_t start
, location_t end
)
1434 fprintf (stream
, "%s\n", name
);
1435 dump_location_range (stream
, start
, end
);
1436 fprintf (stream
, "\n");
1439 /* Write a visualization of the locations in the line_table to STREAM. */
1442 dump_location_info (FILE *stream
)
1446 /* Visualize the reserved locations. */
1447 dump_labelled_location_range (stream
, "RESERVED LOCATIONS",
1448 0, RESERVED_LOCATION_COUNT
);
1450 using ULL
= unsigned long long;
1452 /* Visualize the ordinary line_map instances, rendering the sources. */
1453 for (line_map_uint_t idx
= 0; idx
< LINEMAPS_ORDINARY_USED (line_table
);
1456 location_t end_location
= get_end_location (line_table
, idx
);
1457 /* half-closed: doesn't include this one. */
1459 const line_map_ordinary
*map
1460 = LINEMAPS_ORDINARY_MAP_AT (line_table
, idx
);
1461 fprintf (stream
, "ORDINARY MAP: %llu\n", (ULL
) idx
);
1462 dump_location_range (stream
,
1463 MAP_START_LOCATION (map
), end_location
);
1464 fprintf (stream
, " file: %s\n", ORDINARY_MAP_FILE_NAME (map
));
1465 fprintf (stream
, " starting at line: %i\n",
1466 ORDINARY_MAP_STARTING_LINE_NUMBER (map
));
1467 fprintf (stream
, " column and range bits: %i\n",
1468 map
->m_column_and_range_bits
);
1469 fprintf (stream
, " column bits: %i\n",
1470 map
->m_column_and_range_bits
- map
->m_range_bits
);
1471 fprintf (stream
, " range bits: %i\n",
1473 const char * reason
;
1474 switch (map
->reason
) {
1476 reason
= "LC_ENTER";
1479 reason
= "LC_LEAVE";
1482 reason
= "LC_RENAME";
1484 case LC_RENAME_VERBATIM
:
1485 reason
= "LC_RENAME_VERBATIM";
1487 case LC_ENTER_MACRO
:
1488 reason
= "LC_RENAME_MACRO";
1493 fprintf (stream
, " reason: %d (%s)\n", map
->reason
, reason
);
1495 const line_map_ordinary
*includer_map
1496 = linemap_included_from_linemap (line_table
, map
);
1497 fprintf (stream
, " included from location: %llu",
1498 (ULL
) linemap_included_from (map
));
1500 fprintf (stream
, " (in ordinary map %llu)",
1501 ULL (includer_map
- line_table
->info_ordinary
.maps
));
1503 fprintf (stream
, "\n");
1505 /* Render the span of source lines that this "map" covers. */
1506 for (location_t loc
= MAP_START_LOCATION (map
);
1508 loc
+= (location_t (1) << map
->m_range_bits
))
1510 gcc_assert (pure_location_p (line_table
, loc
) );
1512 expanded_location exploc
1513 = linemap_expand_location (line_table
, map
, loc
);
1515 if (exploc
.column
== 0)
1517 /* Beginning of a new source line: draw the line. */
1519 char_span line_text
= fc
.get_source_line (exploc
.file
,
1524 "%s:%3i|loc:%5llu|%.*s\n",
1525 exploc
.file
, exploc
.line
,
1527 (int)line_text
.length (), line_text
.get_buffer ());
1529 /* "loc" is at column 0, which means "the whole line".
1530 Render the locations *within* the line, by underlining
1531 it, showing the location_t numeric values
1533 auto max_col
= (ULL (1) << map
->m_column_and_range_bits
) - 1;
1534 if (max_col
> line_text
.length ())
1535 max_col
= line_text
.length () + 1;
1537 int len_lnum
= num_digits (exploc
.line
);
1540 int len_loc
= num_digits (loc
);
1544 int indent
= 6 + strlen (exploc
.file
) + len_lnum
+ len_loc
;
1547 if (end_location
> 999)
1548 write_digit_row (stream
, indent
, map
, loc
, max_col
, 1000);
1551 if (end_location
> 99)
1552 write_digit_row (stream
, indent
, map
, loc
, max_col
, 100);
1555 write_digit_row (stream
, indent
, map
, loc
, max_col
, 10);
1558 write_digit_row (stream
, indent
, map
, loc
, max_col
, 1);
1561 fprintf (stream
, "\n");
1564 /* Visualize unallocated values. */
1565 dump_labelled_location_range (stream
, "UNALLOCATED LOCATIONS",
1566 line_table
->highest_location
,
1567 LINEMAPS_MACRO_LOWEST_LOCATION (line_table
));
1569 /* Visualize the macro line_map instances, rendering the sources. */
1570 for (line_map_uint_t i
= 0; i
< LINEMAPS_MACRO_USED (line_table
); i
++)
1572 /* Each macro map that is allocated owns location_t values
1573 that are *lower* than the one before them.
1574 Hence it's meaningful to view them either in order of ascending
1575 source locations, or in order of ascending macro map index. */
1576 const bool ascending_location_ts
= true;
1577 auto idx
= (ascending_location_ts
1578 ? (LINEMAPS_MACRO_USED (line_table
) - (i
+ 1))
1580 const line_map_macro
*map
= LINEMAPS_MACRO_MAP_AT (line_table
, idx
);
1581 fprintf (stream
, "MACRO %llu: %s (%u tokens)\n",
1583 linemap_map_get_macro_name (map
),
1584 MACRO_MAP_NUM_MACRO_TOKENS (map
));
1585 dump_location_range (stream
,
1586 map
->start_location
,
1587 (map
->start_location
1588 + MACRO_MAP_NUM_MACRO_TOKENS (map
)));
1589 inform (map
->get_expansion_point_location (),
1590 "expansion point is location %llu",
1591 (ULL
) map
->get_expansion_point_location ());
1592 fprintf (stream
, " map->start_location: %llu\n",
1593 (ULL
) map
->start_location
);
1595 fprintf (stream
, " macro_locations:\n");
1596 for (unsigned int i
= 0; i
< MACRO_MAP_NUM_MACRO_TOKENS (map
); i
++)
1598 location_t x
= MACRO_MAP_LOCATIONS (map
)[2 * i
];
1599 location_t y
= MACRO_MAP_LOCATIONS (map
)[(2 * i
) + 1];
1601 /* linemap_add_macro_token encodes token numbers in an expansion
1602 by putting them after MAP_START_LOCATION. */
1604 /* I'm typically seeing 4 uninitialized entries at the end of
1606 This appears to be due to macro.cc:replace_args
1607 adding 2 extra args for padding tokens; presumably there may
1608 be a leading and/or trailing padding token injected,
1609 each for 2 more location slots.
1610 This would explain there being up to 4 location_ts slots
1611 that may be uninitialized. */
1613 fprintf (stream
, " %u: %llu, %llu\n",
1619 if (x
< MAP_START_LOCATION (map
))
1620 inform (x
, "token %u has %<x-location == y-location == %llu%>",
1624 "x-location == y-location == %llu"
1625 " encodes token # %u\n",
1627 (unsigned int)(x
- MAP_START_LOCATION (map
)));
1631 inform (x
, "token %u has %<x-location == %llu%>", i
, (ULL
) x
);
1632 inform (x
, "token %u has %<y-location == %llu%>", i
, (ULL
) y
);
1635 fprintf (stream
, "\n");
1638 /* It appears that MAX_LOCATION_T itself is never assigned to a
1639 macro map, presumably due to an off-by-one error somewhere
1640 between the logic in linemap_enter_macro and
1641 LINEMAPS_MACRO_LOWEST_LOCATION. */
1642 dump_labelled_location_range (stream
, "MAX_LOCATION_T",
1644 MAX_LOCATION_T
+ 1);
1646 /* Visualize ad-hoc values. */
1647 dump_labelled_location_range (stream
, "AD-HOC LOCATIONS",
1648 MAX_LOCATION_T
+ 1, location_t (-1));
1651 /* string_concat's constructor. */
1653 string_concat::string_concat (int num
, location_t
*locs
)
1656 m_locs
= ggc_vec_alloc
<location_t
> (num
);
1657 for (int i
= 0; i
< num
; i
++)
1658 m_locs
[i
] = locs
[i
];
1661 /* string_concat_db's constructor. */
1663 string_concat_db::string_concat_db ()
1665 m_table
= hash_map
<location_hash
, string_concat
*>::create_ggc (64);
1668 /* Record that a string concatenation occurred, covering NUM
1669 string literal tokens. LOCS is an array of size NUM, containing the
1670 locations of the tokens. A copy of LOCS is taken. */
1673 string_concat_db::record_string_concatenation (int num
, location_t
*locs
)
1675 gcc_assert (num
> 1);
1678 location_t key_loc
= get_key_loc (locs
[0]);
1679 /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
1680 any data now recorded under key 'key_loc' would be overwritten by a
1681 subsequent call with the same key 'key_loc'. */
1682 if (RESERVED_LOCATION_P (key_loc
))
1685 string_concat
*concat
1686 = new (ggc_alloc
<string_concat
> ()) string_concat (num
, locs
);
1687 m_table
->put (key_loc
, concat
);
1690 /* Determine if LOC was the location of the initial token of a
1691 concatenation of string literal tokens.
1692 If so, *OUT_NUM is written to with the number of tokens, and
1693 *OUT_LOCS with the location of an array of locations of the
1694 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1695 storage owned by the string_concat_db.
1696 Otherwise, return false. */
1699 string_concat_db::get_string_concatenation (location_t loc
,
1701 location_t
**out_locs
)
1703 gcc_assert (out_num
);
1704 gcc_assert (out_locs
);
1706 location_t key_loc
= get_key_loc (loc
);
1707 /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see
1708 discussion in 'string_concat_db::record_string_concatenation'. */
1709 if (RESERVED_LOCATION_P (key_loc
))
1712 string_concat
**concat
= m_table
->get (key_loc
);
1716 *out_num
= (*concat
)->m_num
;
1717 *out_locs
=(*concat
)->m_locs
;
1721 /* Internal function. Canonicalize LOC into a form suitable for
1722 use as a key within the database, stripping away macro expansion,
1723 ad-hoc information, and range information, using the location of
1724 the start of LOC within an ordinary linemap. */
1727 string_concat_db::get_key_loc (location_t loc
)
1729 loc
= linemap_resolve_location (line_table
, loc
, LRK_SPELLING_LOCATION
,
1732 loc
= get_range_from_loc (line_table
, loc
).m_start
;
1737 /* Helper class for use within get_substring_ranges_for_loc.
1738 An vec of cpp_string with responsibility for releasing all of the
1739 str->text for each str in the vector. */
1741 class auto_cpp_string_vec
: public auto_vec
<cpp_string
>
1744 auto_cpp_string_vec (int alloc
)
1745 : auto_vec
<cpp_string
> (alloc
) {}
1747 ~auto_cpp_string_vec ()
1749 /* Clean up the copies within this vec. */
1752 FOR_EACH_VEC_ELT (*this, i
, str
)
1753 free (const_cast <unsigned char *> (str
->text
));
1757 /* Attempt to populate RANGES with source location information on the
1758 individual characters within the string literal found at STRLOC.
1759 If CONCATS is non-NULL, then any string literals that the token at
1760 STRLOC was concatenated with are also added to RANGES.
1762 Return NULL if successful, or an error message if any errors occurred (in
1763 which case RANGES may be only partially populated and should not
1766 This is implemented by re-parsing the relevant source line(s). */
1769 get_substring_ranges_for_loc (cpp_reader
*pfile
,
1771 string_concat_db
*concats
,
1773 enum cpp_ttype type
,
1774 cpp_substring_ranges
&ranges
)
1778 if (strloc
== UNKNOWN_LOCATION
)
1779 return "unknown location";
1781 /* Reparsing the strings requires accurate location information.
1782 If -ftrack-macro-expansion has been overridden from its default
1783 of 2, then we might have a location of a macro expansion point,
1784 rather than the location of the literal itself.
1785 Avoid this by requiring that we have full macro expansion tracking
1786 for substring locations to be available. */
1787 if (cpp_get_options (pfile
)->track_macro_expansion
!= 2)
1788 return "track_macro_expansion != 2";
1790 /* If #line or # 44 "file"-style directives are present, then there's
1791 no guarantee that the line numbers we have can be used to locate
1792 the strings. For example, we might have a .i file with # directives
1793 pointing back to lines within a .c file, but the .c file might
1794 have been edited since the .i file was created.
1795 In such a case, the safest course is to disable on-demand substring
1797 if (line_table
->seen_line_directive
)
1798 return "seen line directive";
1800 /* If string concatenation has occurred at STRLOC, get the locations
1801 of all of the literal tokens making up the compound string.
1802 Otherwise, just use STRLOC. */
1804 location_t
*strlocs
= &strloc
;
1806 concats
->get_string_concatenation (strloc
, &num_locs
, &strlocs
);
1808 auto_cpp_string_vec
strs (num_locs
);
1809 auto_vec
<cpp_string_location_reader
> loc_readers (num_locs
);
1810 for (int i
= 0; i
< num_locs
; i
++)
1812 /* Get range of strloc. We will use it to locate the start and finish
1813 of the literal token within the line. */
1814 source_range src_range
= get_range_from_loc (line_table
, strlocs
[i
]);
1816 if (src_range
.m_start
>= LINEMAPS_MACRO_LOWEST_LOCATION (line_table
))
1818 /* If the string token was within a macro expansion, then we can
1819 cope with it for the simple case where we have a single token.
1820 Otherwise, bail out. */
1821 if (src_range
.m_start
!= src_range
.m_finish
)
1822 return "macro expansion";
1826 if (src_range
.m_start
>= LINE_MAP_MAX_LOCATION_WITH_COLS
)
1827 /* If so, we can't reliably determine where the token started within
1829 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1831 if (src_range
.m_finish
>= LINE_MAP_MAX_LOCATION_WITH_COLS
)
1832 /* If so, we can't reliably determine where the token finished
1834 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1837 expanded_location start
1838 = expand_location_to_spelling_point (src_range
.m_start
,
1839 LOCATION_ASPECT_START
);
1840 expanded_location finish
1841 = expand_location_to_spelling_point (src_range
.m_finish
,
1842 LOCATION_ASPECT_FINISH
);
1843 if (start
.file
!= finish
.file
)
1844 return "range endpoints are in different files";
1845 if (start
.line
!= finish
.line
)
1846 return "range endpoints are on different lines";
1847 if (start
.column
> finish
.column
)
1848 return "range endpoints are reversed";
1850 char_span line
= fc
.get_source_line (start
.file
, start
.line
);
1852 return "unable to read source line";
1854 /* Determine the location of the literal (including quotes
1855 and leading prefix chars, such as the 'u' in a u""
1857 size_t literal_length
= finish
.column
- start
.column
+ 1;
1859 /* Ensure that we don't crash if we got the wrong location. */
1860 if (start
.column
< 1)
1861 return "zero start column";
1862 if (line
.length () < (start
.column
- 1 + literal_length
))
1863 return "line is not wide enough";
1865 char_span literal
= line
.subspan (start
.column
- 1, literal_length
);
1868 from
.len
= literal_length
;
1869 /* Make a copy of the literal, to avoid having to rely on
1870 the lifetime of the copy of the line within the cache.
1871 This will be released by the auto_cpp_string_vec dtor. */
1872 from
.text
= (unsigned char *)literal
.xstrdup ();
1873 strs
.safe_push (from
);
1875 /* For very long lines, a new linemap could have started
1876 halfway through the token.
1877 Ensure that the loc_reader uses the linemap of the
1878 *end* of the token for its start location. */
1879 const line_map_ordinary
*start_ord_map
;
1880 linemap_resolve_location (line_table
, src_range
.m_start
,
1881 LRK_SPELLING_LOCATION
, &start_ord_map
);
1882 const line_map_ordinary
*final_ord_map
;
1883 linemap_resolve_location (line_table
, src_range
.m_finish
,
1884 LRK_SPELLING_LOCATION
, &final_ord_map
);
1885 if (start_ord_map
== NULL
|| final_ord_map
== NULL
)
1886 return "failed to get ordinary maps";
1887 /* Bulletproofing. We ought to only have different ordinary maps
1888 for start vs finish due to line-length jumps. */
1889 if (start_ord_map
!= final_ord_map
1890 && start_ord_map
->to_file
!= final_ord_map
->to_file
)
1891 return "start and finish are spelled in different ordinary maps";
1892 /* The file from linemap_resolve_location ought to match that from
1893 expand_location_to_spelling_point. */
1894 if (start_ord_map
->to_file
!= start
.file
)
1895 return "mismatching file after resolving linemap";
1897 location_t start_loc
1898 = linemap_position_for_line_and_column (line_table
, final_ord_map
,
1899 start
.line
, start
.column
);
1901 cpp_string_location_reader
loc_reader (start_loc
, line_table
);
1902 loc_readers
.safe_push (loc_reader
);
1905 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1906 const char *err
= cpp_interpret_string_ranges (pfile
, strs
.address (),
1907 loc_readers
.address (),
1908 num_locs
, &ranges
, type
);
1912 /* Success: "ranges" should now contain information on the string. */
1916 /* Attempt to populate *OUT_LOC with source location information on the
1917 given characters within the string literal found at STRLOC.
1918 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1921 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1922 and string literal "012345\n789"
1923 *OUT_LOC is written to with:
1927 If CONCATS is non-NULL, then any string literals that the token at
1928 STRLOC was concatenated with are also considered.
1930 This is implemented by re-parsing the relevant source line(s).
1932 Return NULL if successful, or an error message if any errors occurred.
1933 Error messages are intended for GCC developers (to help debugging) rather
1934 than for end-users. */
1937 get_location_within_string (cpp_reader
*pfile
,
1939 string_concat_db
*concats
,
1941 enum cpp_ttype type
,
1942 int caret_idx
, int start_idx
, int end_idx
,
1943 location_t
*out_loc
)
1945 gcc_checking_assert (caret_idx
>= 0);
1946 gcc_checking_assert (start_idx
>= 0);
1947 gcc_checking_assert (end_idx
>= 0);
1948 gcc_assert (out_loc
);
1950 cpp_substring_ranges ranges
;
1952 = get_substring_ranges_for_loc (pfile
, fc
, concats
, strloc
, type
, ranges
);
1956 if (caret_idx
>= ranges
.get_num_ranges ())
1957 return "caret_idx out of range";
1958 if (start_idx
>= ranges
.get_num_ranges ())
1959 return "start_idx out of range";
1960 if (end_idx
>= ranges
.get_num_ranges ())
1961 return "end_idx out of range";
1963 *out_loc
= make_location (ranges
.get_range (caret_idx
).m_start
,
1964 ranges
.get_range (start_idx
).m_start
,
1965 ranges
.get_range (end_idx
).m_finish
);
1969 /* Associate the DISCRIMINATOR with LOCUS, and return a new locus. */
1972 location_with_discriminator (location_t locus
, int discriminator
)
1974 tree block
= LOCATION_BLOCK (locus
);
1975 source_range src_range
= get_range_from_loc (line_table
, locus
);
1976 locus
= get_pure_location (locus
);
1978 if (locus
== UNKNOWN_LOCATION
)
1981 return line_table
->get_or_create_combined_loc (locus
, src_range
, block
,
1985 /* Return TRUE if LOCUS represents a location with a discriminator. */
1988 has_discriminator (location_t locus
)
1990 return get_discriminator_from_loc (locus
) != 0;
1993 /* Return the discriminator for LOCUS. */
1996 get_discriminator_from_loc (location_t locus
)
1998 return get_discriminator_from_loc (line_table
, locus
);
2003 namespace selftest
{
2005 /* Selftests of location handling. */
2007 /* Attempt to populate *OUT_RANGE with source location information on the
2008 given character within the string literal found at STRLOC.
2009 CHAR_IDX refers to an offset within the execution character set.
2010 If CONCATS is non-NULL, then any string literals that the token at
2011 STRLOC was concatenated with are also considered.
2013 This is implemented by re-parsing the relevant source line(s).
2015 Return NULL if successful, or an error message if any errors occurred.
2016 Error messages are intended for GCC developers (to help debugging) rather
2017 than for end-users. */
2020 get_source_range_for_char (cpp_reader
*pfile
,
2022 string_concat_db
*concats
,
2024 enum cpp_ttype type
,
2026 source_range
*out_range
)
2028 gcc_checking_assert (char_idx
>= 0);
2029 gcc_assert (out_range
);
2031 cpp_substring_ranges ranges
;
2033 = get_substring_ranges_for_loc (pfile
, fc
, concats
, strloc
, type
, ranges
);
2037 if (char_idx
>= ranges
.get_num_ranges ())
2038 return "char_idx out of range";
2040 *out_range
= ranges
.get_range (char_idx
);
2044 /* As get_source_range_for_char, but write to *OUT the number
2045 of ranges that are available. */
2048 get_num_source_ranges_for_substring (cpp_reader
*pfile
,
2050 string_concat_db
*concats
,
2052 enum cpp_ttype type
,
2057 cpp_substring_ranges ranges
;
2059 = get_substring_ranges_for_loc (pfile
, fc
, concats
, strloc
, type
, ranges
);
2064 *out
= ranges
.get_num_ranges ();
2068 /* Selftests of location handling. */
2070 /* Verify that compare() on linenum_type handles comparisons over the full
2071 range of the type. */
2074 test_linenum_comparisons ()
2076 linenum_type
min_line (0);
2077 linenum_type
max_line (0xffffffff);
2078 ASSERT_EQ (0, compare (min_line
, min_line
));
2079 ASSERT_EQ (0, compare (max_line
, max_line
));
2081 ASSERT_GT (compare (max_line
, min_line
), 0);
2082 ASSERT_LT (compare (min_line
, max_line
), 0);
2085 /* Helper function for verifying location data: when location_t
2086 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
2087 as having column 0. */
2090 should_have_column_data_p (location_t loc
)
2092 if (IS_ADHOC_LOC (loc
))
2093 loc
= get_location_from_adhoc_loc (line_table
, loc
);
2094 if (loc
> LINE_MAP_MAX_LOCATION_WITH_COLS
)
2099 /* Selftest for should_have_column_data_p. */
2102 test_should_have_column_data_p ()
2104 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT
));
2106 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS
));
2108 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS
+ 1));
2111 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
2115 assert_loceq (const char *exp_filename
, int exp_linenum
, int exp_colnum
,
2118 ASSERT_STREQ (exp_filename
, LOCATION_FILE (loc
));
2119 ASSERT_EQ (exp_linenum
, LOCATION_LINE (loc
));
2120 /* If location_t values are sufficiently high, then column numbers
2121 will be unavailable and LOCATION_COLUMN (loc) will be 0.
2122 When close to the threshold, column numbers *may* be present: if
2123 the final linemap before the threshold contains a line that straddles
2124 the threshold, locations in that line have column information. */
2125 if (should_have_column_data_p (loc
))
2126 ASSERT_EQ (exp_colnum
, LOCATION_COLUMN (loc
));
2129 /* Various selftests involve constructing a line table and one or more
2130 line maps within it.
2132 For maximum test coverage we want to run these tests with a variety
2134 - line_table->default_range_bits: some frontends use a non-zero value
2136 - the fallback modes within line-map.cc: there are various threshold
2137 values for location_t beyond line-map.cc changes
2138 behavior (disabling of the range-packing optimization, disabling
2139 of column-tracking). We can exercise these by starting the line_table
2140 at interesting values at or near these thresholds.
2142 The following struct describes a particular case within our test
2145 class line_table_case
2148 line_table_case (int default_range_bits
, location_t base_location
)
2149 : m_default_range_bits (default_range_bits
),
2150 m_base_location (base_location
)
2153 int m_default_range_bits
;
2154 location_t m_base_location
;
2157 /* Constructor. Store the old value of line_table, and create a new
2158 one, using sane defaults. */
2160 line_table_test::line_table_test ()
2162 gcc_assert (saved_line_table
== NULL
);
2163 saved_line_table
= line_table
;
2164 line_table
= ggc_alloc
<line_maps
> ();
2165 linemap_init (line_table
, BUILTINS_LOCATION
);
2166 gcc_assert (saved_line_table
->m_reallocator
);
2167 line_table
->m_reallocator
= saved_line_table
->m_reallocator
;
2168 gcc_assert (saved_line_table
->m_round_alloc_size
);
2169 line_table
->m_round_alloc_size
= saved_line_table
->m_round_alloc_size
;
2170 line_table
->default_range_bits
= 0;
2173 /* Constructor. Store the old value of line_table, and create a new
2174 one, using the sitation described in CASE_. */
2176 line_table_test::line_table_test (const line_table_case
&case_
)
2178 gcc_assert (saved_line_table
== NULL
);
2179 saved_line_table
= line_table
;
2180 line_table
= ggc_alloc
<line_maps
> ();
2181 linemap_init (line_table
, BUILTINS_LOCATION
);
2182 gcc_assert (saved_line_table
->m_reallocator
);
2183 line_table
->m_reallocator
= saved_line_table
->m_reallocator
;
2184 gcc_assert (saved_line_table
->m_round_alloc_size
);
2185 line_table
->m_round_alloc_size
= saved_line_table
->m_round_alloc_size
;
2186 line_table
->default_range_bits
= case_
.m_default_range_bits
;
2187 if (case_
.m_base_location
)
2189 line_table
->highest_location
= case_
.m_base_location
;
2190 line_table
->highest_line
= case_
.m_base_location
;
2194 /* Destructor. Restore the old value of line_table. */
2196 line_table_test::~line_table_test ()
2198 gcc_assert (saved_line_table
!= NULL
);
2199 line_table
= saved_line_table
;
2200 saved_line_table
= NULL
;
2203 /* Verify basic operation of ordinary linemaps. */
2206 test_accessing_ordinary_linemaps (const line_table_case
&case_
)
2208 line_table_test
ltt (case_
);
2210 /* Build a simple linemap describing some locations. */
2211 linemap_add (line_table
, LC_ENTER
, false, "foo.c", 0);
2213 linemap_line_start (line_table
, 1, 100);
2214 location_t loc_a
= linemap_position_for_column (line_table
, 1);
2215 location_t loc_b
= linemap_position_for_column (line_table
, 23);
2217 linemap_line_start (line_table
, 2, 100);
2218 location_t loc_c
= linemap_position_for_column (line_table
, 1);
2219 location_t loc_d
= linemap_position_for_column (line_table
, 17);
2221 /* Example of a very long line. */
2222 linemap_line_start (line_table
, 3, 2000);
2223 location_t loc_e
= linemap_position_for_column (line_table
, 700);
2225 /* Transitioning back to a short line. */
2226 linemap_line_start (line_table
, 4, 0);
2227 location_t loc_back_to_short
= linemap_position_for_column (line_table
, 100);
2229 if (should_have_column_data_p (loc_back_to_short
))
2231 /* Verify that we switched to short lines in the linemap. */
2232 line_map_ordinary
*map
= LINEMAPS_LAST_ORDINARY_MAP (line_table
);
2233 ASSERT_EQ (7, map
->m_column_and_range_bits
- map
->m_range_bits
);
2236 /* Example of a line that will eventually be seen to be longer
2237 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
2239 linemap_line_start (line_table
, 5, 2000);
2241 location_t loc_start_of_very_long_line
2242 = linemap_position_for_column (line_table
, 2000);
2243 location_t loc_too_wide
2244 = linemap_position_for_column (line_table
, LINE_MAP_MAX_COLUMN_NUMBER
+ 1);
2245 location_t loc_too_wide_2
2246 = linemap_position_for_column (line_table
, LINE_MAP_MAX_COLUMN_NUMBER
+ 2);
2248 /* ...and back to a sane line length. */
2249 linemap_line_start (line_table
, 6, 100);
2250 location_t loc_sane_again
= linemap_position_for_column (line_table
, 10);
2252 linemap_add (line_table
, LC_LEAVE
, false, NULL
, 0);
2254 /* Multiple files. */
2255 linemap_add (line_table
, LC_ENTER
, false, "bar.c", 0);
2256 linemap_line_start (line_table
, 1, 200);
2257 location_t loc_f
= linemap_position_for_column (line_table
, 150);
2258 linemap_add (line_table
, LC_LEAVE
, false, NULL
, 0);
2260 /* Verify that we can recover the location info. */
2261 assert_loceq ("foo.c", 1, 1, loc_a
);
2262 assert_loceq ("foo.c", 1, 23, loc_b
);
2263 assert_loceq ("foo.c", 2, 1, loc_c
);
2264 assert_loceq ("foo.c", 2, 17, loc_d
);
2265 assert_loceq ("foo.c", 3, 700, loc_e
);
2266 assert_loceq ("foo.c", 4, 100, loc_back_to_short
);
2268 /* In the very wide line, the initial location should be fully tracked. */
2269 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line
);
2270 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
2272 assert_loceq ("foo.c", 5, 0, loc_too_wide
);
2273 assert_loceq ("foo.c", 5, 0, loc_too_wide_2
);
2274 /*...and column-tracking should be re-enabled for subsequent lines. */
2275 assert_loceq ("foo.c", 6, 10, loc_sane_again
);
2277 assert_loceq ("bar.c", 1, 150, loc_f
);
2279 ASSERT_FALSE (is_location_from_builtin_token (loc_a
));
2280 ASSERT_TRUE (pure_location_p (line_table
, loc_a
));
2282 /* Verify using make_location to build a range, and extracting data
2284 location_t range_c_b_d
= make_location (loc_c
, loc_b
, loc_d
);
2285 ASSERT_FALSE (pure_location_p (line_table
, range_c_b_d
));
2286 ASSERT_EQ (loc_c
, get_location_from_adhoc_loc (line_table
, range_c_b_d
));
2287 source_range src_range
= get_range_from_loc (line_table
, range_c_b_d
);
2288 ASSERT_EQ (loc_b
, src_range
.m_start
);
2289 ASSERT_EQ (loc_d
, src_range
.m_finish
);
2292 /* Verify various properties of UNKNOWN_LOCATION. */
2295 test_unknown_location ()
2297 ASSERT_EQ (NULL
, LOCATION_FILE (UNKNOWN_LOCATION
));
2298 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION
));
2299 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION
));
2302 /* Verify various properties of BUILTINS_LOCATION. */
2307 assert_loceq (special_fname_builtin (), 0, 0, BUILTINS_LOCATION
);
2308 ASSERT_PRED1 (is_location_from_builtin_token
, BUILTINS_LOCATION
);
2311 /* Regression test for make_location.
2312 Ensure that we use pure locations for the start/finish of the range,
2313 rather than storing a packed or ad-hoc range as the start/finish. */
2316 test_make_location_nonpure_range_endpoints (const line_table_case
&case_
)
2318 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
2320 ....................0000000001111111111222.
2321 ....................1234567890123456789012. */
2322 const char *content
= " r += !aaa == bbb;\n";
2323 temp_source_file
tmp (SELFTEST_LOCATION
, ".C", content
);
2324 line_table_test
ltt (case_
);
2325 linemap_add (line_table
, LC_ENTER
, false, tmp
.get_filename (), 1);
2327 const location_t c11
= linemap_position_for_column (line_table
, 11);
2328 const location_t c12
= linemap_position_for_column (line_table
, 12);
2329 const location_t c13
= linemap_position_for_column (line_table
, 13);
2330 const location_t c14
= linemap_position_for_column (line_table
, 14);
2331 const location_t c21
= linemap_position_for_column (line_table
, 21);
2333 if (c21
> LINE_MAP_MAX_LOCATION_WITH_COLS
)
2336 /* Use column 13 for the caret location, arbitrarily, to verify that we
2337 handle start != caret. */
2338 const location_t aaa
= make_location (c13
, c12
, c14
);
2339 ASSERT_EQ (c13
, get_pure_location (aaa
));
2340 ASSERT_EQ (c12
, get_start (aaa
));
2341 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa
)));
2342 ASSERT_EQ (c14
, get_finish (aaa
));
2343 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa
)));
2345 /* Make a location using a location with a range as the start-point. */
2346 const location_t not_aaa
= make_location (c11
, aaa
, c14
);
2347 ASSERT_EQ (c11
, get_pure_location (not_aaa
));
2348 /* It should use the start location of the range, not store the range
2350 ASSERT_EQ (c12
, get_start (not_aaa
));
2351 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa
)));
2352 ASSERT_EQ (c14
, get_finish (not_aaa
));
2353 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa
)));
2355 /* Similarly, make a location with a range as the end-point. */
2356 const location_t aaa_eq_bbb
= make_location (c12
, c12
, c21
);
2357 ASSERT_EQ (c12
, get_pure_location (aaa_eq_bbb
));
2358 ASSERT_EQ (c12
, get_start (aaa_eq_bbb
));
2359 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb
)));
2360 ASSERT_EQ (c21
, get_finish (aaa_eq_bbb
));
2361 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb
)));
2362 const location_t not_aaa_eq_bbb
= make_location (c11
, c12
, aaa_eq_bbb
);
2363 /* It should use the finish location of the range, not store the range
2365 ASSERT_EQ (c11
, get_pure_location (not_aaa_eq_bbb
));
2366 ASSERT_EQ (c12
, get_start (not_aaa_eq_bbb
));
2367 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb
)));
2368 ASSERT_EQ (c21
, get_finish (not_aaa_eq_bbb
));
2369 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb
)));
2372 /* Verify reading of input files (e.g. for caret-based diagnostics). */
2375 test_reading_source_line ()
2377 /* Create a tempfile and write some text to it. */
2378 temp_source_file
tmp (SELFTEST_LOCATION
, ".txt",
2379 "01234567890123456789\n"
2380 "This is the test text\n"
2381 "This is the 3rd line");
2384 /* Read back a specific line from the tempfile. */
2385 char_span source_line
= fc
.get_source_line (tmp
.get_filename (), 3);
2386 ASSERT_TRUE (source_line
);
2387 ASSERT_TRUE (source_line
.get_buffer () != NULL
);
2388 ASSERT_EQ (20, source_line
.length ());
2389 ASSERT_TRUE (!strncmp ("This is the 3rd line",
2390 source_line
.get_buffer (), source_line
.length ()));
2392 source_line
= fc
.get_source_line (tmp
.get_filename (), 2);
2393 ASSERT_TRUE (source_line
);
2394 ASSERT_TRUE (source_line
.get_buffer () != NULL
);
2395 ASSERT_EQ (21, source_line
.length ());
2396 ASSERT_TRUE (!strncmp ("This is the test text",
2397 source_line
.get_buffer (), source_line
.length ()));
2399 source_line
= fc
.get_source_line (tmp
.get_filename (), 4);
2400 ASSERT_FALSE (source_line
);
2401 ASSERT_TRUE (source_line
.get_buffer () == NULL
);
2404 /* Verify reading from buffers (e.g. for sarif-replay). */
2407 test_reading_source_buffer ()
2409 const char *text
= ("01234567890123456789\n"
2410 "This is the test text\n"
2411 "This is the 3rd line");
2412 const char *filename
= "foo.txt";
2414 fc
.add_buffered_content (filename
, text
, strlen (text
));
2416 /* Read back a specific line from the tempfile. */
2417 char_span source_line
= fc
.get_source_line (filename
, 3);
2418 ASSERT_TRUE (source_line
);
2419 ASSERT_TRUE (source_line
.get_buffer () != NULL
);
2420 ASSERT_EQ (20, source_line
.length ());
2421 ASSERT_TRUE (!strncmp ("This is the 3rd line",
2422 source_line
.get_buffer (), source_line
.length ()));
2424 source_line
= fc
.get_source_line (filename
, 2);
2425 ASSERT_TRUE (source_line
);
2426 ASSERT_TRUE (source_line
.get_buffer () != NULL
);
2427 ASSERT_EQ (21, source_line
.length ());
2428 ASSERT_TRUE (!strncmp ("This is the test text",
2429 source_line
.get_buffer (), source_line
.length ()));
2431 source_line
= fc
.get_source_line (filename
, 4);
2432 ASSERT_FALSE (source_line
);
2433 ASSERT_TRUE (source_line
.get_buffer () == NULL
);
/* Tests of lexing.  */

/* Verify that token TOK from PARSER has cpp_token_as_text
   equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));    \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);           \
  SELFTEST_END_STMT
2447 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
2448 and ranges from EXP_START_COL to EXP_FINISH_COL.
2449 Use LOC as the effective location of the selftest. */
2452 assert_token_loc_eq (const location
&loc
,
2453 const cpp_token
*tok
,
2454 const char *exp_filename
, int exp_linenum
,
2455 int exp_start_col
, int exp_finish_col
)
2457 location_t tok_loc
= tok
->src_loc
;
2458 ASSERT_STREQ_AT (loc
, exp_filename
, LOCATION_FILE (tok_loc
));
2459 ASSERT_EQ_AT (loc
, exp_linenum
, LOCATION_LINE (tok_loc
));
2461 /* If location_t values are sufficiently high, then column numbers
2462 will be unavailable. */
2463 if (!should_have_column_data_p (tok_loc
))
2466 ASSERT_EQ_AT (loc
, exp_start_col
, LOCATION_COLUMN (tok_loc
));
2467 source_range tok_range
= get_range_from_loc (line_table
, tok_loc
);
2468 ASSERT_EQ_AT (loc
, exp_start_col
, LOCATION_COLUMN (tok_range
.m_start
));
2469 ASSERT_EQ_AT (loc
, exp_finish_col
, LOCATION_COLUMN (tok_range
.m_finish
));
/* Use assert_token_loc_eq to verify the TOK->src_loc, using
   SELFTEST_LOCATION as the effective location of the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
                            EXP_START_COL, EXP_FINISH_COL) \
  assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
                       (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
2480 /* Test of lexing a file using libcpp, verifying tokens and their
2481 location information. */
2484 test_lexer (const line_table_case
&case_
)
2486 /* Create a tempfile and write some text to it. */
2487 const char *content
=
2488 /*00000000011111111112222222222333333.3333444444444.455555555556
2489 12345678901234567890123456789012345.6789012345678.901234567890. */
2490 ("test_name /* c-style comment */\n"
2491 " \"test literal\"\n"
2492 " // test c++-style comment\n"
2494 temp_source_file
tmp (SELFTEST_LOCATION
, ".txt", content
);
2496 line_table_test
ltt (case_
);
2498 cpp_reader
*parser
= cpp_create_reader (CLK_GNUC89
, NULL
, line_table
);
2500 const char *fname
= cpp_read_main_file (parser
, tmp
.get_filename ());
2501 ASSERT_NE (fname
, NULL
);
2503 /* Verify that we get the expected tokens back, with the correct
2504 location information. */
2507 const cpp_token
*tok
;
2508 tok
= cpp_get_token_with_location (parser
, &loc
);
2509 ASSERT_NE (tok
, NULL
);
2510 ASSERT_EQ (tok
->type
, CPP_NAME
);
2511 ASSERT_TOKEN_AS_TEXT_EQ (parser
, tok
, "test_name");
2512 ASSERT_TOKEN_LOC_EQ (tok
, tmp
.get_filename (), 1, 1, 9);
2514 tok
= cpp_get_token_with_location (parser
, &loc
);
2515 ASSERT_NE (tok
, NULL
);
2516 ASSERT_EQ (tok
->type
, CPP_STRING
);
2517 ASSERT_TOKEN_AS_TEXT_EQ (parser
, tok
, "\"test literal\"");
2518 ASSERT_TOKEN_LOC_EQ (tok
, tmp
.get_filename (), 2, 35, 48);
2520 tok
= cpp_get_token_with_location (parser
, &loc
);
2521 ASSERT_NE (tok
, NULL
);
2522 ASSERT_EQ (tok
->type
, CPP_NUMBER
);
2523 ASSERT_TOKEN_AS_TEXT_EQ (parser
, tok
, "42");
2524 ASSERT_TOKEN_LOC_EQ (tok
, tmp
.get_filename (), 4, 4, 5);
2526 tok
= cpp_get_token_with_location (parser
, &loc
);
2527 ASSERT_NE (tok
, NULL
);
2528 ASSERT_EQ (tok
->type
, CPP_EOF
);
2530 cpp_finish (parser
, NULL
);
2531 cpp_destroy (parser
);
/* Forward decls.
   NOTE(review): the forward declaration of lexer_test was lost from the
   listing; its class-key is a restoration and must match the
   definition.  */

class lexer_test;
class lexer_test_options;

/* A class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor.  */

class lexer_test_options
{
 public:
  virtual void apply (lexer_test &) = 0;
};
2548 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2551 This is needed by struct lexer_test to ensure that the cleanup of the
2552 cpp_reader happens *after* the cleanup of the temp_source_file. */
2554 class cpp_reader_ptr
2557 cpp_reader_ptr (cpp_reader
*ptr
) : m_ptr (ptr
) {}
2561 cpp_finish (m_ptr
, NULL
);
2562 cpp_destroy (m_ptr
);
2565 operator cpp_reader
* () const { return m_ptr
; }
2571 /* A struct for writing lexer tests. */
2576 lexer_test (const line_table_case
&case_
, const char *content
,
2577 lexer_test_options
*options
);
2580 const cpp_token
*get_token ();
2582 /* The ordering of these fields matters.
2583 The line_table_test must be first, since the cpp_reader_ptr
2585 The cpp_reader must be cleaned up *after* the temp_source_file
2586 since the filenames in input.cc's input cache are owned by the
2587 cpp_reader; in particular, when ~temp_source_file evicts the
2588 filename the filenames must still be alive. */
2589 line_table_test m_ltt
;
2590 cpp_reader_ptr m_parser
;
2591 temp_source_file m_tempfile
;
2592 file_cache m_file_cache
;
2593 string_concat_db m_concats
;
2594 bool m_implicitly_expect_EOF
;
2597 /* Use an EBCDIC encoding for the execution charset, specifically
2598 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2600 This exercises iconv integration within libcpp.
2601 Not every build of iconv supports the given charset,
2602 so we need to flag this error and handle it gracefully. */
2604 class ebcdic_execution_charset
: public lexer_test_options
2607 ebcdic_execution_charset () : m_num_iconv_errors (0)
2609 gcc_assert (s_singleton
== NULL
);
2612 ~ebcdic_execution_charset ()
2614 gcc_assert (s_singleton
== this);
2618 void apply (lexer_test
&test
) final override
2620 cpp_options
*cpp_opts
= cpp_get_options (test
.m_parser
);
2621 cpp_opts
->narrow_charset
= "IBM1047";
2623 cpp_callbacks
*callbacks
= cpp_get_callbacks (test
.m_parser
);
2624 callbacks
->diagnostic
= on_diagnostic
;
2627 static bool on_diagnostic (cpp_reader
*pfile ATTRIBUTE_UNUSED
,
2628 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED
,
2629 enum cpp_warning_reason reason ATTRIBUTE_UNUSED
,
2630 rich_location
*richloc ATTRIBUTE_UNUSED
,
2631 const char *msgid
, va_list *ap ATTRIBUTE_UNUSED
)
2632 ATTRIBUTE_FPTR_PRINTF(5,0)
2634 gcc_assert (s_singleton
);
2635 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2636 const char *msg
= "conversion from %s to %s not supported by iconv";
2638 msg
= dgettext ("cpplib", msg
);
2640 /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc
2641 when the local iconv build doesn't support the conversion. */
2642 if (strcmp (msgid
, msg
) == 0)
2644 s_singleton
->m_num_iconv_errors
++;
2648 /* Otherwise, we have an unexpected error. */
2652 bool iconv_errors_occurred_p () const { return m_num_iconv_errors
> 0; }
2655 static ebcdic_execution_charset
*s_singleton
;
2656 int m_num_iconv_errors
;
2659 ebcdic_execution_charset
*ebcdic_execution_charset::s_singleton
;
2661 /* A lexer_test_options subclass that records a list of diagnostic
2662 messages emitted by the lexer. */
2664 class lexer_diagnostic_sink
: public lexer_test_options
2667 lexer_diagnostic_sink ()
2669 gcc_assert (s_singleton
== NULL
);
2672 ~lexer_diagnostic_sink ()
2674 gcc_assert (s_singleton
== this);
2679 FOR_EACH_VEC_ELT (m_diagnostics
, i
, str
)
2683 void apply (lexer_test
&test
) final override
2685 cpp_callbacks
*callbacks
= cpp_get_callbacks (test
.m_parser
);
2686 callbacks
->diagnostic
= on_diagnostic
;
2689 static bool on_diagnostic (cpp_reader
*pfile ATTRIBUTE_UNUSED
,
2690 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED
,
2691 enum cpp_warning_reason reason ATTRIBUTE_UNUSED
,
2692 rich_location
*richloc ATTRIBUTE_UNUSED
,
2693 const char *msgid
, va_list *ap
)
2694 ATTRIBUTE_FPTR_PRINTF(5,0)
2696 char *msg
= xvasprintf (msgid
, *ap
);
2697 s_singleton
->m_diagnostics
.safe_push (msg
);
2701 auto_vec
<char *> m_diagnostics
;
2704 static lexer_diagnostic_sink
*s_singleton
;
2707 lexer_diagnostic_sink
*lexer_diagnostic_sink::s_singleton
;
2709 /* Constructor. Override line_table with a new instance based on CASE_,
2710 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2711 start parsing the tempfile. */
2713 lexer_test::lexer_test (const line_table_case
&case_
, const char *content
,
2714 lexer_test_options
*options
)
2716 m_parser (cpp_create_reader (CLK_GNUC99
, NULL
, line_table
)),
2717 /* Create a tempfile and write the text to it. */
2718 m_tempfile (SELFTEST_LOCATION
, ".c", content
),
2720 m_implicitly_expect_EOF (true)
2723 options
->apply (*this);
2725 cpp_init_iconv (m_parser
);
2727 /* Parse the file. */
2728 const char *fname
= cpp_read_main_file (m_parser
,
2729 m_tempfile
.get_filename ());
2730 ASSERT_NE (fname
, NULL
);
2733 /* Destructor. By default, verify that the next token in m_parser is EOF. */
2735 lexer_test::~lexer_test ()
2738 const cpp_token
*tok
;
2740 if (m_implicitly_expect_EOF
)
2742 tok
= cpp_get_token_with_location (m_parser
, &loc
);
2743 ASSERT_NE (tok
, NULL
);
2744 ASSERT_EQ (tok
->type
, CPP_EOF
);
2748 /* Get the next token from m_parser. */
2751 lexer_test::get_token ()
2754 const cpp_token
*tok
;
2756 tok
= cpp_get_token_with_location (m_parser
, &loc
);
2757 ASSERT_NE (tok
, NULL
);
2761 /* Verify that locations within string literals are correctly handled. */
2763 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2764 using the string concatenation database for TEST.
2766 Assert that the character at index IDX is on EXPECTED_LINE,
2767 and that it begins at column EXPECTED_START_COL and ends at
2768 EXPECTED_FINISH_COL (unless the locations are beyond
2769 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2773 assert_char_at_range (const location
&loc
,
2775 location_t strloc
, enum cpp_ttype type
, int idx
,
2776 int expected_line
, int expected_start_col
,
2777 int expected_finish_col
)
2779 cpp_reader
*pfile
= test
.m_parser
;
2780 string_concat_db
*concats
= &test
.m_concats
;
2782 source_range actual_range
= source_range();
2784 = get_source_range_for_char (pfile
, test
.m_file_cache
,
2785 concats
, strloc
, type
, idx
,
2787 if (should_have_column_data_p (strloc
))
2788 ASSERT_EQ_AT (loc
, NULL
, err
);
2791 ASSERT_STREQ_AT (loc
,
2792 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2797 int actual_start_line
= LOCATION_LINE (actual_range
.m_start
);
2798 ASSERT_EQ_AT (loc
, expected_line
, actual_start_line
);
2799 int actual_finish_line
= LOCATION_LINE (actual_range
.m_finish
);
2800 ASSERT_EQ_AT (loc
, expected_line
, actual_finish_line
);
2802 if (should_have_column_data_p (actual_range
.m_start
))
2804 int actual_start_col
= LOCATION_COLUMN (actual_range
.m_start
);
2805 ASSERT_EQ_AT (loc
, expected_start_col
, actual_start_col
);
2807 if (should_have_column_data_p (actual_range
.m_finish
))
2809 int actual_finish_col
= LOCATION_COLUMN (actual_range
.m_finish
);
2810 ASSERT_EQ_AT (loc
, expected_finish_col
, actual_finish_col
);
/* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
   the effective location of any errors.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
                             EXPECTED_START_COL, EXPECTED_FINISH_COL)     \
  assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
                        (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
                        (EXPECTED_FINISH_COL))
2823 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2824 using the string concatenation database for TEST.
2826 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2829 assert_num_substring_ranges (const location
&loc
,
2832 enum cpp_ttype type
,
2833 int expected_num_ranges
)
2835 cpp_reader
*pfile
= test
.m_parser
;
2836 string_concat_db
*concats
= &test
.m_concats
;
2838 int actual_num_ranges
= -1;
2840 = get_num_source_ranges_for_substring (pfile
, test
.m_file_cache
,
2841 concats
, strloc
, type
,
2842 &actual_num_ranges
);
2843 if (should_have_column_data_p (strloc
))
2844 ASSERT_EQ_AT (loc
, NULL
, err
);
2847 ASSERT_STREQ_AT (loc
,
2848 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2852 ASSERT_EQ_AT (loc
, expected_num_ranges
, actual_num_ranges
);
/* Macro for calling assert_num_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
                                    EXPECTED_NUM_RANGES)      \
  assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
                               (TYPE), (EXPECTED_NUM_RANGES))
2864 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2865 returns an error (using the string concatenation database for TEST). */
2868 assert_has_no_substring_ranges (const location
&loc
,
2871 enum cpp_ttype type
,
2872 const char *expected_err
)
2874 cpp_reader
*pfile
= test
.m_parser
;
2875 string_concat_db
*concats
= &test
.m_concats
;
2876 cpp_substring_ranges ranges
;
2877 const char *actual_err
2878 = get_substring_ranges_for_loc (pfile
, test
.m_file_cache
, concats
, strloc
,
2880 if (should_have_column_data_p (strloc
))
2881 ASSERT_STREQ_AT (loc
, expected_err
, actual_err
);
2883 ASSERT_STREQ_AT (loc
,
2884 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
/* Macro for calling assert_has_no_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
    assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
                                    (STRLOC), (TYPE), (ERR))
2892 /* Lex a simple string literal. Verify the substring location data, before
2893 and after running cpp_interpret_string on it. */
2896 test_lexer_string_locations_simple (const line_table_case
&case_
)
2898 /* Digits 0-9 (with 0 at column 10), the simple way.
2899 ....................000000000.11111111112.2222222223333333333
2900 ....................123456789.01234567890.1234567890123456789
2901 We add a trailing comment to ensure that we correctly locate
2902 the end of the string literal token. */
2903 const char *content
= " \"0123456789\" /* not a string */\n";
2904 lexer_test
test (case_
, content
, NULL
);
2906 /* Verify that we get the expected token back, with the correct
2907 location information. */
2908 const cpp_token
*tok
= test
.get_token ();
2909 ASSERT_EQ (tok
->type
, CPP_STRING
);
2910 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"0123456789\"");
2911 ASSERT_TOKEN_LOC_EQ (tok
, test
.m_tempfile
.get_filename (), 1, 9, 20);
2913 /* At this point in lexing, the quote characters are treated as part of
2914 the string (they are stripped off by cpp_interpret_string). */
2916 ASSERT_EQ (tok
->val
.str
.len
, 12);
2918 /* Verify that cpp_interpret_string works. */
2919 cpp_string dst_string
;
2920 const enum cpp_ttype type
= CPP_STRING
;
2921 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
2923 ASSERT_TRUE (result
);
2924 ASSERT_STREQ ("0123456789", (const char *)dst_string
.text
);
2925 free (const_cast <unsigned char *> (dst_string
.text
));
2927 /* Verify ranges of individual characters. This no longer includes the
2928 opening quote, but does include the closing quote. */
2929 for (int i
= 0; i
<= 10; i
++)
2930 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1,
2933 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, type
, 11);
2936 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2940 test_lexer_string_locations_ebcdic (const line_table_case
&case_
)
2942 /* EBCDIC support requires iconv. */
2946 /* Digits 0-9 (with 0 at column 10), the simple way.
2947 ....................000000000.11111111112.2222222223333333333
2948 ....................123456789.01234567890.1234567890123456789
2949 We add a trailing comment to ensure that we correctly locate
2950 the end of the string literal token. */
2951 const char *content
= " \"0123456789\" /* not a string */\n";
2952 ebcdic_execution_charset use_ebcdic
;
2953 lexer_test
test (case_
, content
, &use_ebcdic
);
2955 /* Verify that we get the expected token back, with the correct
2956 location information. */
2957 const cpp_token
*tok
= test
.get_token ();
2958 ASSERT_EQ (tok
->type
, CPP_STRING
);
2959 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"0123456789\"");
2960 ASSERT_TOKEN_LOC_EQ (tok
, test
.m_tempfile
.get_filename (), 1, 9, 20);
2962 /* At this point in lexing, the quote characters are treated as part of
2963 the string (they are stripped off by cpp_interpret_string). */
2965 ASSERT_EQ (tok
->val
.str
.len
, 12);
2967 /* The remainder of the test requires an iconv implementation that
2968 can convert from UTF-8 to the EBCDIC encoding requested above. */
2969 if (use_ebcdic
.iconv_errors_occurred_p ())
2972 /* Verify that cpp_interpret_string works. */
2973 cpp_string dst_string
;
2974 const enum cpp_ttype type
= CPP_STRING
;
2975 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
2977 ASSERT_TRUE (result
);
2978 /* We should now have EBCDIC-encoded text, specifically
2979 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2980 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2981 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2982 (const char *)dst_string
.text
);
2983 free (const_cast <unsigned char *> (dst_string
.text
));
2985 /* Verify that we don't attempt to record substring location information
2987 ASSERT_HAS_NO_SUBSTRING_RANGES
2988 (test
, tok
->src_loc
, type
,
2989 "execution character set != source character set");
2992 /* Lex a string literal containing a hex-escaped character.
2993 Verify the substring location data, before and after running
2994 cpp_interpret_string on it. */
2997 test_lexer_string_locations_hex (const line_table_case
&case_
)
2999 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
3000 and with a space in place of digit 6, to terminate the escaped
3002 ....................000000000.111111.11112222.
3003 ....................123456789.012345.67890123. */
3004 const char *content
= " \"01234\\x35 789\"\n";
3005 lexer_test
test (case_
, content
, NULL
);
3007 /* Verify that we get the expected token back, with the correct
3008 location information. */
3009 const cpp_token
*tok
= test
.get_token ();
3010 ASSERT_EQ (tok
->type
, CPP_STRING
);
3011 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"01234\\x35 789\"");
3012 ASSERT_TOKEN_LOC_EQ (tok
, test
.m_tempfile
.get_filename (), 1, 9, 23);
3014 /* At this point in lexing, the quote characters are treated as part of
3015 the string (they are stripped off by cpp_interpret_string). */
3016 ASSERT_EQ (tok
->val
.str
.len
, 15);
3018 /* Verify that cpp_interpret_string works. */
3019 cpp_string dst_string
;
3020 const enum cpp_ttype type
= CPP_STRING
;
3021 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
3023 ASSERT_TRUE (result
);
3024 ASSERT_STREQ ("012345 789", (const char *)dst_string
.text
);
3025 free (const_cast <unsigned char *> (dst_string
.text
));
3027 /* Verify ranges of individual characters. This no longer includes the
3028 opening quote, but does include the closing quote. */
3029 for (int i
= 0; i
<= 4; i
++)
3030 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 10 + i
, 10 + i
);
3031 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, 5, 1, 15, 18);
3032 for (int i
= 6; i
<= 10; i
++)
3033 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 13 + i
, 13 + i
);
3035 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, type
, 11);
3038 /* Lex a string literal containing an octal-escaped character.
3039 Verify the substring location data after running cpp_interpret_string
3043 test_lexer_string_locations_oct (const line_table_case
&case_
)
3045 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
3046 and with a space in place of digit 6, to terminate the escaped
3048 ....................000000000.111111.11112222.2222223333333333444
3049 ....................123456789.012345.67890123.4567890123456789012 */
3050 const char *content
= " \"01234\\065 789\" /* not a string */\n";
3051 lexer_test
test (case_
, content
, NULL
);
3053 /* Verify that we get the expected token back, with the correct
3054 location information. */
3055 const cpp_token
*tok
= test
.get_token ();
3056 ASSERT_EQ (tok
->type
, CPP_STRING
);
3057 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"01234\\065 789\"");
3059 /* Verify that cpp_interpret_string works. */
3060 cpp_string dst_string
;
3061 const enum cpp_ttype type
= CPP_STRING
;
3062 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
3064 ASSERT_TRUE (result
);
3065 ASSERT_STREQ ("012345 789", (const char *)dst_string
.text
);
3066 free (const_cast <unsigned char *> (dst_string
.text
));
3068 /* Verify ranges of individual characters. This no longer includes the
3069 opening quote, but does include the closing quote. */
3070 for (int i
= 0; i
< 5; i
++)
3071 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 10 + i
, 10 + i
);
3072 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, 5, 1, 15, 18);
3073 for (int i
= 6; i
<= 10; i
++)
3074 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 13 + i
, 13 + i
);
3076 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, type
, 11);
3079 /* Test of string literal containing letter escapes. */
3082 test_lexer_string_locations_letter_escape_1 (const line_table_case
&case_
)
3084 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
3085 .....................000000000.1.11111.1.1.11222.22222223333333
3086 .....................123456789.0.12345.6.7.89012.34567890123456. */
3087 const char *content
= (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
3088 lexer_test
test (case_
, content
, NULL
);
3090 /* Verify that we get the expected tokens back. */
3091 const cpp_token
*tok
= test
.get_token ();
3092 ASSERT_EQ (tok
->type
, CPP_STRING
);
3093 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"\\tfoo\\\\\\nbar\"");
3095 /* Verify ranges of individual characters. */
3097 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
3100 for (int i
= 1; i
<= 3; i
++)
3101 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
3102 i
, 1, 11 + i
, 11 + i
);
3103 /* "\\" and "\n". */
3104 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
3106 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
3109 /* "bar" and closing quote for nul-terminator. */
3110 for (int i
= 6; i
<= 9; i
++)
3111 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
3112 i
, 1, 13 + i
, 13 + i
);
3114 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, CPP_STRING
, 10);
3117 /* Another test of a string literal containing a letter escape.
3118 Based on string seen in
3120 in gcc.dg/format/c90-printf-1.c. */
3123 test_lexer_string_locations_letter_escape_2 (const line_table_case
&case_
)
3125 /* .....................000000000.1111.11.1111.22222222223.
3126 .....................123456789.0123.45.6789.01234567890. */
3127 const char *content
= (" \"%-%\\n\" /* non-str */\n");
3128 lexer_test
test (case_
, content
, NULL
);
3130 /* Verify that we get the expected tokens back. */
3131 const cpp_token
*tok
= test
.get_token ();
3132 ASSERT_EQ (tok
->type
, CPP_STRING
);
3133 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"%-%\\n\"");
3135 /* Verify ranges of individual characters. */
3137 for (int i
= 0; i
< 3; i
++)
3138 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
3139 i
, 1, 10 + i
, 10 + i
);
3141 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
3144 /* Closing quote for nul-terminator. */
3145 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
3148 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, CPP_STRING
, 5);
3151 /* Lex a string literal containing UCN 4 characters.
3152 Verify the substring location data after running cpp_interpret_string
3156 test_lexer_string_locations_ucn4 (const line_table_case
&case_
)
3158 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
3160 ....................000000000.111111.111122.222222223.33333333344444
3161 ....................123456789.012345.678901.234567890.12345678901234 */
3162 const char *content
= " \"01234\\u2174\\u2175789\" /* non-str */\n";
3163 lexer_test
test (case_
, content
, NULL
);
3165 /* Verify that we get the expected token back, with the correct
3166 location information. */
3167 const cpp_token
*tok
= test
.get_token ();
3168 ASSERT_EQ (tok
->type
, CPP_STRING
);
3169 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"01234\\u2174\\u2175789\"");
3171 /* Verify that cpp_interpret_string works.
3172 The string should be encoded in the execution character
3173 set. Assuming that is UTF-8, we should have the following:
3174 ----------- ---- ----- ------- ----------------
3175 Byte offset Byte Octal Unicode Source Column(s)
3176 ----------- ---- ----- ------- ----------------
3182 5 0xE2 \342 U+2174 15-20
3183 6 0x85 \205 (cont) 15-20
3184 7 0xB4 \264 (cont) 15-20
3185 8 0xE2 \342 U+2175 21-26
3186 9 0x85 \205 (cont) 21-26
3187 10 0xB5 \265 (cont) 21-26
3191 14 0x00 30 (closing quote)
3192 ----------- ---- ----- ------- ---------------. */
3194 cpp_string dst_string
;
3195 const enum cpp_ttype type
= CPP_STRING
;
3196 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
3198 ASSERT_TRUE (result
);
3199 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3200 (const char *)dst_string
.text
);
3201 free (const_cast <unsigned char *> (dst_string
.text
));
3203 /* Verify ranges of individual characters. This no longer includes the
3204 opening quote, but does include the closing quote.
3206 for (int i
= 0; i
<= 4; i
++)
3207 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 10 + i
, 10 + i
);
3209 for (int i
= 5; i
<= 7; i
++)
3210 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 15, 20);
3212 for (int i
= 8; i
<= 10; i
++)
3213 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 21, 26);
3214 /* '789' and nul terminator */
3215 for (int i
= 11; i
<= 14; i
++)
3216 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 16 + i
, 16 + i
);
3218 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, type
, 15);
3221 /* Lex a string literal containing UCN 8 characters.
3222 Verify the substring location data after running cpp_interpret_string
3226 test_lexer_string_locations_ucn8 (const line_table_case
&case_
)
3228 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
3229 ....................000000000.111111.1111222222.2222333333333.344444
3230 ....................123456789.012345.6789012345.6789012345678.901234 */
3231 const char *content
= " \"01234\\U00002174\\U00002175789\" /* */\n";
3232 lexer_test
test (case_
, content
, NULL
);
3234 /* Verify that we get the expected token back, with the correct
3235 location information. */
3236 const cpp_token
*tok
= test
.get_token ();
3237 ASSERT_EQ (tok
->type
, CPP_STRING
);
3238 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
,
3239 "\"01234\\U00002174\\U00002175789\"");
3241 /* Verify that cpp_interpret_string works.
3242 The UTF-8 encoding of the string is identical to that from
3243 the ucn4 testcase above; the only difference is the column
3245 cpp_string dst_string
;
3246 const enum cpp_ttype type
= CPP_STRING
;
3247 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
3249 ASSERT_TRUE (result
);
3250 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3251 (const char *)dst_string
.text
);
3252 free (const_cast <unsigned char *> (dst_string
.text
));
3254 /* Verify ranges of individual characters. This no longer includes the
3255 opening quote, but does include the closing quote.
3257 for (int i
= 0; i
<= 4; i
++)
3258 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 10 + i
, 10 + i
);
3260 for (int i
= 5; i
<= 7; i
++)
3261 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 15, 24);
3263 for (int i
= 8; i
<= 10; i
++)
3264 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 25, 34);
3265 /* '789' at columns 35-37 */
3266 for (int i
= 11; i
<= 13; i
++)
3267 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 24 + i
, 24 + i
);
3268 /* Closing quote/nul-terminator at column 38. */
3269 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, 14, 1, 38, 38);
3271 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, type
, 15);
/* Read the four bytes stored at PTR_BE_VALUE as a big-endian
   (most-significant byte first) 32-bit quantity and return the
   corresponding value in host byte order.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes
    = reinterpret_cast <const unsigned char *> (ptr_be_value);

  /* Fold the bytes in from most significant to least significant.  */
  uint32_t value = 0;
  for (int idx = 0; idx < 4; idx++)
    value = (value << 8) | bytes[idx];
  return value;
}
3286 /* Lex a wide string literal and verify that attempts to read substring
3287 location data from it fail gracefully. */
3290 test_lexer_string_locations_wide_string (const line_table_case
&case_
)
3293 ....................000000000.11111111112.22222222233333
3294 ....................123456789.01234567890.12345678901234 */
3295 const char *content
= " L\"0123456789\" /* non-str */\n";
3296 lexer_test
test (case_
, content
, NULL
);
3298 /* Verify that we get the expected token back, with the correct
3299 location information. */
3300 const cpp_token
*tok
= test
.get_token ();
3301 ASSERT_EQ (tok
->type
, CPP_WSTRING
);
3302 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "L\"0123456789\"");
3304 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
3305 cpp_string dst_string
;
3306 const enum cpp_ttype type
= CPP_WSTRING
;
3307 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
3309 ASSERT_TRUE (result
);
3310 /* The cpp_reader defaults to big-endian with
3311 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
3312 now be encoded as UTF-32BE. */
3313 const uint32_t *be32_chars
= (const uint32_t *)dst_string
.text
;
3314 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars
[0]));
3315 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars
[5]));
3316 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars
[9]));
3317 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars
[10]));
3318 free (const_cast <unsigned char *> (dst_string
.text
));
3320 /* We don't yet support generating substring location information
3322 ASSERT_HAS_NO_SUBSTRING_RANGES
3323 (test
, tok
->src_loc
, type
,
3324 "execution character set != source character set");
/* Read the two bytes stored at PTR_BE_VALUE as a big-endian
   (most-significant byte first) 16-bit quantity and return the
   corresponding value in host byte order.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes
    = reinterpret_cast <const unsigned char *> (ptr_be_value);
  const unsigned int high_byte = bytes[0];
  const unsigned int low_byte = bytes[1];
  return static_cast <uint16_t> ((high_byte << 8) | low_byte);
}
3336 /* Lex a u"" string literal and verify that attempts to read substring
3337 location data from it fail gracefully. */
3340 test_lexer_string_locations_string16 (const line_table_case
&case_
)
3343 ....................000000000.11111111112.22222222233333
3344 ....................123456789.01234567890.12345678901234 */
3345 const char *content
= " u\"0123456789\" /* non-str */\n";
3346 lexer_test
test (case_
, content
, NULL
);
3348 /* Verify that we get the expected token back, with the correct
3349 location information. */
3350 const cpp_token
*tok
= test
.get_token ();
3351 ASSERT_EQ (tok
->type
, CPP_STRING16
);
3352 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "u\"0123456789\"");
3354 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
3355 cpp_string dst_string
;
3356 const enum cpp_ttype type
= CPP_STRING16
;
3357 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
3359 ASSERT_TRUE (result
);
3361 /* The cpp_reader defaults to big-endian, so dst_string should
3362 now be encoded as UTF-16BE. */
3363 const uint16_t *be16_chars
= (const uint16_t *)dst_string
.text
;
3364 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars
[0]));
3365 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars
[5]));
3366 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars
[9]));
3367 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars
[10]));
3368 free (const_cast <unsigned char *> (dst_string
.text
));
3370 /* We don't yet support generating substring location information
3372 ASSERT_HAS_NO_SUBSTRING_RANGES
3373 (test
, tok
->src_loc
, type
,
3374 "execution character set != source character set");
3377 /* Lex a U"" string literal and verify that attempts to read substring
3378 location data from it fail gracefully. */
3381 test_lexer_string_locations_string32 (const line_table_case
&case_
)
3384 ....................000000000.11111111112.22222222233333
3385 ....................123456789.01234567890.12345678901234 */
3386 const char *content
= " U\"0123456789\" /* non-str */\n";
3387 lexer_test
test (case_
, content
, NULL
);
3389 /* Verify that we get the expected token back, with the correct
3390 location information. */
3391 const cpp_token
*tok
= test
.get_token ();
3392 ASSERT_EQ (tok
->type
, CPP_STRING32
);
3393 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "U\"0123456789\"");
3395 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
3396 cpp_string dst_string
;
3397 const enum cpp_ttype type
= CPP_STRING32
;
3398 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
3400 ASSERT_TRUE (result
);
3402 /* The cpp_reader defaults to big-endian, so dst_string should
3403 now be encoded as UTF-32BE. */
3404 const uint32_t *be32_chars
= (const uint32_t *)dst_string
.text
;
3405 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars
[0]));
3406 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars
[5]));
3407 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars
[9]));
3408 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars
[10]));
3409 free (const_cast <unsigned char *> (dst_string
.text
));
3411 /* We don't yet support generating substring location information
3413 ASSERT_HAS_NO_SUBSTRING_RANGES
3414 (test
, tok
->src_loc
, type
,
3415 "execution character set != source character set");
3418 /* Lex a u8-string literal.
3419 Verify the substring location data after running cpp_interpret_string
3423 test_lexer_string_locations_u8 (const line_table_case
&case_
)
3426 ....................000000000.11111111112.22222222233333
3427 ....................123456789.01234567890.12345678901234 */
3428 const char *content
= " u8\"0123456789\" /* non-str */\n";
3429 lexer_test
test (case_
, content
, NULL
);
3431 /* Verify that we get the expected token back, with the correct
3432 location information. */
3433 const cpp_token
*tok
= test
.get_token ();
3434 ASSERT_EQ (tok
->type
, CPP_UTF8STRING
);
3435 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "u8\"0123456789\"");
3437 /* Verify that cpp_interpret_string works. */
3438 cpp_string dst_string
;
3439 const enum cpp_ttype type
= CPP_STRING
;
3440 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
3442 ASSERT_TRUE (result
);
3443 ASSERT_STREQ ("0123456789", (const char *)dst_string
.text
);
3444 free (const_cast <unsigned char *> (dst_string
.text
));
3446 /* Verify ranges of individual characters. This no longer includes the
3447 opening quote, but does include the closing quote. */
3448 for (int i
= 0; i
<= 10; i
++)
3449 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 10 + i
, 10 + i
);
3452 /* Lex a string literal containing UTF-8 source characters.
3453 Verify the substring location data after running cpp_interpret_string
3457 test_lexer_string_locations_utf8_source (const line_table_case
&case_
)
3459 /* This string literal is written out to the source file as UTF-8,
3460 and is of the form "before mojibake after", where "mojibake"
3461 is written as the following four unicode code points:
3462 U+6587 CJK UNIFIED IDEOGRAPH-6587
3463 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3464 U+5316 CJK UNIFIED IDEOGRAPH-5316
3465 U+3051 HIRAGANA LETTER KE.
3466 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3467 "before" and "after" are 1 byte per unicode character.
3469 The numbering shown are "columns", which are *byte* numbers within
3470 the line, rather than unicode character numbers.
3472 .................... 000000000.1111111.
3473 .................... 123456789.0123456. */
3474 const char *content
= (" \"before "
3475 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3476 UTF-8: 0xE6 0x96 0x87
3477 C octal escaped UTF-8: \346\226\207
3478 "column" numbers: 17-19. */
3481 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3482 UTF-8: 0xE5 0xAD 0x97
3483 C octal escaped UTF-8: \345\255\227
3484 "column" numbers: 20-22. */
3487 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3488 UTF-8: 0xE5 0x8C 0x96
3489 C octal escaped UTF-8: \345\214\226
3490 "column" numbers: 23-25. */
3493 /* U+3051 HIRAGANA LETTER KE
3494 UTF-8: 0xE3 0x81 0x91
3495 C octal escaped UTF-8: \343\201\221
3496 "column" numbers: 26-28. */
3499 /* column numbers 29 onwards
3500 2333333.33334444444444
3501 9012345.67890123456789. */
3502 " after\" /* non-str */\n");
3503 lexer_test
test (case_
, content
, NULL
);
3505 /* Verify that we get the expected token back, with the correct
3506 location information. */
3507 const cpp_token
*tok
= test
.get_token ();
3508 ASSERT_EQ (tok
->type
, CPP_STRING
);
3509 ASSERT_TOKEN_AS_TEXT_EQ
3510 (test
.m_parser
, tok
,
3511 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3513 /* Verify that cpp_interpret_string works. */
3514 cpp_string dst_string
;
3515 const enum cpp_ttype type
= CPP_STRING
;
3516 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
3518 ASSERT_TRUE (result
);
3520 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3521 (const char *)dst_string
.text
);
3522 free (const_cast <unsigned char *> (dst_string
.text
));
3524 /* Verify ranges of individual characters. This no longer includes the
3525 opening quote, but does include the closing quote.
3526 Assuming that both source and execution encodings are UTF-8, we have
3527 a run of 25 octets in each, plus the NUL terminator. */
3528 for (int i
= 0; i
< 25; i
++)
3529 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, i
, 1, 10 + i
, 10 + i
);
3530 /* NUL-terminator should use the closing quote at column 35. */
3531 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, type
, 25, 1, 35, 35);
3533 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, type
, 26);
3536 /* Test of string literal concatenation. */
3539 test_lexer_string_locations_concatenation_1 (const line_table_case
&case_
)
3542 .....................000000000.111111.11112222222222
3543 .....................123456789.012345.67890123456789. */
3544 const char *content
= (" \"01234\" /* non-str */\n"
3545 " \"56789\" /* non-str */\n");
3546 lexer_test
test (case_
, content
, NULL
);
3548 location_t input_locs
[2];
3550 /* Verify that we get the expected tokens back. */
3551 auto_vec
<cpp_string
> input_strings
;
3552 const cpp_token
*tok_a
= test
.get_token ();
3553 ASSERT_EQ (tok_a
->type
, CPP_STRING
);
3554 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok_a
, "\"01234\"");
3555 input_strings
.safe_push (tok_a
->val
.str
);
3556 input_locs
[0] = tok_a
->src_loc
;
3558 const cpp_token
*tok_b
= test
.get_token ();
3559 ASSERT_EQ (tok_b
->type
, CPP_STRING
);
3560 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok_b
, "\"56789\"");
3561 input_strings
.safe_push (tok_b
->val
.str
);
3562 input_locs
[1] = tok_b
->src_loc
;
3564 /* Verify that cpp_interpret_string works. */
3565 cpp_string dst_string
;
3566 const enum cpp_ttype type
= CPP_STRING
;
3567 bool result
= cpp_interpret_string (test
.m_parser
,
3568 input_strings
.address (), 2,
3570 ASSERT_TRUE (result
);
3571 ASSERT_STREQ ("0123456789", (const char *)dst_string
.text
);
3572 free (const_cast <unsigned char *> (dst_string
.text
));
3574 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3575 test
.m_concats
.record_string_concatenation (2, input_locs
);
3577 location_t initial_loc
= input_locs
[0];
3579 /* "01234" on line 1. */
3580 for (int i
= 0; i
<= 4; i
++)
3581 ASSERT_CHAR_AT_RANGE (test
, initial_loc
, type
, i
, 1, 10 + i
, 10 + i
);
3582 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3583 for (int i
= 5; i
<= 10; i
++)
3584 ASSERT_CHAR_AT_RANGE (test
, initial_loc
, type
, i
, 2, 5 + i
, 5 + i
);
3586 ASSERT_NUM_SUBSTRING_RANGES (test
, initial_loc
, type
, 11);
3589 /* Another test of string literal concatenation. */
3592 test_lexer_string_locations_concatenation_2 (const line_table_case
&case_
)
3595 .....................000000000.111.11111112222222
3596 .....................123456789.012.34567890123456. */
3597 const char *content
= (" \"01\" /* non-str */\n"
3598 " \"23\" /* non-str */\n"
3599 " \"45\" /* non-str */\n"
3600 " \"67\" /* non-str */\n"
3601 " \"89\" /* non-str */\n");
3602 lexer_test
test (case_
, content
, NULL
);
3604 auto_vec
<cpp_string
> input_strings
;
3605 location_t input_locs
[5];
3607 /* Verify that we get the expected tokens back. */
3608 for (int i
= 0; i
< 5; i
++)
3610 const cpp_token
*tok
= test
.get_token ();
3611 ASSERT_EQ (tok
->type
, CPP_STRING
);
3612 input_strings
.safe_push (tok
->val
.str
);
3613 input_locs
[i
] = tok
->src_loc
;
3616 /* Verify that cpp_interpret_string works. */
3617 cpp_string dst_string
;
3618 const enum cpp_ttype type
= CPP_STRING
;
3619 bool result
= cpp_interpret_string (test
.m_parser
,
3620 input_strings
.address (), 5,
3622 ASSERT_TRUE (result
);
3623 ASSERT_STREQ ("0123456789", (const char *)dst_string
.text
);
3624 free (const_cast <unsigned char *> (dst_string
.text
));
3626 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3627 test
.m_concats
.record_string_concatenation (5, input_locs
);
3629 location_t initial_loc
= input_locs
[0];
3631 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3632 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3633 and expect get_source_range_for_substring to fail.
3634 However, for a string concatenation test, we can have a case
3635 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3636 but subsequent strings can be after it.
3637 Attempting to detect this within assert_char_at_range
3638 would overcomplicate the logic for the common test cases, so
3639 we detect it here. */
3640 if (should_have_column_data_p (input_locs
[0])
3641 && !should_have_column_data_p (input_locs
[4]))
3643 /* Verify that get_source_range_for_substring gracefully rejects
3645 source_range actual_range
;
3647 = get_source_range_for_char (test
.m_parser
, test
.m_file_cache
,
3649 initial_loc
, type
, 0, &actual_range
);
3650 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err
);
3654 for (int i
= 0; i
< 5; i
++)
3655 for (int j
= 0; j
< 2; j
++)
3656 ASSERT_CHAR_AT_RANGE (test
, initial_loc
, type
, (i
* 2) + j
,
3657 i
+ 1, 10 + j
, 10 + j
);
3659 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3660 ASSERT_CHAR_AT_RANGE (test
, initial_loc
, type
, 10, 5, 12, 12);
3662 ASSERT_NUM_SUBSTRING_RANGES (test
, initial_loc
, type
, 11);
3665 /* Another test of string literal concatenation, this time combined with
3666 various kinds of escaped characters. */
3669 test_lexer_string_locations_concatenation_3 (const line_table_case
&case_
)
3671 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3672 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3674 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3675 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3676 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3677 lexer_test
test (case_
, content
, NULL
);
3679 auto_vec
<cpp_string
> input_strings
;
3680 location_t input_locs
[4];
3682 /* Verify that we get the expected tokens back. */
3683 for (int i
= 0; i
< 4; i
++)
3685 const cpp_token
*tok
= test
.get_token ();
3686 ASSERT_EQ (tok
->type
, CPP_STRING
);
3687 input_strings
.safe_push (tok
->val
.str
);
3688 input_locs
[i
] = tok
->src_loc
;
3691 /* Verify that cpp_interpret_string works. */
3692 cpp_string dst_string
;
3693 const enum cpp_ttype type
= CPP_STRING
;
3694 bool result
= cpp_interpret_string (test
.m_parser
,
3695 input_strings
.address (), 4,
3697 ASSERT_TRUE (result
);
3698 ASSERT_STREQ ("0123456789", (const char *)dst_string
.text
);
3699 free (const_cast <unsigned char *> (dst_string
.text
));
3701 /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3702 test
.m_concats
.record_string_concatenation (4, input_locs
);
3704 location_t initial_loc
= input_locs
[0];
3706 for (int i
= 0; i
<= 4; i
++)
3707 ASSERT_CHAR_AT_RANGE (test
, initial_loc
, type
, i
, 1, 10 + i
, 10 + i
);
3708 ASSERT_CHAR_AT_RANGE (test
, initial_loc
, type
, 5, 1, 19, 22);
3709 ASSERT_CHAR_AT_RANGE (test
, initial_loc
, type
, 6, 1, 27, 30);
3710 for (int i
= 7; i
<= 9; i
++)
3711 ASSERT_CHAR_AT_RANGE (test
, initial_loc
, type
, i
, 1, 28 + i
, 28 + i
);
3713 /* NUL-terminator should use the location of the final closing quote. */
3714 ASSERT_CHAR_AT_RANGE (test
, initial_loc
, type
, 10, 1, 38, 38);
3716 ASSERT_NUM_SUBSTRING_RANGES (test
, initial_loc
, type
, 11);
3719 /* Test of string literal in a macro. */
3722 test_lexer_string_locations_macro (const line_table_case
&case_
)
3725 .....................0000000001111111111.22222222223.
3726 .....................1234567890123456789.01234567890. */
3727 const char *content
= ("#define MACRO \"0123456789\" /* non-str */\n"
3729 lexer_test
test (case_
, content
, NULL
);
3731 /* Verify that we get the expected tokens back. */
3732 const cpp_token
*tok
= test
.get_token ();
3733 ASSERT_EQ (tok
->type
, CPP_PADDING
);
3735 tok
= test
.get_token ();
3736 ASSERT_EQ (tok
->type
, CPP_STRING
);
3737 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"0123456789\"");
3739 /* Verify ranges of individual characters. We ought to
3740 see columns within the macro definition. */
3741 for (int i
= 0; i
<= 10; i
++)
3742 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
3743 i
, 1, 20 + i
, 20 + i
);
3745 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, CPP_STRING
, 11);
3747 tok
= test
.get_token ();
3748 ASSERT_EQ (tok
->type
, CPP_PADDING
);
3751 /* Test of stringification of a macro argument. */
3754 test_lexer_string_locations_stringified_macro_argument
3755 (const line_table_case
&case_
)
3757 /* .....................000000000111111111122222222223.
3758 .....................123456789012345678901234567890. */
3759 const char *content
= ("#define MACRO(X) #X /* non-str */\n"
3761 lexer_test
test (case_
, content
, NULL
);
3763 /* Verify that we get the expected token back. */
3764 const cpp_token
*tok
= test
.get_token ();
3765 ASSERT_EQ (tok
->type
, CPP_PADDING
);
3767 tok
= test
.get_token ();
3768 ASSERT_EQ (tok
->type
, CPP_STRING
);
3769 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "\"foo\"");
3771 /* We don't support getting the location of a stringified macro
3772 argument. Verify that it fails gracefully. */
3773 ASSERT_HAS_NO_SUBSTRING_RANGES (test
, tok
->src_loc
, CPP_STRING
,
3774 "cpp_interpret_string_1 failed");
3776 tok
= test
.get_token ();
3777 ASSERT_EQ (tok
->type
, CPP_PADDING
);
3779 tok
= test
.get_token ();
3780 ASSERT_EQ (tok
->type
, CPP_PADDING
);
3783 /* Ensure that we fail gracefully if something attempts to pass
3784 in a location that isn't a string literal token. Seen on this code:
3786 const char a[] = " %d ";
3787 __builtin_printf (a, 0.5);
3790 when c-format.cc erroneously used the indicated one-character
3791 location as the format string location, leading to a read past the
3792 end of a string buffer in cpp_interpret_string_1. */
3795 test_lexer_string_locations_non_string (const line_table_case
&case_
)
3797 /* .....................000000000111111111122222222223.
3798 .....................123456789012345678901234567890. */
3799 const char *content
= (" a\n");
3800 lexer_test
test (case_
, content
, NULL
);
3802 /* Verify that we get the expected token back. */
3803 const cpp_token
*tok
= test
.get_token ();
3804 ASSERT_EQ (tok
->type
, CPP_NAME
);
3805 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "a");
3807 /* At this point, libcpp is attempting to interpret the name as a
3808 string literal, despite it not starting with a quote. We don't detect
3809 that, but we should at least fail gracefully. */
3810 ASSERT_HAS_NO_SUBSTRING_RANGES (test
, tok
->src_loc
, CPP_STRING
,
3811 "cpp_interpret_string_1 failed");
3814 /* Ensure that we can read substring information for a token which
3815 starts in one linemap and ends in another. Adapted from
3816 gcc.dg/cpp/pr69985.c. */
3819 test_lexer_string_locations_long_line (const line_table_case
&case_
)
3821 /* .....................000000.000111111111
3822 .....................123456.789012346789. */
3823 const char *content
= ("/* A very long line, so that we start a new line map. */\n"
3824 " \"0123456789012345678901234567890123456789"
3825 "0123456789012345678901234567890123456789"
3826 "0123456789012345678901234567890123456789"
3829 lexer_test
test (case_
, content
, NULL
);
3831 /* Verify that we get the expected token back. */
3832 const cpp_token
*tok
= test
.get_token ();
3833 ASSERT_EQ (tok
->type
, CPP_STRING
);
3835 if (!should_have_column_data_p (line_table
->highest_location
))
3838 /* Verify ranges of individual characters. */
3839 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, CPP_STRING
, 131);
3840 for (int i
= 0; i
< 131; i
++)
3841 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
3842 i
, 2, 7 + i
, 7 + i
);
3845 /* Test of locations within a raw string that doesn't contain a newline. */
3848 test_lexer_string_locations_raw_string_one_line (const line_table_case
&case_
)
3850 /* .....................00.0000000111111111122.
3851 .....................12.3456789012345678901. */
3852 const char *content
= ("R\"foo(0123456789)foo\"\n");
3853 lexer_test
test (case_
, content
, NULL
);
3855 /* Verify that we get the expected token back. */
3856 const cpp_token
*tok
= test
.get_token ();
3857 ASSERT_EQ (tok
->type
, CPP_STRING
);
3859 /* Verify that cpp_interpret_string works. */
3860 cpp_string dst_string
;
3861 const enum cpp_ttype type
= CPP_STRING
;
3862 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
3864 ASSERT_TRUE (result
);
3865 ASSERT_STREQ ("0123456789", (const char *)dst_string
.text
);
3866 free (const_cast <unsigned char *> (dst_string
.text
));
3868 if (!should_have_column_data_p (line_table
->highest_location
))
3871 /* 0-9, plus the nil terminator. */
3872 ASSERT_NUM_SUBSTRING_RANGES (test
, tok
->src_loc
, CPP_STRING
, 11);
3873 for (int i
= 0; i
< 11; i
++)
3874 ASSERT_CHAR_AT_RANGE (test
, tok
->src_loc
, CPP_STRING
,
3875 i
, 1, 7 + i
, 7 + i
);
3878 /* Test of locations within a raw string that contains a newline. */
3881 test_lexer_string_locations_raw_string_multiline (const line_table_case
&case_
)
3883 /* .....................00.0000.
3884 .....................12.3456. */
3885 const char *content
= ("R\"foo(\n"
3886 /* .....................00000.
3887 .....................12345. */
3890 /* .....................00000.
3891 .....................12345. */
3893 lexer_test
test (case_
, content
, NULL
);
3895 /* Verify that we get the expected token back. */
3896 const cpp_token
*tok
= test
.get_token ();
3897 ASSERT_EQ (tok
->type
, CPP_STRING
);
3899 /* Verify that cpp_interpret_string works. */
3900 cpp_string dst_string
;
3901 const enum cpp_ttype type
= CPP_STRING
;
3902 bool result
= cpp_interpret_string (test
.m_parser
, &tok
->val
.str
, 1,
3904 ASSERT_TRUE (result
);
3905 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string
.text
);
3906 free (const_cast <unsigned char *> (dst_string
.text
));
3908 if (!should_have_column_data_p (line_table
->highest_location
))
3911 /* Currently we don't support locations within raw strings that
3912 contain newlines. */
3913 ASSERT_HAS_NO_SUBSTRING_RANGES (test
, tok
->src_loc
, tok
->type
,
3914 "range endpoints are on different lines");
3917 /* Test of parsing an unterminated raw string. */
3920 test_lexer_string_locations_raw_string_unterminated (const line_table_case
&case_
)
3922 const char *content
= "R\"ouch()ouCh\" /* etc */";
3924 lexer_diagnostic_sink diagnostics
;
3925 lexer_test
test (case_
, content
, &diagnostics
);
3926 test
.m_implicitly_expect_EOF
= false;
3928 /* Attempt to parse the raw string. */
3929 const cpp_token
*tok
= test
.get_token ();
3930 ASSERT_EQ (tok
->type
, CPP_EOF
);
3932 ASSERT_EQ (1, diagnostics
.m_diagnostics
.length ());
3933 /* We expect the message "unterminated raw string"
3934 in the "cpplib" translation domain.
3935 It's not clear that dgettext is available on all supported hosts,
3936 so this assertion is commented-out for now.
3937 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3938 diagnostics.m_diagnostics[0]);
3942 /* Test of lexing char constants. */
3945 test_lexer_char_constants (const line_table_case
&case_
)
3947 /* Various char constants.
3948 .....................0000000001111111111.22222222223.
3949 .....................1234567890123456789.01234567890. */
3950 const char *content
= (" 'a'\n"
3955 lexer_test
test (case_
, content
, NULL
);
3957 /* Verify that we get the expected tokens back. */
3959 const cpp_token
*tok
= test
.get_token ();
3960 ASSERT_EQ (tok
->type
, CPP_CHAR
);
3961 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "'a'");
3963 unsigned int chars_seen
;
3965 cppchar_t cc
= cpp_interpret_charconst (test
.m_parser
, tok
,
3966 &chars_seen
, &unsignedp
);
3967 ASSERT_EQ (cc
, 'a');
3968 ASSERT_EQ (chars_seen
, 1);
3971 tok
= test
.get_token ();
3972 ASSERT_EQ (tok
->type
, CPP_CHAR16
);
3973 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "u'a'");
3976 tok
= test
.get_token ();
3977 ASSERT_EQ (tok
->type
, CPP_CHAR32
);
3978 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "U'a'");
3981 tok
= test
.get_token ();
3982 ASSERT_EQ (tok
->type
, CPP_WCHAR
);
3983 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "L'a'");
3985 /* 'abc' (c-char-sequence). */
3986 tok
= test
.get_token ();
3987 ASSERT_EQ (tok
->type
, CPP_CHAR
);
3988 ASSERT_TOKEN_AS_TEXT_EQ (test
.m_parser
, tok
, "'abc'");
3990 /* A table of interesting location_t values, giving one axis of our test
3993 static const location_t boundary_locations
[] = {
3994 /* Zero means "don't override the default values for a new line_table". */
3997 /* An arbitrary non-zero value that isn't close to one of
3998 the boundary values below. */
4001 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
4002 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES
- 0x100,
4003 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES
- 1,
4004 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES
,
4005 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES
+ 1,
4006 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES
+ 0x100,
4008 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
4009 LINE_MAP_MAX_LOCATION_WITH_COLS
- 0x200,
4010 LINE_MAP_MAX_LOCATION_WITH_COLS
- 1,
4011 LINE_MAP_MAX_LOCATION_WITH_COLS
,
4012 LINE_MAP_MAX_LOCATION_WITH_COLS
+ 1,
4013 LINE_MAP_MAX_LOCATION_WITH_COLS
+ 0x200,
4016 /* Run TESTCASE multiple times, once for each case in our test matrix. */
4019 for_each_line_table_case (void (*testcase
) (const line_table_case
&))
4021 /* As noted above in the description of struct line_table_case,
4022 we want to explore a test matrix of interesting line_table
4023 situations, running various selftests for each case within the
4026 /* Run all tests with:
4027 (a) line_table->default_range_bits == 0, and
4028 (b) line_table->default_range_bits == line_map_suggested_range_bits. */
4030 for (int default_range_bits
: {0, line_map_suggested_range_bits
})
4032 /* ...and use each of the "interesting" location values as
4033 the starting location within line_table. */
4034 const int num_boundary_locations
= ARRAY_SIZE (boundary_locations
);
4035 for (int loc_idx
= 0; loc_idx
< num_boundary_locations
; loc_idx
++)
4037 line_table_case
c (default_range_bits
, boundary_locations
[loc_idx
]);
4043 /* Verify that when presented with a consecutive pair of locations with
4044 a very large line offset, we don't attempt to consolidate them into
4045 a single ordinary linemap where the line offsets within the line map
4046 would lead to overflow (PR lto/88147). */
4049 test_line_offset_overflow ()
4051 line_table_test
ltt (line_table_case (5, 0));
4053 linemap_add (line_table
, LC_ENTER
, false, "foo.c", 0);
4054 linemap_line_start (line_table
, 1, 100);
4055 location_t loc_a
= linemap_line_start (line_table
, 2578, 255);
4056 assert_loceq ("foo.c", 2578, 0, loc_a
);
4058 const line_map_ordinary
*ordmap_a
= LINEMAPS_LAST_ORDINARY_MAP (line_table
);
4059 ASSERT_EQ (ordmap_a
->m_column_and_range_bits
, 13);
4060 ASSERT_EQ (ordmap_a
->m_range_bits
, 5);
4062 location_t loc_b
= linemap_line_start (line_table
, 404198, 512);
4063 assert_loceq ("foo.c", 404198, 0, loc_b
);
4065 /* We should have started a new linemap, rather than attempting to store
4066 a very large line offset. */
4067 const line_map_ordinary
*ordmap_b
= LINEMAPS_LAST_ORDINARY_MAP (line_table
);
4068 ASSERT_NE (ordmap_a
, ordmap_b
);
4071 void test_cpp_utf8 ()
4073 const int def_tabstop
= 8;
4074 cpp_char_column_policy
policy (def_tabstop
, cpp_wcwidth
);
4076 /* Verify that wcwidth of invalid UTF-8 or control bytes is 1. */
4078 int w_bad
= cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy
);
4079 ASSERT_EQ (8, w_bad
);
4080 int w_ctrl
= cpp_display_width ("\r\n\v\0\1", 5, policy
);
4081 ASSERT_EQ (5, w_ctrl
);
4084 /* Verify that wcwidth of valid UTF-8 is as expected. */
4086 const int w_pi
= cpp_display_width ("\xcf\x80", 2, policy
);
4087 ASSERT_EQ (1, w_pi
);
4088 const int w_emoji
= cpp_display_width ("\xf0\x9f\x98\x82", 4, policy
);
4089 ASSERT_EQ (2, w_emoji
);
4090 const int w_umlaut_precomposed
= cpp_display_width ("\xc3\xbf", 2,
4092 ASSERT_EQ (1, w_umlaut_precomposed
);
4093 const int w_umlaut_combining
= cpp_display_width ("y\xcc\x88", 3,
4095 ASSERT_EQ (1, w_umlaut_combining
);
4096 const int w_han
= cpp_display_width ("\xe4\xb8\xba", 3, policy
);
4097 ASSERT_EQ (2, w_han
);
4098 const int w_ascii
= cpp_display_width ("GCC", 3, policy
);
4099 ASSERT_EQ (3, w_ascii
);
4100 const int w_mixed
= cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
4101 "\x9f! \xe4\xb8\xba y\xcc\x88",
4103 ASSERT_EQ (18, w_mixed
);
4106 /* Verify that display width properly expands tabs. */
4108 const char *tstr
= "\tabc\td";
4109 ASSERT_EQ (6, cpp_display_width (tstr
, 6,
4110 cpp_char_column_policy (1, cpp_wcwidth
)));
4111 ASSERT_EQ (10, cpp_display_width (tstr
, 6,
4112 cpp_char_column_policy (3, cpp_wcwidth
)));
4113 ASSERT_EQ (17, cpp_display_width (tstr
, 6,
4114 cpp_char_column_policy (8, cpp_wcwidth
)));
4116 cpp_display_column_to_byte_column
4117 (tstr
, 6, 7, cpp_char_column_policy (8, cpp_wcwidth
)));
4120 /* Verify that cpp_byte_column_to_display_column can go past the end,
4121 and similar edge cases. */
4130 ASSERT_EQ (5, cpp_display_width (str
, 6, policy
));
4132 cpp_byte_column_to_display_column (str
, 6, 106, policy
));
4134 cpp_byte_column_to_display_column (NULL
, 0, 10000, policy
));
4136 cpp_byte_column_to_display_column (NULL
, 10000, 0, policy
));
4139 /* Verify that cpp_display_column_to_byte_column can go past the end,
4140 and similar edge cases, and check invertibility. */
4144 000000000000000000000000000000000000011
4145 111111112222222234444444455555555678901 */
4146 = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
4147 /* 000000000000000000000000000000000111111
4148 111122223333444456666777788889999012345
4150 ASSERT_EQ (4, cpp_display_column_to_byte_column (str
, 15, 2, policy
));
4152 cpp_display_column_to_byte_column (str
, 15, 11, policy
));
4154 cpp_display_column_to_byte_column (str
, 15, 111, policy
));
4156 cpp_display_column_to_byte_column (NULL
, 0, 10000, policy
));
4158 cpp_display_column_to_byte_column (NULL
, 10000, 0, policy
));
4160 /* Verify that we do not interrupt a UTF-8 sequence. */
4161 ASSERT_EQ (4, cpp_display_column_to_byte_column (str
, 15, 1, policy
));
4163 for (int byte_col
= 1; byte_col
<= 15; ++byte_col
)
4166 = cpp_byte_column_to_display_column (str
, 15, byte_col
, policy
);
4168 = cpp_display_column_to_byte_column (str
, 15, disp_col
, policy
);
4170 /* If we ask for the display column in the middle of a UTF-8
4171 sequence, it will return the length of the partial sequence,
4172 matching the behavior of GCC before display column support.
4173 Otherwise check the round trip was successful. */
4175 ASSERT_EQ (byte_col
, disp_col
);
4176 else if (byte_col
>= 6 && byte_col
< 9)
4177 ASSERT_EQ (3 + (byte_col
- 5), disp_col
);
4179 ASSERT_EQ (byte_col2
, byte_col
);
4185 check_cpp_valid_utf8_p (const char *str
)
4187 return cpp_valid_utf8_p (str
, strlen (str
));
4190 /* Check that cpp_valid_utf8_p works as expected. */
4193 test_cpp_valid_utf8_p ()
4195 ASSERT_TRUE (check_cpp_valid_utf8_p ("hello world"));
4197 /* 2-byte char (pi). */
4198 ASSERT_TRUE (check_cpp_valid_utf8_p("\xcf\x80"));
4200 /* 3-byte chars (the Japanese word "mojibake"). */
4201 ASSERT_TRUE (check_cpp_valid_utf8_p
4203 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
4204 UTF-8: 0xE6 0x96 0x87
4205 C octal escaped UTF-8: \346\226\207. */
4207 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
4208 UTF-8: 0xE5 0xAD 0x97
4209 C octal escaped UTF-8: \345\255\227. */
4211 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
4212 UTF-8: 0xE5 0x8C 0x96
4213 C octal escaped UTF-8: \345\214\226. */
4215 /* U+3051 HIRAGANA LETTER KE
4216 UTF-8: 0xE3 0x81 0x91
4217 C octal escaped UTF-8: \343\201\221. */
4220 /* 4-byte char: an emoji. */
4221 ASSERT_TRUE (check_cpp_valid_utf8_p ("\xf0\x9f\x98\x82"));
4223 /* Control codes, including the NUL byte. */
4224 ASSERT_TRUE (cpp_valid_utf8_p ("\r\n\v\0\1", 5));
4226 ASSERT_FALSE (check_cpp_valid_utf8_p ("\xf0!\x9f!\x98!\x82!"));
4228 /* Unexpected continuation bytes. */
4229 for (unsigned char continuation_byte
= 0x80;
4230 continuation_byte
<= 0xbf;
4231 continuation_byte
++)
4232 ASSERT_FALSE (cpp_valid_utf8_p ((const char *)&continuation_byte
, 1));
4234 /* "Lonely start characters" for 2-byte sequences. */
4236 unsigned char buf
[2];
4241 ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf
, 2));
4244 /* "Lonely start characters" for 3-byte sequences. */
4246 unsigned char buf
[2];
4251 ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf
, 2));
4254 /* "Lonely start characters" for 4-byte sequences. */
4256 unsigned char buf
[2];
4261 ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf
, 2));
4264 /* Invalid start characters (formerly valid for 5-byte and 6-byte
4267 unsigned char buf
[2];
4272 ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf
, 2));
4275 /* Impossible bytes. */
4276 ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc0"));
4277 ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc1"));
4278 ASSERT_FALSE (check_cpp_valid_utf8_p ("\xfe"));
4279 ASSERT_FALSE (check_cpp_valid_utf8_p ("\xff"));
4282 /* Run all of the selftests within this file. */
4287 test_linenum_comparisons ();
4288 test_should_have_column_data_p ();
4289 test_unknown_location ();
4291 for_each_line_table_case (test_make_location_nonpure_range_endpoints
);
4293 for_each_line_table_case (test_accessing_ordinary_linemaps
);
4294 for_each_line_table_case (test_lexer
);
4295 for_each_line_table_case (test_lexer_string_locations_simple
);
4296 for_each_line_table_case (test_lexer_string_locations_ebcdic
);
4297 for_each_line_table_case (test_lexer_string_locations_hex
);
4298 for_each_line_table_case (test_lexer_string_locations_oct
);
4299 for_each_line_table_case (test_lexer_string_locations_letter_escape_1
);
4300 for_each_line_table_case (test_lexer_string_locations_letter_escape_2
);
4301 for_each_line_table_case (test_lexer_string_locations_ucn4
);
4302 for_each_line_table_case (test_lexer_string_locations_ucn8
);
4303 for_each_line_table_case (test_lexer_string_locations_wide_string
);
4304 for_each_line_table_case (test_lexer_string_locations_string16
);
4305 for_each_line_table_case (test_lexer_string_locations_string32
);
4306 for_each_line_table_case (test_lexer_string_locations_u8
);
4307 for_each_line_table_case (test_lexer_string_locations_utf8_source
);
4308 for_each_line_table_case (test_lexer_string_locations_concatenation_1
);
4309 for_each_line_table_case (test_lexer_string_locations_concatenation_2
);
4310 for_each_line_table_case (test_lexer_string_locations_concatenation_3
);
4311 for_each_line_table_case (test_lexer_string_locations_macro
);
4312 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument
);
4313 for_each_line_table_case (test_lexer_string_locations_non_string
);
4314 for_each_line_table_case (test_lexer_string_locations_long_line
);
4315 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line
);
4316 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline
);
4317 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated
);
4318 for_each_line_table_case (test_lexer_char_constants
);
4320 test_reading_source_line ();
4321 test_reading_source_buffer ();
4323 test_line_offset_overflow ();
4326 test_cpp_valid_utf8_p ();
4329 } // namespace selftest
4331 #endif /* CHECKING_P */