[InstCombine] Signed saturation tests. NFC
[llvm-complete.git] / include / llvm / Support / DataExtractor.h
blobf590a1e104fb17a17d9c9060e67881de6bd23a50
1 //===-- DataExtractor.h -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_SUPPORT_DATAEXTRACTOR_H
10 #define LLVM_SUPPORT_DATAEXTRACTOR_H
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/Support/DataTypes.h"
14 #include "llvm/Support/Error.h"
16 namespace llvm {
/// An auxiliary type to facilitate extraction of 3-byte entities.
///
/// The three bytes are stored exactly as supplied; no byte order is implied
/// until getAsUint32() is asked to assemble them.
struct Uint24 {
  uint8_t Bytes[3];

  /// Fill all three bytes with the same value \p U.
  ///
  /// NOTE(review): this constructor is not `explicit`; code elsewhere may
  /// depend on the implicit conversion from an integer -- confirm before
  /// tightening it.
  Uint24(uint8_t U) {
    Bytes[0] = Bytes[1] = Bytes[2] = U;
  }

  /// Store \p U0, \p U1 and \p U2 in Bytes[0], Bytes[1] and Bytes[2].
  Uint24(uint8_t U0, uint8_t U1, uint8_t U2) {
    Bytes[0] = U0; Bytes[1] = U1; Bytes[2] = U2;
  }

  /// Assemble the three bytes into a 32-bit value.
  ///
  /// \param IsLittleEndian
  ///     When true Bytes[0] is the least significant byte, otherwise Bytes[2]
  ///     is. Bytes[1] is always the middle byte.
  ///
  /// \return
  ///     The 24-bit value in the low bits of a uint32_t.
  uint32_t getAsUint32(bool IsLittleEndian) const {
    // Index of the least significant byte; the most significant byte is at
    // the mirrored index 2 - LoIx.
    int LoIx = IsLittleEndian ? 0 : 2;
    return Bytes[LoIx] + (Bytes[1] << 8) + (Bytes[2 - LoIx] << 16);
  }
};
33 using uint24_t = Uint24;
34 static_assert(sizeof(uint24_t) == 3, "sizeof(uint24_t) != 3");
36 /// Needed by swapByteOrder().
37 inline uint24_t getSwappedBytes(uint24_t C) {
38 return uint24_t(C.Bytes[2], C.Bytes[1], C.Bytes[0]);
41 class DataExtractor {
42 StringRef Data;
43 uint8_t IsLittleEndian;
44 uint8_t AddressSize;
45 public:
46 /// A class representing a position in a DataExtractor, as well as any error
47 /// encountered during extraction. It enables one to extract a sequence of
48 /// values without error-checking and then checking for errors in bulk at the
49 /// end. The class holds an Error object, so failing to check the result of
50 /// the parse will result in a runtime error. The error flag is sticky and
51 /// will cause all subsequent extraction functions to fail without even
52 /// attempting to parse and without updating the Cursor offset. After clearing
53 /// the error flag, one can again use the Cursor object for parsing.
54 class Cursor {
55 uint64_t Offset;
56 Error Err;
58 friend class DataExtractor;
60 public:
61 /// Construct a cursor for extraction from the given offset.
62 explicit Cursor(uint64_t Offset) : Offset(Offset), Err(Error::success()) {}
64 /// Checks whether the cursor is valid (i.e. no errors were encountered). In
65 /// case of errors, this does not clear the error flag -- one must call
66 /// takeError() instead.
67 explicit operator bool() { return !Err; }
69 /// Return the current position of this Cursor. In the error state this is
70 /// the position of the Cursor before the first error was encountered.
71 uint64_t tell() const { return Offset; }
73 /// Return error contained inside this Cursor, if any. Clears the internal
74 /// Cursor state.
75 Error takeError() { return std::move(Err); }
78 /// Construct with a buffer that is owned by the caller.
79 ///
80 /// This constructor allows us to use data that is owned by the
81 /// caller. The data must stay around as long as this object is
82 /// valid.
83 DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize)
84 : Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
85 DataExtractor(ArrayRef<uint8_t> Data, bool IsLittleEndian,
86 uint8_t AddressSize)
87 : Data(StringRef(reinterpret_cast<const char *>(Data.data()),
88 Data.size())),
89 IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
91 /// Get the data pointed to by this extractor.
92 StringRef getData() const { return Data; }
93 /// Get the endianness for this extractor.
94 bool isLittleEndian() const { return IsLittleEndian; }
95 /// Get the address size for this extractor.
96 uint8_t getAddressSize() const { return AddressSize; }
97 /// Set the address size for this extractor.
98 void setAddressSize(uint8_t Size) { AddressSize = Size; }
100 /// Extract a C string from \a *offset_ptr.
102 /// Returns a pointer to a C String from the data at the offset
103 /// pointed to by \a offset_ptr. A variable length NULL terminated C
104 /// string will be extracted and the \a offset_ptr will be
105 /// updated with the offset of the byte that follows the NULL
106 /// terminator byte.
108 /// @param[in,out] offset_ptr
109 /// A pointer to an offset within the data that will be advanced
110 /// by the appropriate number of bytes if the value is extracted
111 /// correctly. If the offset is out of bounds or there are not
112 /// enough bytes to extract this value, the offset will be left
113 /// unmodified.
115 /// @return
116 /// A pointer to the C string value in the data. If the offset
117 /// pointed to by \a offset_ptr is out of bounds, or if the
118 /// offset plus the length of the C string is out of bounds,
119 /// NULL will be returned.
120 const char *getCStr(uint64_t *offset_ptr) const;
122 /// Extract a C string from \a *offset_ptr.
124 /// Returns a StringRef for the C String from the data at the offset
125 /// pointed to by \a offset_ptr. A variable length NULL terminated C
126 /// string will be extracted and the \a offset_ptr will be
127 /// updated with the offset of the byte that follows the NULL
128 /// terminator byte.
130 /// \param[in,out] offset_ptr
131 /// A pointer to an offset within the data that will be advanced
132 /// by the appropriate number of bytes if the value is extracted
133 /// correctly. If the offset is out of bounds or there are not
134 /// enough bytes to extract this value, the offset will be left
135 /// unmodified.
137 /// \return
138 /// A StringRef for the C string value in the data. If the offset
139 /// pointed to by \a offset_ptr is out of bounds, or if the
140 /// offset plus the length of the C string is out of bounds,
141 /// a default-initialized StringRef will be returned.
142 StringRef getCStrRef(uint64_t *offset_ptr) const;
144 /// Extract an unsigned integer of size \a byte_size from \a
145 /// *offset_ptr.
147 /// Extract a single unsigned integer value and update the offset
148 /// pointed to by \a offset_ptr. The size of the extracted integer
149 /// is specified by the \a byte_size argument. \a byte_size should
150 /// have a value greater than or equal to one and less than or equal
151 /// to eight since the return value is 64 bits wide. Any
152 /// \a byte_size values less than 1 or greater than 8 will result in
153 /// nothing being extracted, and zero being returned.
155 /// @param[in,out] offset_ptr
156 /// A pointer to an offset within the data that will be advanced
157 /// by the appropriate number of bytes if the value is extracted
158 /// correctly. If the offset is out of bounds or there are not
159 /// enough bytes to extract this value, the offset will be left
160 /// unmodified.
162 /// @param[in] byte_size
163 /// The size in bytes of the integer to extract.
165 /// @param[in,out] Err
166 /// A pointer to an Error object. Upon return the Error object is set to
167 /// indicate the result (success/failure) of the function. If the Error
168 /// object is already set when calling this function, no extraction is
169 /// performed.
171 /// @return
172 /// The unsigned integer value that was extracted, or zero on
173 /// failure.
174 uint64_t getUnsigned(uint64_t *offset_ptr, uint32_t byte_size,
175 Error *Err = nullptr) const;
177 /// Extract an unsigned integer of the given size from the location given by
178 /// the cursor. In case of an extraction error, or if the cursor is already in
179 /// an error state, zero is returned.
180 uint64_t getUnsigned(Cursor &C, uint32_t Size) const {
181 return getUnsigned(&C.Offset, Size, &C.Err);
184 /// Extract a signed integer of size \a byte_size from \a *offset_ptr.
186 /// Extract a single signed integer value (sign extending if required)
187 /// and update the offset pointed to by \a offset_ptr. The size of
188 /// the extracted integer is specified by the \a byte_size argument.
189 /// \a byte_size should have a value greater than or equal to one
190 /// and less than or equal to eight since the return value is 64
191 /// bits wide. Any \a byte_size values less than 1 or greater than
192 /// 8 will result in nothing being extracted, and zero being returned.
194 /// @param[in,out] offset_ptr
195 /// A pointer to an offset within the data that will be advanced
196 /// by the appropriate number of bytes if the value is extracted
197 /// correctly. If the offset is out of bounds or there are not
198 /// enough bytes to extract this value, the offset will be left
199 /// unmodified.
201 /// @param[in] size
202 /// The size in bytes of the integer to extract.
204 /// @return
205 /// The sign extended signed integer value that was extracted,
206 /// or zero on failure.
207 int64_t getSigned(uint64_t *offset_ptr, uint32_t size) const;
209 //------------------------------------------------------------------
210 /// Extract a pointer from \a *offset_ptr.
212 /// Extract a single pointer from the data and update the offset
213 /// pointed to by \a offset_ptr. The size of the extracted pointer
214 /// is \a getAddressSize(), so the address size has to be
215 /// set correctly prior to extracting any pointer values.
217 /// @param[in,out] offset_ptr
218 /// A pointer to an offset within the data that will be advanced
219 /// by the appropriate number of bytes if the value is extracted
220 /// correctly. If the offset is out of bounds or there are not
221 /// enough bytes to extract this value, the offset will be left
222 /// unmodified.
224 /// @return
225 /// The extracted pointer value as a 64-bit integer.
226 uint64_t getAddress(uint64_t *offset_ptr) const {
227 return getUnsigned(offset_ptr, AddressSize);
230 /// Extract a pointer-sized unsigned integer from the location given by the
231 /// cursor. In case of an extraction error, or if the cursor is already in
232 /// an error state, zero is returned.
233 uint64_t getAddress(Cursor &C) const { return getUnsigned(C, AddressSize); }
235 /// Extract a uint8_t value from \a *offset_ptr.
237 /// Extract a single uint8_t from the binary data at the offset
238 /// pointed to by \a offset_ptr, and advance the offset on success.
240 /// @param[in,out] offset_ptr
241 /// A pointer to an offset within the data that will be advanced
242 /// by the appropriate number of bytes if the value is extracted
243 /// correctly. If the offset is out of bounds or there are not
244 /// enough bytes to extract this value, the offset will be left
245 /// unmodified.
247 /// @param[in,out] Err
248 /// A pointer to an Error object. Upon return the Error object is set to
249 /// indicate the result (success/failure) of the function. If the Error
250 /// object is already set when calling this function, no extraction is
251 /// performed.
253 /// @return
254 /// The extracted uint8_t value.
255 uint8_t getU8(uint64_t *offset_ptr, Error *Err = nullptr) const;
257 /// Extract a single uint8_t value from the location given by the cursor. In
258 /// case of an extraction error, or if the cursor is already in an error
259 /// state, zero is returned.
260 uint8_t getU8(Cursor &C) const { return getU8(&C.Offset, &C.Err); }
262 /// Extract \a count uint8_t values from \a *offset_ptr.
264 /// Extract \a count uint8_t values from the binary data at the
265 /// offset pointed to by \a offset_ptr, and advance the offset on
266 /// success. The extracted values are copied into \a dst.
268 /// @param[in,out] offset_ptr
269 /// A pointer to an offset within the data that will be advanced
270 /// by the appropriate number of bytes if the value is extracted
271 /// correctly. If the offset is out of bounds or there are not
272 /// enough bytes to extract this value, the offset will be left
273 /// unmodified.
275 /// @param[out] dst
276 /// A buffer to copy \a count uint8_t values into. \a dst must
277 /// be large enough to hold all requested data.
279 /// @param[in] count
280 /// The number of uint8_t values to extract.
282 /// @return
283 /// \a dst if all values were properly extracted and copied,
284 /// NULL otherwise.
285 uint8_t *getU8(uint64_t *offset_ptr, uint8_t *dst, uint32_t count) const;
287 /// Extract \a Count uint8_t values from the location given by the cursor and
288 /// store them into the destination buffer. In case of an extraction error, or
289 /// if the cursor is already in an error state, a nullptr is returned and the
290 /// destination buffer is left unchanged.
291 uint8_t *getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const;
293 /// Extract \a Count uint8_t values from the location given by the cursor and
294 /// store them into the destination vector. The vector is resized to fit the
295 /// extracted data. In case of an extraction error, or if the cursor is
296 /// already in an error state, the destination vector is left unchanged and
297 /// cursor is placed into an error state.
298 void getU8(Cursor &C, SmallVectorImpl<uint8_t> &Dst, uint32_t Count) const {
299 if (isValidOffsetForDataOfSize(C.Offset, Count))
300 Dst.resize(Count);
302 // This relies on the fact that getU8 will not attempt to write to the
303 // buffer if isValidOffsetForDataOfSize(C.Offset, Count) is false.
304 getU8(C, Dst.data(), Count);
307 //------------------------------------------------------------------
308 /// Extract a uint16_t value from \a *offset_ptr.
310 /// Extract a single uint16_t from the binary data at the offset
311 /// pointed to by \a offset_ptr, and update the offset on success.
313 /// @param[in,out] offset_ptr
314 /// A pointer to an offset within the data that will be advanced
315 /// by the appropriate number of bytes if the value is extracted
316 /// correctly. If the offset is out of bounds or there are not
317 /// enough bytes to extract this value, the offset will be left
318 /// unmodified.
320 /// @param[in,out] Err
321 /// A pointer to an Error object. Upon return the Error object is set to
322 /// indicate the result (success/failure) of the function. If the Error
323 /// object is already set when calling this function, no extraction is
324 /// performed.
326 /// @return
327 /// The extracted uint16_t value.
328 //------------------------------------------------------------------
329 uint16_t getU16(uint64_t *offset_ptr, Error *Err = nullptr) const;
331 /// Extract a single uint16_t value from the location given by the cursor. In
332 /// case of an extraction error, or if the cursor is already in an error
333 /// state, zero is returned.
334 uint16_t getU16(Cursor &C) const { return getU16(&C.Offset, &C.Err); }
336 /// Extract \a count uint16_t values from \a *offset_ptr.
338 /// Extract \a count uint16_t values from the binary data at the
339 /// offset pointed to by \a offset_ptr, and advance the offset on
340 /// success. The extracted values are copied into \a dst.
342 /// @param[in,out] offset_ptr
343 /// A pointer to an offset within the data that will be advanced
344 /// by the appropriate number of bytes if the value is extracted
345 /// correctly. If the offset is out of bounds or there are not
346 /// enough bytes to extract this value, the offset will be left
347 /// unmodified.
349 /// @param[out] dst
350 /// A buffer to copy \a count uint16_t values into. \a dst must
351 /// be large enough to hold all requested data.
353 /// @param[in] count
354 /// The number of uint16_t values to extract.
356 /// @return
357 /// \a dst if all values were properly extracted and copied,
358 /// NULL otherwise.
359 uint16_t *getU16(uint64_t *offset_ptr, uint16_t *dst, uint32_t count) const;
361 /// Extract a 24-bit unsigned value from \a *offset_ptr and return it
362 /// in a uint32_t.
364 /// Extract 3 bytes from the binary data at the offset pointed to by
365 /// \a offset_ptr, construct a uint32_t from them and update the offset
366 /// on success.
368 /// @param[in,out] offset_ptr
369 /// A pointer to an offset within the data that will be advanced
370 /// by the 3 bytes if the value is extracted correctly. If the offset
371 /// is out of bounds or there are not enough bytes to extract this value,
372 /// the offset will be left unmodified.
374 /// @return
375 /// The extracted 24-bit value represented in a uint32_t.
376 uint32_t getU24(uint64_t *offset_ptr) const;
378 /// Extract a uint32_t value from \a *offset_ptr.
380 /// Extract a single uint32_t from the binary data at the offset
381 /// pointed to by \a offset_ptr, and update the offset on success.
383 /// @param[in,out] offset_ptr
384 /// A pointer to an offset within the data that will be advanced
385 /// by the appropriate number of bytes if the value is extracted
386 /// correctly. If the offset is out of bounds or there are not
387 /// enough bytes to extract this value, the offset will be left
388 /// unmodified.
390 /// @param[in,out] Err
391 /// A pointer to an Error object. Upon return the Error object is set to
392 /// indicate the result (success/failure) of the function. If the Error
393 /// object is already set when calling this function, no extraction is
394 /// performed.
396 /// @return
397 /// The extracted uint32_t value.
398 uint32_t getU32(uint64_t *offset_ptr, Error *Err = nullptr) const;
400 /// Extract a single uint32_t value from the location given by the cursor. In
401 /// case of an extraction error, or if the cursor is already in an error
402 /// state, zero is returned.
403 uint32_t getU32(Cursor &C) const { return getU32(&C.Offset, &C.Err); }
405 /// Extract \a count uint32_t values from \a *offset_ptr.
407 /// Extract \a count uint32_t values from the binary data at the
408 /// offset pointed to by \a offset_ptr, and advance the offset on
409 /// success. The extracted values are copied into \a dst.
411 /// @param[in,out] offset_ptr
412 /// A pointer to an offset within the data that will be advanced
413 /// by the appropriate number of bytes if the value is extracted
414 /// correctly. If the offset is out of bounds or there are not
415 /// enough bytes to extract this value, the offset will be left
416 /// unmodified.
418 /// @param[out] dst
419 /// A buffer to copy \a count uint32_t values into. \a dst must
420 /// be large enough to hold all requested data.
422 /// @param[in] count
423 /// The number of uint32_t values to extract.
425 /// @return
426 /// \a dst if all values were properly extracted and copied,
427 /// NULL otherwise.
428 uint32_t *getU32(uint64_t *offset_ptr, uint32_t *dst, uint32_t count) const;
430 /// Extract a uint64_t value from \a *offset_ptr.
432 /// Extract a single uint64_t from the binary data at the offset
433 /// pointed to by \a offset_ptr, and update the offset on success.
435 /// @param[in,out] offset_ptr
436 /// A pointer to an offset within the data that will be advanced
437 /// by the appropriate number of bytes if the value is extracted
438 /// correctly. If the offset is out of bounds or there are not
439 /// enough bytes to extract this value, the offset will be left
440 /// unmodified.
442 /// @param[in,out] Err
443 /// A pointer to an Error object. Upon return the Error object is set to
444 /// indicate the result (success/failure) of the function. If the Error
445 /// object is already set when calling this function, no extraction is
446 /// performed.
448 /// @return
449 /// The extracted uint64_t value.
450 uint64_t getU64(uint64_t *offset_ptr, Error *Err = nullptr) const;
452 /// Extract a single uint64_t value from the location given by the cursor. In
453 /// case of an extraction error, or if the cursor is already in an error
454 /// state, zero is returned.
455 uint64_t getU64(Cursor &C) const { return getU64(&C.Offset, &C.Err); }
457 /// Extract \a count uint64_t values from \a *offset_ptr.
459 /// Extract \a count uint64_t values from the binary data at the
460 /// offset pointed to by \a offset_ptr, and advance the offset on
461 /// success. The extracted values are copied into \a dst.
463 /// @param[in,out] offset_ptr
464 /// A pointer to an offset within the data that will be advanced
465 /// by the appropriate number of bytes if the value is extracted
466 /// correctly. If the offset is out of bounds or there are not
467 /// enough bytes to extract this value, the offset will be left
468 /// unmodified.
470 /// @param[out] dst
471 /// A buffer to copy \a count uint64_t values into. \a dst must
472 /// be large enough to hold all requested data.
474 /// @param[in] count
475 /// The number of uint64_t values to extract.
477 /// @return
478 /// \a dst if all values were properly extracted and copied,
479 /// NULL otherwise.
480 uint64_t *getU64(uint64_t *offset_ptr, uint64_t *dst, uint32_t count) const;
482 /// Extract a signed LEB128 value from \a *offset_ptr.
484 /// Extracts a signed LEB128 number from this object's data
485 /// starting at the offset pointed to by \a offset_ptr. The offset
486 /// pointed to by \a offset_ptr will be updated with the offset of
487 /// the byte following the last extracted byte.
489 /// @param[in,out] offset_ptr
490 /// A pointer to an offset within the data that will be advanced
491 /// by the appropriate number of bytes if the value is extracted
492 /// correctly. If the offset is out of bounds or there are not
493 /// enough bytes to extract this value, the offset will be left
494 /// unmodified.
496 /// @return
497 /// The extracted signed integer value.
498 int64_t getSLEB128(uint64_t *offset_ptr) const;
500 /// Extract an unsigned LEB128 value from \a *offset_ptr.
502 /// Extracts an unsigned LEB128 number from this object's data
503 /// starting at the offset pointed to by \a offset_ptr. The offset
504 /// pointed to by \a offset_ptr will be updated with the offset of
505 /// the byte following the last extracted byte.
507 /// @param[in,out] offset_ptr
508 /// A pointer to an offset within the data that will be advanced
509 /// by the appropriate number of bytes if the value is extracted
510 /// correctly. If the offset is out of bounds or there are not
511 /// enough bytes to extract this value, the offset will be left
512 /// unmodified.
514 /// @param[in,out] Err
515 /// A pointer to an Error object. Upon return the Error object is set to
516 /// indicate the result (success/failure) of the function. If the Error
517 /// object is already set when calling this function, no extraction is
518 /// performed.
520 /// @return
521 /// The extracted unsigned integer value.
522 uint64_t getULEB128(uint64_t *offset_ptr, llvm::Error *Err = nullptr) const;
524 /// Extract an unsigned ULEB128 value from the location given by the cursor.
525 /// In case of an extraction error, or if the cursor is already in an error
526 /// state, zero is returned.
527 uint64_t getULEB128(Cursor &C) const { return getULEB128(&C.Offset, &C.Err); }
529 /// Advance the Cursor position by the given number of bytes. No-op if the
530 /// cursor is in an error state.
531 void skip(Cursor &C, uint64_t Length) const;
533 /// Return true iff the cursor is at the end of the buffer, regardless of the
534 /// error state of the cursor. The only way both eof and error states can be
535 /// true is if one attempts a read while the cursor is at the very end of the
536 /// data buffer.
537 bool eof(const Cursor &C) const { return Data.size() == C.Offset; }
539 /// Test the validity of \a offset.
541 /// @return
542 /// \b true if \a offset is a valid offset into the data in this
543 /// object, \b false otherwise.
544 bool isValidOffset(uint64_t offset) const { return Data.size() > offset; }
546 /// Test the availability of \a length bytes of data from \a offset.
548 /// @return
549 /// \b true if \a offset is a valid offset and there are \a
550 /// length bytes available at that offset, \b false otherwise.
551 bool isValidOffsetForDataOfSize(uint64_t offset, uint64_t length) const {
552 return offset + length >= offset && isValidOffset(offset + length - 1);
555 /// Test the availability of enough bytes of data for a pointer from
556 /// \a offset. The size of a pointer is \a getAddressSize().
558 /// @return
559 /// \b true if \a offset is a valid offset and there are enough
560 /// bytes for a pointer available at that offset, \b false
561 /// otherwise.
562 bool isValidOffsetForAddress(uint64_t offset) const {
563 return isValidOffsetForDataOfSize(offset, AddressSize);
566 protected:
567 // Make it possible for subclasses to access these fields without making them
568 // public.
569 static uint64_t &getOffset(Cursor &C) { return C.Offset; }
570 static Error &getError(Cursor &C) { return C.Err; }
573 } // namespace llvm
575 #endif