[Alignment][NFC] Use Align with TargetLowering::setMinFunctionAlignment
[llvm-core.git] / include / llvm / Support / DataExtractor.h
blobbd337f23925c9cca3ba6593dd9c9ae63641f2a1f
1 //===-- DataExtractor.h -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_SUPPORT_DATAEXTRACTOR_H
10 #define LLVM_SUPPORT_DATAEXTRACTOR_H
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/Support/DataTypes.h"
14 #include "llvm/Support/Error.h"
16 namespace llvm {
/// An auxiliary type to facilitate extraction of 3-byte entities.
struct Uint24 {
  /// The three raw bytes, kept in the order in which they were supplied.
  uint8_t Bytes[3];

  /// Construct a value whose three bytes are all equal to \a U.
  Uint24(uint8_t U) {
    Bytes[0] = Bytes[1] = Bytes[2] = U;
  }

  /// Construct a value from three individual bytes; \a U0 becomes Bytes[0],
  /// \a U1 becomes Bytes[1] and \a U2 becomes Bytes[2].
  Uint24(uint8_t U0, uint8_t U1, uint8_t U2) {
    Bytes[0] = U0; Bytes[1] = U1; Bytes[2] = U2;
  }

  /// Combine the three bytes into a 24-bit quantity returned in a uint32_t.
  ///
  /// When \a IsLittleEndian is true Bytes[0] is the least significant byte
  /// and Bytes[2] the most significant one; otherwise the roles of Bytes[0]
  /// and Bytes[2] are exchanged. Bytes[1] is always the middle byte.
  uint32_t getAsUint32(bool IsLittleEndian) const {
    // Index of the least significant byte; the most significant byte is then
    // found at the mirrored index 2 - LoIx.
    int LoIx = IsLittleEndian ? 0 : 2;
    return Bytes[LoIx] + (Bytes[1] << 8) + (Bytes[2 - LoIx] << 16);
  }
};

using uint24_t = Uint24;
static_assert(sizeof(uint24_t) == 3, "sizeof(uint24_t) != 3");
36 /// Needed by swapByteOrder().
37 inline uint24_t getSwappedBytes(uint24_t C) {
38 return uint24_t(C.Bytes[2], C.Bytes[1], C.Bytes[0]);
41 class DataExtractor {
42 StringRef Data;
43 uint8_t IsLittleEndian;
44 uint8_t AddressSize;
45 public:
46 /// A class representing a position in a DataExtractor, as well as any error
47 /// encountered during extraction. It enables one to extract a sequence of
48 /// values without error-checking and then checking for errors in bulk at the
49 /// end. The class holds an Error object, so failing to check the result of
50 /// the parse will result in a runtime error. The error flag is sticky and
51 /// will cause all subsequent extraction functions to fail without even
52 /// attempting to parse and without updating the Cursor offset. After clearing
53 /// the error flag, one can again use the Cursor object for parsing.
54 class Cursor {
55 uint64_t Offset;
56 Error Err;
58 friend class DataExtractor;
60 public:
61 /// Construct a cursor for extraction from the given offset.
62 explicit Cursor(uint64_t Offset) : Offset(Offset), Err(Error::success()) {}
64 /// Checks whether the cursor is valid (i.e. no errors were encountered). In
65 /// case of errors, this does not clear the error flag -- one must call
66 /// takeError() instead.
67 explicit operator bool() { return !Err; }
69 /// Return the current position of this Cursor. In the error state this is
70 /// the position of the Cursor before the first error was encountered.
71 uint64_t tell() const { return Offset; }
73 /// Return error contained inside this Cursor, if any. Clears the internal
74 /// Cursor state.
75 Error takeError() { return std::move(Err); }
78 /// Construct with a buffer that is owned by the caller.
79 ///
80 /// This constructor allows us to use data that is owned by the
81 /// caller. The data must stay around as long as this object is
82 /// valid.
83 DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize)
84 : Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
86 /// Get the data pointed to by this extractor.
87 StringRef getData() const { return Data; }
88 /// Get the endianness for this extractor.
89 bool isLittleEndian() const { return IsLittleEndian; }
90 /// Get the address size for this extractor.
91 uint8_t getAddressSize() const { return AddressSize; }
92 /// Set the address size for this extractor.
93 void setAddressSize(uint8_t Size) { AddressSize = Size; }
95 /// Extract a C string from \a *offset_ptr.
96 ///
97 /// Returns a pointer to a C String from the data at the offset
98 /// pointed to by \a offset_ptr. A variable length NULL terminated C
99 /// string will be extracted and the \a offset_ptr will be
100 /// updated with the offset of the byte that follows the NULL
101 /// terminator byte.
103 /// @param[in,out] offset_ptr
104 /// A pointer to an offset within the data that will be advanced
105 /// by the appropriate number of bytes if the value is extracted
106 /// correctly. If the offset is out of bounds or there are not
107 /// enough bytes to extract this value, the offset will be left
108 /// unmodified.
110 /// @return
111 /// A pointer to the C string value in the data. If the offset
112 /// pointed to by \a offset_ptr is out of bounds, or if the
113 /// offset plus the length of the C string is out of bounds,
114 /// NULL will be returned.
115 const char *getCStr(uint64_t *offset_ptr) const;
117 /// Extract a C string from \a *offset_ptr.
119 /// Returns a StringRef for the C String from the data at the offset
120 /// pointed to by \a offset_ptr. A variable length NULL terminated C
121 /// string will be extracted and the \a offset_ptr will be
122 /// updated with the offset of the byte that follows the NULL
123 /// terminator byte.
125 /// \param[in,out] offset_ptr
126 /// A pointer to an offset within the data that will be advanced
127 /// by the appropriate number of bytes if the value is extracted
128 /// correctly. If the offset is out of bounds or there are not
129 /// enough bytes to extract this value, the offset will be left
130 /// unmodified.
132 /// \return
133 /// A StringRef for the C string value in the data. If the offset
134 /// pointed to by \a offset_ptr is out of bounds, or if the
135 /// offset plus the length of the C string is out of bounds,
136 /// a default-initialized StringRef will be returned.
137 StringRef getCStrRef(uint64_t *offset_ptr) const;
139 /// Extract an unsigned integer of size \a byte_size from \a
140 /// *offset_ptr.
142 /// Extract a single unsigned integer value and update the offset
143 /// pointed to by \a offset_ptr. The size of the extracted integer
144 /// is specified by the \a byte_size argument. \a byte_size should
145 /// have a value greater than or equal to one and less than or equal
146 /// to eight since the return value is 64 bits wide. Any
147 /// \a byte_size values less than 1 or greater than 8 will result in
148 /// nothing being extracted, and zero being returned.
150 /// @param[in,out] offset_ptr
151 /// A pointer to an offset within the data that will be advanced
152 /// by the appropriate number of bytes if the value is extracted
153 /// correctly. If the offset is out of bounds or there are not
154 /// enough bytes to extract this value, the offset will be left
155 /// unmodified.
157 /// @param[in] byte_size
158 /// The size in byte of the integer to extract.
160 /// @param[in,out] Err
161 /// A pointer to an Error object. Upon return the Error object is set to
162 /// indicate the result (success/failure) of the function. If the Error
163 /// object is already set when calling this function, no extraction is
164 /// performed.
166 /// @return
167 /// The unsigned integer value that was extracted, or zero on
168 /// failure.
169 uint64_t getUnsigned(uint64_t *offset_ptr, uint32_t byte_size,
170 Error *Err = nullptr) const;
172 /// Extract an unsigned integer of the given size from the location given by
173 /// the cursor. In case of an extraction error, or if the cursor is already in
174 /// an error state, zero is returned.
175 uint64_t getUnsigned(Cursor &C, uint32_t Size) const {
176 return getUnsigned(&C.Offset, Size, &C.Err);
179 /// Extract an signed integer of size \a byte_size from \a *offset_ptr.
181 /// Extract a single signed integer value (sign extending if required)
182 /// and update the offset pointed to by \a offset_ptr. The size of
183 /// the extracted integer is specified by the \a byte_size argument.
184 /// \a byte_size should have a value greater than or equal to one
185 /// and less than or equal to eight since the return value is 64
186 /// bits wide. Any \a byte_size values less than 1 or greater than
187 /// 8 will result in nothing being extracted, and zero being returned.
189 /// @param[in,out] offset_ptr
190 /// A pointer to an offset within the data that will be advanced
191 /// by the appropriate number of bytes if the value is extracted
192 /// correctly. If the offset is out of bounds or there are not
193 /// enough bytes to extract this value, the offset will be left
194 /// unmodified.
196 /// @param[in] size
197 /// The size in bytes of the integer to extract.
199 /// @return
200 /// The sign extended signed integer value that was extracted,
201 /// or zero on failure.
202 int64_t getSigned(uint64_t *offset_ptr, uint32_t size) const;
204 //------------------------------------------------------------------
205 /// Extract an pointer from \a *offset_ptr.
207 /// Extract a single pointer from the data and update the offset
208 /// pointed to by \a offset_ptr. The size of the extracted pointer
209 /// is \a getAddressSize(), so the address size has to be
210 /// set correctly prior to extracting any pointer values.
212 /// @param[in,out] offset_ptr
213 /// A pointer to an offset within the data that will be advanced
214 /// by the appropriate number of bytes if the value is extracted
215 /// correctly. If the offset is out of bounds or there are not
216 /// enough bytes to extract this value, the offset will be left
217 /// unmodified.
219 /// @return
220 /// The extracted pointer value as a 64 integer.
221 uint64_t getAddress(uint64_t *offset_ptr) const {
222 return getUnsigned(offset_ptr, AddressSize);
225 /// Extract a pointer-sized unsigned integer from the location given by the
226 /// cursor. In case of an extraction error, or if the cursor is already in
227 /// an error state, zero is returned.
228 uint64_t getAddress(Cursor &C) const { return getUnsigned(C, AddressSize); }
230 /// Extract a uint8_t value from \a *offset_ptr.
232 /// Extract a single uint8_t from the binary data at the offset
233 /// pointed to by \a offset_ptr, and advance the offset on success.
235 /// @param[in,out] offset_ptr
236 /// A pointer to an offset within the data that will be advanced
237 /// by the appropriate number of bytes if the value is extracted
238 /// correctly. If the offset is out of bounds or there are not
239 /// enough bytes to extract this value, the offset will be left
240 /// unmodified.
242 /// @param[in,out] Err
243 /// A pointer to an Error object. Upon return the Error object is set to
244 /// indicate the result (success/failure) of the function. If the Error
245 /// object is already set when calling this function, no extraction is
246 /// performed.
248 /// @return
249 /// The extracted uint8_t value.
250 uint8_t getU8(uint64_t *offset_ptr, Error *Err = nullptr) const;
252 /// Extract a single uint8_t value from the location given by the cursor. In
253 /// case of an extraction error, or if the cursor is already in an error
254 /// state, zero is returned.
255 uint8_t getU8(Cursor &C) const { return getU8(&C.Offset, &C.Err); }
257 /// Extract \a count uint8_t values from \a *offset_ptr.
259 /// Extract \a count uint8_t values from the binary data at the
260 /// offset pointed to by \a offset_ptr, and advance the offset on
261 /// success. The extracted values are copied into \a dst.
263 /// @param[in,out] offset_ptr
264 /// A pointer to an offset within the data that will be advanced
265 /// by the appropriate number of bytes if the value is extracted
266 /// correctly. If the offset is out of bounds or there are not
267 /// enough bytes to extract this value, the offset will be left
268 /// unmodified.
270 /// @param[out] dst
271 /// A buffer to copy \a count uint8_t values into. \a dst must
272 /// be large enough to hold all requested data.
274 /// @param[in] count
275 /// The number of uint8_t values to extract.
277 /// @return
278 /// \a dst if all values were properly extracted and copied,
279 /// NULL otherise.
280 uint8_t *getU8(uint64_t *offset_ptr, uint8_t *dst, uint32_t count) const;
282 /// Extract \a Count uint8_t values from the location given by the cursor and
283 /// store them into the destination buffer. In case of an extraction error, or
284 /// if the cursor is already in an error state, a nullptr is returned and the
285 /// destination buffer is left unchanged.
286 uint8_t *getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const;
288 /// Extract \a Count uint8_t values from the location given by the cursor and
289 /// store them into the destination vector. The vector is resized to fit the
290 /// extracted data. In case of an extraction error, or if the cursor is
291 /// already in an error state, the destination vector is left unchanged and
292 /// cursor is placed into an error state.
293 void getU8(Cursor &C, SmallVectorImpl<uint8_t> &Dst, uint32_t Count) const {
294 if (isValidOffsetForDataOfSize(C.Offset, Count))
295 Dst.resize(Count);
297 // This relies on the fact that getU8 will not attempt to write to the
298 // buffer if isValidOffsetForDataOfSize(C.Offset, Count) is false.
299 getU8(C, Dst.data(), Count);
302 //------------------------------------------------------------------
303 /// Extract a uint16_t value from \a *offset_ptr.
305 /// Extract a single uint16_t from the binary data at the offset
306 /// pointed to by \a offset_ptr, and update the offset on success.
308 /// @param[in,out] offset_ptr
309 /// A pointer to an offset within the data that will be advanced
310 /// by the appropriate number of bytes if the value is extracted
311 /// correctly. If the offset is out of bounds or there are not
312 /// enough bytes to extract this value, the offset will be left
313 /// unmodified.
315 /// @param[in,out] Err
316 /// A pointer to an Error object. Upon return the Error object is set to
317 /// indicate the result (success/failure) of the function. If the Error
318 /// object is already set when calling this function, no extraction is
319 /// performed.
321 /// @return
322 /// The extracted uint16_t value.
323 //------------------------------------------------------------------
324 uint16_t getU16(uint64_t *offset_ptr, Error *Err = nullptr) const;
326 /// Extract a single uint16_t value from the location given by the cursor. In
327 /// case of an extraction error, or if the cursor is already in an error
328 /// state, zero is returned.
329 uint16_t getU16(Cursor &C) const { return getU16(&C.Offset, &C.Err); }
331 /// Extract \a count uint16_t values from \a *offset_ptr.
333 /// Extract \a count uint16_t values from the binary data at the
334 /// offset pointed to by \a offset_ptr, and advance the offset on
335 /// success. The extracted values are copied into \a dst.
337 /// @param[in,out] offset_ptr
338 /// A pointer to an offset within the data that will be advanced
339 /// by the appropriate number of bytes if the value is extracted
340 /// correctly. If the offset is out of bounds or there are not
341 /// enough bytes to extract this value, the offset will be left
342 /// unmodified.
344 /// @param[out] dst
345 /// A buffer to copy \a count uint16_t values into. \a dst must
346 /// be large enough to hold all requested data.
348 /// @param[in] count
349 /// The number of uint16_t values to extract.
351 /// @return
352 /// \a dst if all values were properly extracted and copied,
353 /// NULL otherise.
354 uint16_t *getU16(uint64_t *offset_ptr, uint16_t *dst, uint32_t count) const;
356 /// Extract a 24-bit unsigned value from \a *offset_ptr and return it
357 /// in a uint32_t.
359 /// Extract 3 bytes from the binary data at the offset pointed to by
360 /// \a offset_ptr, construct a uint32_t from them and update the offset
361 /// on success.
363 /// @param[in,out] offset_ptr
364 /// A pointer to an offset within the data that will be advanced
365 /// by the 3 bytes if the value is extracted correctly. If the offset
366 /// is out of bounds or there are not enough bytes to extract this value,
367 /// the offset will be left unmodified.
369 /// @return
370 /// The extracted 24-bit value represented in a uint32_t.
371 uint32_t getU24(uint64_t *offset_ptr) const;
373 /// Extract a uint32_t value from \a *offset_ptr.
375 /// Extract a single uint32_t from the binary data at the offset
376 /// pointed to by \a offset_ptr, and update the offset on success.
378 /// @param[in,out] offset_ptr
379 /// A pointer to an offset within the data that will be advanced
380 /// by the appropriate number of bytes if the value is extracted
381 /// correctly. If the offset is out of bounds or there are not
382 /// enough bytes to extract this value, the offset will be left
383 /// unmodified.
385 /// @param[in,out] Err
386 /// A pointer to an Error object. Upon return the Error object is set to
387 /// indicate the result (success/failure) of the function. If the Error
388 /// object is already set when calling this function, no extraction is
389 /// performed.
391 /// @return
392 /// The extracted uint32_t value.
393 uint32_t getU32(uint64_t *offset_ptr, Error *Err = nullptr) const;
395 /// Extract a single uint32_t value from the location given by the cursor. In
396 /// case of an extraction error, or if the cursor is already in an error
397 /// state, zero is returned.
398 uint32_t getU32(Cursor &C) const { return getU32(&C.Offset, &C.Err); }
400 /// Extract \a count uint32_t values from \a *offset_ptr.
402 /// Extract \a count uint32_t values from the binary data at the
403 /// offset pointed to by \a offset_ptr, and advance the offset on
404 /// success. The extracted values are copied into \a dst.
406 /// @param[in,out] offset_ptr
407 /// A pointer to an offset within the data that will be advanced
408 /// by the appropriate number of bytes if the value is extracted
409 /// correctly. If the offset is out of bounds or there are not
410 /// enough bytes to extract this value, the offset will be left
411 /// unmodified.
413 /// @param[out] dst
414 /// A buffer to copy \a count uint32_t values into. \a dst must
415 /// be large enough to hold all requested data.
417 /// @param[in] count
418 /// The number of uint32_t values to extract.
420 /// @return
421 /// \a dst if all values were properly extracted and copied,
422 /// NULL otherise.
423 uint32_t *getU32(uint64_t *offset_ptr, uint32_t *dst, uint32_t count) const;
425 /// Extract a uint64_t value from \a *offset_ptr.
427 /// Extract a single uint64_t from the binary data at the offset
428 /// pointed to by \a offset_ptr, and update the offset on success.
430 /// @param[in,out] offset_ptr
431 /// A pointer to an offset within the data that will be advanced
432 /// by the appropriate number of bytes if the value is extracted
433 /// correctly. If the offset is out of bounds or there are not
434 /// enough bytes to extract this value, the offset will be left
435 /// unmodified.
437 /// @param[in,out] Err
438 /// A pointer to an Error object. Upon return the Error object is set to
439 /// indicate the result (success/failure) of the function. If the Error
440 /// object is already set when calling this function, no extraction is
441 /// performed.
443 /// @return
444 /// The extracted uint64_t value.
445 uint64_t getU64(uint64_t *offset_ptr, Error *Err = nullptr) const;
447 /// Extract a single uint64_t value from the location given by the cursor. In
448 /// case of an extraction error, or if the cursor is already in an error
449 /// state, zero is returned.
450 uint64_t getU64(Cursor &C) const { return getU64(&C.Offset, &C.Err); }
452 /// Extract \a count uint64_t values from \a *offset_ptr.
454 /// Extract \a count uint64_t values from the binary data at the
455 /// offset pointed to by \a offset_ptr, and advance the offset on
456 /// success. The extracted values are copied into \a dst.
458 /// @param[in,out] offset_ptr
459 /// A pointer to an offset within the data that will be advanced
460 /// by the appropriate number of bytes if the value is extracted
461 /// correctly. If the offset is out of bounds or there are not
462 /// enough bytes to extract this value, the offset will be left
463 /// unmodified.
465 /// @param[out] dst
466 /// A buffer to copy \a count uint64_t values into. \a dst must
467 /// be large enough to hold all requested data.
469 /// @param[in] count
470 /// The number of uint64_t values to extract.
472 /// @return
473 /// \a dst if all values were properly extracted and copied,
474 /// NULL otherise.
475 uint64_t *getU64(uint64_t *offset_ptr, uint64_t *dst, uint32_t count) const;
477 /// Extract a signed LEB128 value from \a *offset_ptr.
479 /// Extracts an signed LEB128 number from this object's data
480 /// starting at the offset pointed to by \a offset_ptr. The offset
481 /// pointed to by \a offset_ptr will be updated with the offset of
482 /// the byte following the last extracted byte.
484 /// @param[in,out] offset_ptr
485 /// A pointer to an offset within the data that will be advanced
486 /// by the appropriate number of bytes if the value is extracted
487 /// correctly. If the offset is out of bounds or there are not
488 /// enough bytes to extract this value, the offset will be left
489 /// unmodified.
491 /// @return
492 /// The extracted signed integer value.
493 int64_t getSLEB128(uint64_t *offset_ptr) const;
495 /// Extract a unsigned LEB128 value from \a *offset_ptr.
497 /// Extracts an unsigned LEB128 number from this object's data
498 /// starting at the offset pointed to by \a offset_ptr. The offset
499 /// pointed to by \a offset_ptr will be updated with the offset of
500 /// the byte following the last extracted byte.
502 /// @param[in,out] offset_ptr
503 /// A pointer to an offset within the data that will be advanced
504 /// by the appropriate number of bytes if the value is extracted
505 /// correctly. If the offset is out of bounds or there are not
506 /// enough bytes to extract this value, the offset will be left
507 /// unmodified.
509 /// @param[in,out] Err
510 /// A pointer to an Error object. Upon return the Error object is set to
511 /// indicate the result (success/failure) of the function. If the Error
512 /// object is already set when calling this function, no extraction is
513 /// performed.
515 /// @return
516 /// The extracted unsigned integer value.
517 uint64_t getULEB128(uint64_t *offset_ptr, llvm::Error *Err = nullptr) const;
519 /// Extract an unsigned ULEB128 value from the location given by the cursor.
520 /// In case of an extraction error, or if the cursor is already in an error
521 /// state, zero is returned.
522 uint64_t getULEB128(Cursor &C) const { return getULEB128(&C.Offset, &C.Err); }
524 /// Advance the Cursor position by the given number of bytes. No-op if the
525 /// cursor is in an error state.
526 void skip(Cursor &C, uint64_t Length) const;
528 /// Return true iff the cursor is at the end of the buffer, regardless of the
529 /// error state of the cursor. The only way both eof and error states can be
530 /// true is if one attempts a read while the cursor is at the very end of the
531 /// data buffer.
532 bool eof(const Cursor &C) const { return Data.size() == C.Offset; }
534 /// Test the validity of \a offset.
536 /// @return
537 /// \b true if \a offset is a valid offset into the data in this
538 /// object, \b false otherwise.
539 bool isValidOffset(uint64_t offset) const { return Data.size() > offset; }
541 /// Test the availability of \a length bytes of data from \a offset.
543 /// @return
544 /// \b true if \a offset is a valid offset and there are \a
545 /// length bytes available at that offset, \b false otherwise.
546 bool isValidOffsetForDataOfSize(uint64_t offset, uint64_t length) const {
547 return offset + length >= offset && isValidOffset(offset + length - 1);
550 /// Test the availability of enough bytes of data for a pointer from
551 /// \a offset. The size of a pointer is \a getAddressSize().
553 /// @return
554 /// \b true if \a offset is a valid offset and there are enough
555 /// bytes for a pointer available at that offset, \b false
556 /// otherwise.
557 bool isValidOffsetForAddress(uint64_t offset) const {
558 return isValidOffsetForDataOfSize(offset, AddressSize);
561 protected:
562 // Make it possible for subclasses to access these fields without making them
563 // public.
564 static uint64_t &getOffset(Cursor &C) { return C.Offset; }
565 static Error &getError(Cursor &C) { return C.Err; }
568 } // namespace llvm
570 #endif