/*
 * Copyright 2010-2011 PathScale, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/**
 * dwarf_eh.h - Defines some helper functions for parsing DWARF exception
 * handling tables.
 *
 * This file contains various helper functions that are independent of the
 * language-specific code.  It can be used in any personality function for the
 * Itanium ABI.
 */
// TODO: Factor out Itanium / ARM differences.  We probably want an itanium.h
// and arm.h that can be included by this file depending on the target ABI.

// _GNU_SOURCE must be defined for unwind.h to expose some of the functions
// that we want.  If it isn't, then we define it and undefine it to make sure
// that it doesn't impact the rest of the program.
43 # define _GNU_SOURCE 1
/// Type used for pointers into DWARF data.  The data is addressed one octet
/// at a time, so the pointee is a plain unsigned char.
typedef unsigned char *dw_eh_ptr_t;
// Flag indicating a signed quantity
#define DW_EH_PE_signed 0x08
/// DWARF data encoding types.  Each signed encoding is the matching unsigned
/// encoding with DW_EH_PE_signed set.
enum dwarf_data_encoding
{
    /// Absolute pointer value
    DW_EH_PE_absptr  = 0x00,
    /// Unsigned, little-endian, base 128-encoded (variable length).
    DW_EH_PE_uleb128 = 0x01,
    /// Unsigned 16-bit integer.
    DW_EH_PE_udata2  = 0x02,
    /// Unsigned 32-bit integer.
    DW_EH_PE_udata4  = 0x03,
    /// Unsigned 64-bit integer.
    DW_EH_PE_udata8  = 0x04,
    /// Signed, little-endian, base 128-encoded (variable length)
    DW_EH_PE_sleb128 = DW_EH_PE_uleb128 | DW_EH_PE_signed,
    /// Signed 16-bit integer.
    DW_EH_PE_sdata2  = DW_EH_PE_udata2 | DW_EH_PE_signed,
    /// Signed 32-bit integer.
    DW_EH_PE_sdata4  = DW_EH_PE_udata4 | DW_EH_PE_signed,
    /// Signed 64-bit integer.
    DW_EH_PE_sdata8  = DW_EH_PE_udata8 | DW_EH_PE_signed
};
81 * Returns the encoding for a DWARF EH table entry. The encoding is stored in
82 * the low four of an octet. The high four bits store the addressing mode.
84 static inline enum dwarf_data_encoding
get_encoding(unsigned char x
)
86 return (enum dwarf_data_encoding
)(x
& 0xf);
/**
 * DWARF addressing mode constants.  When reading a pointer value from a DWARF
 * exception table, you must know how it is stored and what the addressing mode
 * is.  The low four bits tell you the encoding, allowing you to decode a
 * number.  The high four bits tell you the addressing mode, allowing you to
 * turn that number into an address in memory.
 */
enum dwarf_data_relative
{
    /// Value is omitted (the whole octet is 0xff)
    DW_EH_PE_omit     = 0xff,
    /// Value relative to program counter
    DW_EH_PE_pcrel    = 0x10,
    /// Value relative to the text segment
    DW_EH_PE_textrel  = 0x20,
    /// Value relative to the data segment
    DW_EH_PE_datarel  = 0x30,
    /// Value relative to the start of the function
    DW_EH_PE_funcrel  = 0x40,
    /// Aligned pointer (Not supported yet - are they actually used?)
    DW_EH_PE_aligned  = 0x50,
    /// Pointer points to address of real value
    DW_EH_PE_indirect = 0x80
};
114 * Returns the addressing mode component of this encoding.
116 static inline enum dwarf_data_relative
get_base(unsigned char x
)
118 return (enum dwarf_data_relative
)(x
& 0x70);
121 * Returns whether an encoding represents an indirect address.
123 static int is_indirect(unsigned char x
)
125 return ((x
& DW_EH_PE_indirect
) == DW_EH_PE_indirect
);
129 * Returns the size of a fixed-size encoding. This function will abort if
130 * called with a value that is not a fixed-size encoding.
132 static inline int dwarf_size_of_fixed_size_field(unsigned char type
)
134 switch (get_encoding(type
))
137 case DW_EH_PE_sdata2
:
138 case DW_EH_PE_udata2
: return 2;
139 case DW_EH_PE_sdata4
:
140 case DW_EH_PE_udata4
: return 4;
141 case DW_EH_PE_sdata8
:
142 case DW_EH_PE_udata8
: return 8;
143 case DW_EH_PE_absptr
: return sizeof(void*);
148 * Read an unsigned, little-endian, base-128, DWARF value. Updates *data to
149 * point to the end of the value. Stores the number of bits read in the value
150 * pointed to by b, allowing you to determine the value of the highest bit, and
151 * therefore the sign of a signed value.
153 * This function is not intended to be called directly. Use read_sleb128() or
154 * read_uleb128() for reading signed and unsigned versions, respectively.
156 static uint64_t read_leb128(dw_eh_ptr_t
*data
, int *b
)
159 unsigned int bit
= 0;
160 unsigned char digit
= 0;
161 // We have to read at least one octet, and keep reading until we get to one
162 // with the high bit unset
165 // This check is a bit too strict - we should also check the highest
167 assert(bit
< sizeof(uint64_t) * 8);
168 // Get the base 128 digit
169 digit
= (**data
) & 0x7f;
170 // Add it to the current value
171 uleb
+= digit
<< bit
;
172 // Increase the shift value
174 // Proceed to the next octet
176 // Terminate when we reach a value that does not have the high bit set
177 // (i.e. which was not modified when we mask it with 0x7f)
178 } while ((*(*data
- 1)) != digit
);
185 * Reads an unsigned little-endian base-128 value starting at the address
186 * pointed to by *data. Updates *data to point to the next byte after the end
187 * of the variable-length value.
189 static int64_t read_uleb128(dw_eh_ptr_t
*data
)
192 return read_leb128(data
, &b
);
196 * Reads a signed little-endian base-128 value starting at the address pointed
197 * to by *data. Updates *data to point to the next byte after the end of the
198 * variable-length value.
200 static int64_t read_sleb128(dw_eh_ptr_t
*data
)
203 // Read as if it's signed
204 uint64_t uleb
= read_leb128(data
, &bits
);
205 // If the most significant bit read is 1, then we need to sign extend it
206 if ((uleb
>> (bits
-1)) == 1)
208 // Sign extend by setting all bits in front of it to 1
209 uleb
|= ((int64_t)-1) << bits
;
211 return (int64_t)uleb
;
214 * Reads a value using the specified encoding from the address pointed to by
215 * *data. Updates the value of *data to point to the next byte after the end
218 static uint64_t read_value(char encoding
, dw_eh_ptr_t
*data
)
220 enum dwarf_data_encoding type
= get_encoding(encoding
);
224 // Read fixed-length types
225 #define READ(dwarf, type) \
227 v = (uint64_t)(*(type*)(*data));\
228 *data += sizeof(type);\
230 READ(DW_EH_PE_udata2
, uint16_t)
231 READ(DW_EH_PE_udata4
, uint32_t)
232 READ(DW_EH_PE_udata8
, uint64_t)
233 READ(DW_EH_PE_sdata2
, int16_t)
234 READ(DW_EH_PE_sdata4
, int32_t)
235 READ(DW_EH_PE_sdata8
, int64_t)
236 READ(DW_EH_PE_absptr
, intptr_t)
238 // Read variable-length types
239 case DW_EH_PE_sleb128
:
240 v
= read_sleb128(data
);
242 case DW_EH_PE_uleb128
:
243 v
= read_uleb128(data
);
252 * Resolves an indirect value. This expects an unwind context, an encoding, a
253 * decoded value, and the start of the region as arguments. The returned value
254 * is a pointer to the address identified by the encoded value.
256 * If the encoding does not specify an indirect value, then this returns v.
258 static uint64_t resolve_indirect_value(_Unwind_Context
*c
,
259 unsigned char encoding
,
263 switch (get_base(encoding
))
266 v
+= (uint64_t)start
;
268 case DW_EH_PE_textrel
:
269 v
+= (uint64_t)_Unwind_GetTextRelBase(c
);
271 case DW_EH_PE_datarel
:
272 v
+= (uint64_t)_Unwind_GetDataRelBase(c
);
274 case DW_EH_PE_funcrel
:
275 v
+= (uint64_t)_Unwind_GetRegionStart(c
);
279 // If this is an indirect value, then it is really the address of the real
281 // TODO: Check whether this should really always be a pointer - it seems to
282 // be a GCC extensions, so not properly documented...
283 if (is_indirect(encoding
))
285 v
= (uint64_t)(uintptr_t)*(void**)v
;
292 * Reads an encoding and a value, updating *data to point to the next byte.
294 static inline void read_value_with_encoding(_Unwind_Context
*context
,
298 dw_eh_ptr_t start
= *data
;
299 unsigned char encoding
= *((*data
)++);
300 // If this value is omitted, skip it and don't touch the output value
301 if (encoding
== DW_EH_PE_omit
) { return; }
303 *out
= read_value(encoding
, data
);
304 *out
= resolve_indirect_value(context
, encoding
, *out
, start
);
308 * Structure storing a decoded language-specific data area. Use parse_lsda()
309 * to generate an instance of this structure from the address returned by the
310 * generic unwind library.
312 * You should not need to inspect the fields of this structure directly if you
313 * are just using this header. The structure stores the locations of the
314 * various tables used for unwinding exceptions and is used by the functions
315 * for reading values from these tables.
319 /// The start of the region. This is a cache of the value returned by
320 /// _Unwind_GetRegionStart().
321 dw_eh_ptr_t region_start
;
322 /// The start of the landing pads table.
323 dw_eh_ptr_t landing_pads
;
324 /// The start of the type table.
325 dw_eh_ptr_t type_table
;
326 /// The encoding used for entries in the type tables.
327 unsigned char type_table_encoding
;
328 /// The location of the call-site table.
329 dw_eh_ptr_t call_site_table
;
330 /// The location of the action table.
331 dw_eh_ptr_t action_table
;
332 /// The encoding used for entries in the call-site table.
333 unsigned char callsite_encoding
;
337 * Parse the header on the language-specific data area and return a structure
338 * containing the addresses and encodings of the various tables.
340 static inline struct dwarf_eh_lsda
parse_lsda(_Unwind_Context
*context
,
343 struct dwarf_eh_lsda lsda
;
345 lsda
.region_start
= (dw_eh_ptr_t
)(uintptr_t)_Unwind_GetRegionStart(context
);
347 // If the landing pads are relative to anything other than the start of
348 // this region, find out where. This is @LPStart in the spec, although the
349 // encoding that GCC uses does not quite match the spec.
350 uint64_t v
= (uint64_t)(uintptr_t)lsda
.region_start
;
351 read_value_with_encoding(context
, &data
, &v
);
352 lsda
.landing_pads
= (dw_eh_ptr_t
)(uintptr_t)v
;
354 // If there is a type table, find out where it is. This is @TTBase in the
355 // spec. Note: we find whether there is a type table pointer by checking
356 // whether the leading byte is DW_EH_PE_omit (0xff), which is not what the
357 // spec says, but does seem to be how G++ indicates this.
359 lsda
.type_table_encoding
= *data
++;
360 if (lsda
.type_table_encoding
!= DW_EH_PE_omit
)
362 v
= read_uleb128(&data
);
363 dw_eh_ptr_t type_table
= data
;
365 lsda
.type_table
= type_table
;
366 //lsda.type_table = (uintptr_t*)(data + v);
369 lsda
.type_table_encoding
= (DW_EH_PE_pcrel
| DW_EH_PE_indirect
);
372 lsda
.callsite_encoding
= (enum dwarf_data_encoding
)(*(data
++));
374 // Action table is immediately after the call site table
375 lsda
.action_table
= data
;
376 uintptr_t callsite_size
= (uintptr_t)read_uleb128(&data
);
377 lsda
.action_table
= data
+ callsite_size
;
378 // Call site table is immediately after the header
379 lsda
.call_site_table
= (dw_eh_ptr_t
)data
;
386 * Structure representing an action to be performed while unwinding. This
387 * contains the address that should be unwound to and the action record that
388 * provoked this action.
390 struct dwarf_eh_action
393 * The address that this action directs should be the new program counter
394 * value after unwinding.
396 dw_eh_ptr_t landing_pad
;
397 /// The address of the action record.
398 dw_eh_ptr_t action_record
;
402 * Look up the landing pad that corresponds to the current invoke.
403 * Returns true if record exists. The context is provided by the generic
404 * unwind library and the lsda should be the result of a call to parse_lsda().
406 * The action record is returned via the result parameter.
408 static bool dwarf_eh_find_callsite(struct _Unwind_Context
*context
,
409 struct dwarf_eh_lsda
*lsda
,
410 struct dwarf_eh_action
*result
)
412 result
->action_record
= 0;
413 result
->landing_pad
= 0;
414 // The current instruction pointer offset within the region
415 uint64_t ip
= _Unwind_GetIP(context
) - _Unwind_GetRegionStart(context
);
416 unsigned char *callsite_table
= (unsigned char*)lsda
->call_site_table
;
418 while (callsite_table
<= lsda
->action_table
)
420 // Once again, the layout deviates from the spec.
421 uint64_t call_site_start
, call_site_size
, landing_pad
, action
;
422 call_site_start
= read_value(lsda
->callsite_encoding
, &callsite_table
);
423 call_site_size
= read_value(lsda
->callsite_encoding
, &callsite_table
);
425 // Call site entries are sorted, so if we find a call site that's after
426 // the current instruction pointer then there is no action associated
427 // with this call and we should unwind straight through this frame
428 // without doing anything.
429 if (call_site_start
> ip
) { break; }
431 // Read the address of the landing pad and the action from the call
433 landing_pad
= read_value(lsda
->callsite_encoding
, &callsite_table
);
434 action
= read_uleb128(&callsite_table
);
436 // We should not include the call_site_start (beginning of the region)
437 // address in the ip range. For each call site:
439 // address1: call proc
440 // address2: next instruction
442 // The call stack contains address2 and not address1, address1 can be
443 // at the end of another EH region.
444 if (call_site_start
< ip
&& ip
<= call_site_start
+ call_site_size
)
448 // Action records are 1-biased so both no-record and zeroth
449 // record can be stored.
450 result
->action_record
= lsda
->action_table
+ action
- 1;
452 // No landing pad means keep unwinding.
455 // Landing pad is the offset from the value in the header
456 result
->landing_pad
= lsda
->landing_pads
+ landing_pad
;
/// Defines an exception class from 8 bytes (endian independent).  The first
/// argument becomes the most significant byte of the 64-bit result.  Every
/// argument is parenthesised so that expression arguments bind as expected.
#define EXCEPTION_CLASS(a,b,c,d,e,f,g,h) \
    ((((uint64_t)(a)) << 56) +\
     (((uint64_t)(b)) << 48) +\
     (((uint64_t)(c)) << 40) +\
     (((uint64_t)(d)) << 32) +\
     (((uint64_t)(e)) << 24) +\
     (((uint64_t)(f)) << 16) +\
     (((uint64_t)(g)) << 8) +\
     (((uint64_t)(h))))
475 #define GENERIC_EXCEPTION_CLASS(e,f,g,h) \
476 ((uint32_t)e << 24) +\
477 ((uint32_t)f << 16) +\
478 ((uint32_t)g << 8) +\