dwarf_eh.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479
  1. /*
  2. * Copyright 2010-2011 PathScale, Inc. All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions are met:
  6. *
  7. * 1. Redistributions of source code must retain the above copyright notice,
  8. * this list of conditions and the following disclaimer.
  9. *
  10. * 2. Redistributions in binary form must reproduce the above copyright notice,
  11. * this list of conditions and the following disclaimer in the documentation
  12. * and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
  15. * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  16. * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
  18. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  19. * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  20. * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  21. * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  22. * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  23. * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  24. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. /**
  27. * dwarf_eh.h - Defines some helper functions for parsing DWARF exception
  28. * handling tables.
  29. *
  30. * This file contains various helper functions that are independent of the
  31. * language-specific code. It can be used in any personality function for the
  32. * Itanium ABI.
  33. */
  #include <assert.h>
  // TODO: Factor out Itanium / ARM differences. We probably want an itanium.h
  // and arm.h that can be included by this file depending on the target ABI.
  // _GNU_SOURCE must be defined for unwind.h to expose some of the functions
  // that we want. If it isn't, then we define it and undefine it to make sure
  // that it doesn't impact the rest of the program.
  #ifndef _GNU_SOURCE
  # define _GNU_SOURCE 1
  # include "unwind.h"
  # undef _GNU_SOURCE
  #else
  # include "unwind.h"
  #endif
  #include <stdint.h>
  #include <string.h>
  /// Type used for pointers into DWARF data. It is byte-granular, so pointer
  /// arithmetic on it advances one octet at a time.
  typedef unsigned char *dw_eh_ptr_t;
  /// Flag indicating a signed quantity. OR-ing it into an unsigned encoding
  /// gives the corresponding signed encoding.
  #define DW_EH_PE_signed 0x08
  /// DWARF data encoding types.
  enum dwarf_data_encoding
  {
      /// Unsigned, little-endian, base 128-encoded (variable length).
      DW_EH_PE_uleb128 = 0x01,
      /// Unsigned 16-bit integer.
      DW_EH_PE_udata2 = 0x02,
      /// Unsigned 32-bit integer.
      DW_EH_PE_udata4 = 0x03,
      /// Unsigned 64-bit integer.
      DW_EH_PE_udata8 = 0x04,
      /// Signed, little-endian, base 128-encoded (variable length).
      DW_EH_PE_sleb128 = DW_EH_PE_uleb128 | DW_EH_PE_signed,
      /// Signed 16-bit integer.
      DW_EH_PE_sdata2 = DW_EH_PE_udata2 | DW_EH_PE_signed,
      /// Signed 32-bit integer.
      DW_EH_PE_sdata4 = DW_EH_PE_udata4 | DW_EH_PE_signed,
      /// Signed 64-bit integer.
      DW_EH_PE_sdata8 = DW_EH_PE_udata8 | DW_EH_PE_signed
  };
  72. /**
  73. * Returns the encoding for a DWARF EH table entry. The encoding is stored in
  74. * the low four of an octet. The high four bits store the addressing mode.
  75. */
  76. static inline enum dwarf_data_encoding get_encoding(unsigned char x)
  77. {
  78. return (enum dwarf_data_encoding)(x & 0xf);
  79. }
  /**
   * DWARF addressing mode constants. Decoding a pointer from a DWARF exception
   * table takes two pieces of information: how the number is stored (the low
   * four bits of the encoding octet) and what the number is relative to (the
   * high four bits). The constants below cover the second part, and let a
   * decoded number be turned into an address in memory.
   */
  enum dwarf_data_relative
  {
      /// Absolute pointer value
      DW_EH_PE_absptr = 0x00,
      /// Value relative to program counter
      DW_EH_PE_pcrel = 0x10,
      /// Value relative to the text segment
      DW_EH_PE_textrel = 0x20,
      /// Value relative to the data segment
      DW_EH_PE_datarel = 0x30,
      /// Value relative to the start of the function
      DW_EH_PE_funcrel = 0x40,
      /// Aligned pointer (Not supported yet - are they actually used?)
      DW_EH_PE_aligned = 0x50,
      /// Pointer points to address of real value
      DW_EH_PE_indirect = 0x80,
      /// Value is omitted
      DW_EH_PE_omit = 0xff
  };
  106. /**
  107. * Returns the addressing mode component of this encoding.
  108. */
  109. static inline enum dwarf_data_relative get_base(unsigned char x)
  110. {
  111. return (enum dwarf_data_relative)(x & 0x70);
  112. }
  113. /**
  114. * Returns whether an encoding represents an indirect address.
  115. */
  116. static int is_indirect(unsigned char x)
  117. {
  118. return ((x & DW_EH_PE_indirect) == DW_EH_PE_indirect);
  119. }
  120. /**
  121. * Returns the size of a fixed-size encoding. This function will abort if
  122. * called with a value that is not a fixed-size encoding.
  123. */
  124. static inline int dwarf_size_of_fixed_size_field(unsigned char type)
  125. {
  126. switch (get_encoding(type))
  127. {
  128. default: abort();
  129. case DW_EH_PE_sdata2:
  130. case DW_EH_PE_udata2: return 2;
  131. case DW_EH_PE_sdata4:
  132. case DW_EH_PE_udata4: return 4;
  133. case DW_EH_PE_sdata8:
  134. case DW_EH_PE_udata8: return 8;
  135. case DW_EH_PE_absptr: return sizeof(void*);
  136. }
  137. }
  138. /**
  139. * Read an unsigned, little-endian, base-128, DWARF value. Updates *data to
  140. * point to the end of the value. Stores the number of bits read in the value
  141. * pointed to by b, allowing you to determine the value of the highest bit, and
  142. * therefore the sign of a signed value.
  143. *
  144. * This function is not intended to be called directly. Use read_sleb128() or
  145. * read_uleb128() for reading signed and unsigned versions, respectively.
  146. */
  147. static uint64_t read_leb128(dw_eh_ptr_t *data, int *b)
  148. {
  149. uint64_t uleb = 0;
  150. unsigned int bit = 0;
  151. unsigned char digit = 0;
  152. // We have to read at least one octet, and keep reading until we get to one
  153. // with the high bit unset
  154. do
  155. {
  156. // This check is a bit too strict - we should also check the highest
  157. // bit of the digit.
  158. assert(bit < sizeof(uint64_t) * 8);
  159. // Get the base 128 digit
  160. digit = (**data) & 0x7f;
  161. // Add it to the current value
  162. uleb += digit << bit;
  163. // Increase the shift value
  164. bit += 7;
  165. // Proceed to the next octet
  166. (*data)++;
  167. // Terminate when we reach a value that does not have the high bit set
  168. // (i.e. which was not modified when we mask it with 0x7f)
  169. } while ((*(*data - 1)) != digit);
  170. *b = bit;
  171. return uleb;
  172. }
  173. /**
  174. * Reads an unsigned little-endian base-128 value starting at the address
  175. * pointed to by *data. Updates *data to point to the next byte after the end
  176. * of the variable-length value.
  177. */
  178. static int64_t read_uleb128(dw_eh_ptr_t *data)
  179. {
  180. int b;
  181. return read_leb128(data, &b);
  182. }
  183. /**
  184. * Reads a signed little-endian base-128 value starting at the address pointed
  185. * to by *data. Updates *data to point to the next byte after the end of the
  186. * variable-length value.
  187. */
  188. static int64_t read_sleb128(dw_eh_ptr_t *data)
  189. {
  190. int bits;
  191. // Read as if it's signed
  192. uint64_t uleb = read_leb128(data, &bits);
  193. // If the most significant bit read is 1, then we need to sign extend it
  194. if ((uleb >> (bits-1)) == 1)
  195. {
  196. // Sign extend by setting all bits in front of it to 1
  197. uleb |= ((int64_t)-1) << bits;
  198. }
  199. return (int64_t)uleb;
  200. }
  201. /**
  202. * Reads a value using the specified encoding from the address pointed to by
  203. * *data. Updates the value of *data to point to the next byte after the end
  204. * of the data.
  205. */
  206. static uint64_t read_value(char encoding, dw_eh_ptr_t *data)
  207. {
  208. enum dwarf_data_encoding type = get_encoding(encoding);
  209. uint64_t v;
  210. switch (type)
  211. {
  212. // Read fixed-length types
  213. #define READ(dwarf, type) \
  214. case dwarf:\
  215. v = (uint64_t)(*(type*)(*data));\
  216. *data += sizeof(type);\
  217. break;
  218. READ(DW_EH_PE_udata2, uint16_t)
  219. READ(DW_EH_PE_udata4, uint32_t)
  220. READ(DW_EH_PE_udata8, uint64_t)
  221. READ(DW_EH_PE_sdata2, int16_t)
  222. READ(DW_EH_PE_sdata4, int32_t)
  223. READ(DW_EH_PE_sdata8, int64_t)
  224. READ(DW_EH_PE_absptr, intptr_t)
  225. #undef READ
  226. // Read variable-length types
  227. case DW_EH_PE_sleb128:
  228. v = read_sleb128(data);
  229. break;
  230. case DW_EH_PE_uleb128:
  231. v = read_uleb128(data);
  232. break;
  233. default: abort();
  234. }
  235. return v;
  236. }
  237. /**
  238. * Resolves an indirect value. This expects an unwind context, an encoding, a
  239. * decoded value, and the start of the region as arguments. The returned value
  240. * is a pointer to the address identified by the encoded value.
  241. *
  242. * If the encoding does not specify an indirect value, then this returns v.
  243. */
  244. static uint64_t resolve_indirect_value(_Unwind_Context *c,
  245. unsigned char encoding,
  246. int64_t v,
  247. dw_eh_ptr_t start)
  248. {
  249. switch (get_base(encoding))
  250. {
  251. case DW_EH_PE_pcrel:
  252. v += (uint64_t)start;
  253. break;
  254. case DW_EH_PE_textrel:
  255. v += (uint64_t)_Unwind_GetTextRelBase(c);
  256. break;
  257. case DW_EH_PE_datarel:
  258. v += (uint64_t)_Unwind_GetDataRelBase(c);
  259. break;
  260. case DW_EH_PE_funcrel:
  261. v += (uint64_t)_Unwind_GetRegionStart(c);
  262. default:
  263. break;
  264. }
  265. // If this is an indirect value, then it is really the address of the real
  266. // value
  267. // TODO: Check whether this should really always be a pointer - it seems to
  268. // be a GCC extensions, so not properly documented...
  269. if (is_indirect(encoding))
  270. {
  271. v = (uint64_t)(uintptr_t)*(void**)v;
  272. }
  273. return v;
  274. }
  275. /**
  276. * Reads an encoding and a value, updating *data to point to the next byte.
  277. */
  278. static inline void read_value_with_encoding(_Unwind_Context *context,
  279. dw_eh_ptr_t *data,
  280. uint64_t *out)
  281. {
  282. dw_eh_ptr_t start = *data;
  283. unsigned char encoding = *((*data)++);
  284. // If this value is omitted, skip it and don't touch the output value
  285. if (encoding == DW_EH_PE_omit) { return; }
  286. *out = read_value(encoding, data);
  287. *out = resolve_indirect_value(context, encoding, *out, start);
  288. }
  289. /**
  290. * Structure storing a decoded language-specific data area. Use parse_lsda()
  291. * to generate an instance of this structure from the address returned by the
  292. * generic unwind library.
  293. *
  294. * You should not need to inspect the fields of this structure directly if you
  295. * are just using this header. The structure stores the locations of the
  296. * various tables used for unwinding exceptions and is used by the functions
  297. * for reading values from these tables.
  298. */
  299. struct dwarf_eh_lsda
  300. {
  301. /// The start of the region. This is a cache of the value returned by
  302. /// _Unwind_GetRegionStart().
  303. dw_eh_ptr_t region_start;
  304. /// The start of the landing pads table.
  305. dw_eh_ptr_t landing_pads;
  306. /// The start of the type table.
  307. dw_eh_ptr_t type_table;
  308. /// The encoding used for entries in the type tables.
  309. unsigned char type_table_encoding;
  310. /// The location of the call-site table.
  311. dw_eh_ptr_t call_site_table;
  312. /// The location of the action table.
  313. dw_eh_ptr_t action_table;
  314. /// The encoding used for entries in the call-site table.
  315. unsigned char callsite_encoding;
  316. };
  317. /**
  318. * Parse the header on the language-specific data area and return a structure
  319. * containing the addresses and encodings of the various tables.
  320. */
  321. static inline struct dwarf_eh_lsda parse_lsda(_Unwind_Context *context,
  322. unsigned char *data)
  323. {
  324. struct dwarf_eh_lsda lsda;
  325. lsda.region_start = (dw_eh_ptr_t)(uintptr_t)_Unwind_GetRegionStart(context);
  326. // If the landing pads are relative to anything other than the start of
  327. // this region, find out where. This is @LPStart in the spec, although the
  328. // encoding that GCC uses does not quite match the spec.
  329. uint64_t v = (uint64_t)(uintptr_t)lsda.region_start;
  330. read_value_with_encoding(context, &data, &v);
  331. lsda.landing_pads = (dw_eh_ptr_t)(uintptr_t)v;
  332. // If there is a type table, find out where it is. This is @TTBase in the
  333. // spec. Note: we find whether there is a type table pointer by checking
  334. // whether the leading byte is DW_EH_PE_omit (0xff), which is not what the
  335. // spec says, but does seem to be how G++ indicates this.
  336. lsda.type_table = 0;
  337. lsda.type_table_encoding = *data++;
  338. if (lsda.type_table_encoding != DW_EH_PE_omit)
  339. {
  340. v = read_uleb128(&data);
  341. dw_eh_ptr_t type_table = data;
  342. type_table += v;
  343. lsda.type_table = type_table;
  344. //lsda.type_table = (uintptr_t*)(data + v);
  345. }
  346. #if __arm__
  347. lsda.type_table_encoding = (DW_EH_PE_pcrel | DW_EH_PE_indirect);
  348. #endif
  349. lsda.callsite_encoding = (enum dwarf_data_encoding)(*(data++));
  350. // Action table is immediately after the call site table
  351. lsda.action_table = data;
  352. uintptr_t callsite_size = (uintptr_t)read_uleb128(&data);
  353. lsda.action_table = data + callsite_size;
  354. // Call site table is immediately after the header
  355. lsda.call_site_table = (dw_eh_ptr_t)data;
  356. return lsda;
  357. }
  358. /**
  359. * Structure representing an action to be performed while unwinding. This
  360. * contains the address that should be unwound to and the action record that
  361. * provoked this action.
  362. */
  363. struct dwarf_eh_action
  364. {
  365. /**
  366. * The address that this action directs should be the new program counter
  367. * value after unwinding.
  368. */
  369. dw_eh_ptr_t landing_pad;
  370. /// The address of the action record.
  371. dw_eh_ptr_t action_record;
  372. };
  373. /**
  374. * Look up the landing pad that corresponds to the current invoke.
  375. * Returns true if record exists. The context is provided by the generic
  376. * unwind library and the lsda should be the result of a call to parse_lsda().
  377. *
  378. * The action record is returned via the result parameter.
  379. */
  380. static bool dwarf_eh_find_callsite(struct _Unwind_Context *context,
  381. struct dwarf_eh_lsda *lsda,
  382. struct dwarf_eh_action *result)
  383. {
  384. result->action_record = 0;
  385. result->landing_pad = 0;
  386. // The current instruction pointer offset within the region
  387. uint64_t ip = _Unwind_GetIP(context) - _Unwind_GetRegionStart(context);
  388. unsigned char *callsite_table = (unsigned char*)lsda->call_site_table;
  389. while (callsite_table <= lsda->action_table)
  390. {
  391. // Once again, the layout deviates from the spec.
  392. uint64_t call_site_start, call_site_size, landing_pad, action;
  393. call_site_start = read_value(lsda->callsite_encoding, &callsite_table);
  394. call_site_size = read_value(lsda->callsite_encoding, &callsite_table);
  395. // Call site entries are sorted, so if we find a call site that's after
  396. // the current instruction pointer then there is no action associated
  397. // with this call and we should unwind straight through this frame
  398. // without doing anything.
  399. if (call_site_start > ip) { break; }
  400. // Read the address of the landing pad and the action from the call
  401. // site table.
  402. landing_pad = read_value(lsda->callsite_encoding, &callsite_table);
  403. action = read_uleb128(&callsite_table);
  404. // We should not include the call_site_start (beginning of the region)
  405. // address in the ip range. For each call site:
  406. //
  407. // address1: call proc
  408. // address2: next instruction
  409. //
  410. // The call stack contains address2 and not address1, address1 can be
  411. // at the end of another EH region.
  412. if (call_site_start < ip && ip <= call_site_start + call_site_size)
  413. {
  414. if (action)
  415. {
  416. // Action records are 1-biased so both no-record and zeroth
  417. // record can be stored.
  418. result->action_record = lsda->action_table + action - 1;
  419. }
  420. // No landing pad means keep unwinding.
  421. if (landing_pad)
  422. {
  423. // Landing pad is the offset from the value in the header
  424. result->landing_pad = lsda->landing_pads + landing_pad;
  425. }
  426. return true;
  427. }
  428. }
  429. return false;
  430. }
  /// Defines an exception class from 8 bytes (endian independent). The first
  /// argument becomes the most significant byte of the 64-bit result. Each
  /// argument is parenthesised so that an expression argument is converted and
  /// shifted as a whole.
  #define EXCEPTION_CLASS(a,b,c,d,e,f,g,h) \
      (((uint64_t)(a) << 56) +\
       ((uint64_t)(b) << 48) +\
       ((uint64_t)(c) << 40) +\
       ((uint64_t)(d) << 32) +\
       ((uint64_t)(e) << 24) +\
       ((uint64_t)(f) << 16) +\
       ((uint64_t)(g) << 8) +\
       ((uint64_t)(h)))
  /// Packs 4 bytes into a 32-bit value (endian independent). The first
  /// argument becomes the most significant byte. The whole expansion is
  /// parenthesised so that it behaves as a single operand wherever it is used.
  #define GENERIC_EXCEPTION_CLASS(e,f,g,h) \
      (((uint32_t)(e) << 24) +\
       ((uint32_t)(f) << 16) +\
       ((uint32_t)(g) << 8) +\
       ((uint32_t)(h)))
  445. ((uint32_t)h)