mini_disassembler.cc 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432
  1. // -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
  2. /* Copyright (c) 2007, Google Inc.
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are
  7. * met:
  8. *
  9. * * Redistributions of source code must retain the above copyright
  10. * notice, this list of conditions and the following disclaimer.
  11. * * Redistributions in binary form must reproduce the above
  12. * copyright notice, this list of conditions and the following disclaimer
  13. * in the documentation and/or other materials provided with the
  14. * distribution.
  15. * * Neither the name of Google Inc. nor the names of its
  16. * contributors may be used to endorse or promote products derived from
  17. * this software without specific prior written permission.
  18. *
  19. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. *
  31. * ---
  32. * Author: Joi Sigurdsson
  33. *
  34. * Implementation of MiniDisassembler.
  35. */
  36. #include "mini_disassembler.h"
  37. namespace sidestep {
  38. MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
  39. bool address_default_is_32_bits)
  40. : operand_default_is_32_bits_(operand_default_is_32_bits),
  41. address_default_is_32_bits_(address_default_is_32_bits) {
  42. Initialize();
  43. }
  44. MiniDisassembler::MiniDisassembler()
  45. : operand_default_is_32_bits_(true),
  46. address_default_is_32_bits_(true) {
  47. Initialize();
  48. }
  49. InstructionType MiniDisassembler::Disassemble(
  50. unsigned char* start_byte,
  51. unsigned int& instruction_bytes) {
  52. // Clean up any state from previous invocations.
  53. Initialize();
  54. // Start by processing any prefixes.
  55. unsigned char* current_byte = start_byte;
  56. unsigned int size = 0;
  57. InstructionType instruction_type = ProcessPrefixes(current_byte, size);
  58. if (IT_UNKNOWN == instruction_type)
  59. return instruction_type;
  60. current_byte += size;
  61. size = 0;
  62. // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
  63. // and address_is_32_bits_ flags are correctly set.
  64. instruction_type = ProcessOpcode(current_byte, 0, size);
  65. // Check for error processing instruction
  66. if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
  67. return IT_UNKNOWN;
  68. }
  69. current_byte += size;
  70. // Invariant: operand_bytes_ indicates the total size of operands
  71. // specified by the opcode and/or ModR/M byte and/or SIB byte.
  72. // pCurrentByte points to the first byte after the ModR/M byte, or after
  73. // the SIB byte if it is present (i.e. the first byte of any operands
  74. // encoded in the instruction).
  75. // We get the total length of any prefixes, the opcode, and the ModR/M and
  76. // SIB bytes if present, by taking the difference of the original starting
  77. // address and the current byte (which points to the first byte of the
  78. // operands if present, or to the first byte of the next instruction if
  79. // they are not). Adding the count of bytes in the operands encoded in
  80. // the instruction gives us the full length of the instruction in bytes.
  81. instruction_bytes += operand_bytes_ + (current_byte - start_byte);
  82. // Return the instruction type, which was set by ProcessOpcode().
  83. return instruction_type_;
  84. }
  85. void MiniDisassembler::Initialize() {
  86. operand_is_32_bits_ = operand_default_is_32_bits_;
  87. address_is_32_bits_ = address_default_is_32_bits_;
  88. #ifdef _M_X64
  89. operand_default_support_64_bits_ = true;
  90. #else
  91. operand_default_support_64_bits_ = false;
  92. #endif
  93. operand_is_64_bits_ = false;
  94. operand_bytes_ = 0;
  95. have_modrm_ = false;
  96. should_decode_modrm_ = false;
  97. instruction_type_ = IT_UNKNOWN;
  98. got_f2_prefix_ = false;
  99. got_f3_prefix_ = false;
  100. got_66_prefix_ = false;
  101. }
  102. InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
  103. unsigned int& size) {
  104. InstructionType instruction_type = IT_GENERIC;
  105. const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
  106. switch (opcode.type_) {
  107. case IT_PREFIX_ADDRESS:
  108. address_is_32_bits_ = !address_default_is_32_bits_;
  109. goto nochangeoperand;
  110. case IT_PREFIX_OPERAND:
  111. operand_is_32_bits_ = !operand_default_is_32_bits_;
  112. nochangeoperand:
  113. case IT_PREFIX:
  114. if (0xF2 == (*start_byte))
  115. got_f2_prefix_ = true;
  116. else if (0xF3 == (*start_byte))
  117. got_f3_prefix_ = true;
  118. else if (0x66 == (*start_byte))
  119. got_66_prefix_ = true;
  120. else if (operand_default_support_64_bits_ && (*start_byte) & 0x48)
  121. operand_is_64_bits_ = true;
  122. instruction_type = opcode.type_;
  123. size ++;
  124. // we got a prefix, so add one and check next byte
  125. ProcessPrefixes(start_byte + 1, size);
  126. default:
  127. break; // not a prefix byte
  128. }
  129. return instruction_type;
  130. }
  131. InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
  132. unsigned int table_index,
  133. unsigned int& size) {
  134. const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table
  135. unsigned char current_byte = (*start_byte) >> table.shift_;
  136. current_byte = current_byte & table.mask_; // Mask out the bits we will use
  137. // Check whether the byte we have is inside the table we have.
  138. if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
  139. instruction_type_ = IT_UNKNOWN;
  140. return instruction_type_;
  141. }
  142. const Opcode& opcode = table.table_[current_byte];
  143. if (IT_UNUSED == opcode.type_) {
  144. // This instruction is not used by the IA-32 ISA, so we indicate
  145. // this to the user. Probably means that we were pointed to
  146. // a byte in memory that was not the start of an instruction.
  147. instruction_type_ = IT_UNUSED;
  148. return instruction_type_;
  149. } else if (IT_REFERENCE == opcode.type_) {
  150. // We are looking at an opcode that has more bytes (or is continued
  151. // in the ModR/M byte). Recursively find the opcode definition in
  152. // the table for the opcode's next byte.
  153. size++;
  154. ProcessOpcode(start_byte + 1, opcode.table_index_, size);
  155. return instruction_type_;
  156. }
  157. const SpecificOpcode* specific_opcode = (SpecificOpcode*)&opcode;
  158. if (opcode.is_prefix_dependent_) {
  159. if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
  160. specific_opcode = &opcode.opcode_if_f2_prefix_;
  161. } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
  162. specific_opcode = &opcode.opcode_if_f3_prefix_;
  163. } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
  164. specific_opcode = &opcode.opcode_if_66_prefix_;
  165. }
  166. }
  167. // Inv: The opcode type is known.
  168. instruction_type_ = specific_opcode->type_;
  169. // Let's process the operand types to see if we have any immediate
  170. // operands, and/or a ModR/M byte.
  171. ProcessOperand(specific_opcode->flag_dest_);
  172. ProcessOperand(specific_opcode->flag_source_);
  173. ProcessOperand(specific_opcode->flag_aux_);
  174. // Inv: We have processed the opcode and incremented operand_bytes_
  175. // by the number of bytes of any operands specified by the opcode
  176. // that are stored in the instruction (not registers etc.). Now
  177. // we need to return the total number of bytes for the opcode and
  178. // for the ModR/M or SIB bytes if they are present.
  179. if (table.mask_ != 0xff) {
  180. if (have_modrm_) {
  181. // we're looking at a ModR/M byte so we're not going to
  182. // count that into the opcode size
  183. ProcessModrm(start_byte, size);
  184. return IT_GENERIC;
  185. } else {
  186. // need to count the ModR/M byte even if it's just being
  187. // used for opcode extension
  188. size++;
  189. return IT_GENERIC;
  190. }
  191. } else {
  192. if (have_modrm_) {
  193. // The ModR/M byte is the next byte.
  194. size++;
  195. ProcessModrm(start_byte + 1, size);
  196. return IT_GENERIC;
  197. } else {
  198. size++;
  199. return IT_GENERIC;
  200. }
  201. }
  202. }
  203. bool MiniDisassembler::ProcessOperand(int flag_operand) {
  204. bool succeeded = true;
  205. if (AM_NOT_USED == flag_operand)
  206. return succeeded;
  207. // Decide what to do based on the addressing mode.
  208. switch (flag_operand & AM_MASK) {
  209. // No ModR/M byte indicated by these addressing modes, and no
  210. // additional (e.g. immediate) parameters.
  211. case AM_A: // Direct address
  212. case AM_F: // EFLAGS register
  213. case AM_X: // Memory addressed by the DS:SI register pair
  214. case AM_Y: // Memory addressed by the ES:DI register pair
  215. case AM_IMPLICIT: // Parameter is implicit, occupies no space in
  216. // instruction
  217. break;
  218. // There is a ModR/M byte but it does not necessarily need
  219. // to be decoded.
  220. case AM_C: // reg field of ModR/M selects a control register
  221. case AM_D: // reg field of ModR/M selects a debug register
  222. case AM_G: // reg field of ModR/M selects a general register
  223. case AM_P: // reg field of ModR/M selects an MMX register
  224. case AM_R: // mod field of ModR/M may refer only to a general register
  225. case AM_S: // reg field of ModR/M selects a segment register
  226. case AM_T: // reg field of ModR/M selects a test register
  227. case AM_V: // reg field of ModR/M selects a 128-bit XMM register
  228. have_modrm_ = true;
  229. break;
  230. // In these addressing modes, there is a ModR/M byte and it needs to be
  231. // decoded. No other (e.g. immediate) params than indicated in ModR/M.
  232. case AM_E: // Operand is either a general-purpose register or memory,
  233. // specified by ModR/M byte
  234. case AM_M: // ModR/M byte will refer only to memory
  235. case AM_Q: // Operand is either an MMX register or memory (complex
  236. // evaluation), specified by ModR/M byte
  237. case AM_W: // Operand is either a 128-bit XMM register or memory (complex
  238. // eval), specified by ModR/M byte
  239. have_modrm_ = true;
  240. should_decode_modrm_ = true;
  241. break;
  242. // These addressing modes specify an immediate or an offset value
  243. // directly, so we need to look at the operand type to see how many
  244. // bytes.
  245. case AM_I: // Immediate data.
  246. case AM_J: // Jump to offset.
  247. case AM_O: // Operand is at offset.
  248. switch (flag_operand & OT_MASK) {
  249. case OT_B: // Byte regardless of operand-size attribute.
  250. operand_bytes_ += OS_BYTE;
  251. break;
  252. case OT_C: // Byte or word, depending on operand-size attribute.
  253. if (operand_is_32_bits_)
  254. operand_bytes_ += OS_WORD;
  255. else
  256. operand_bytes_ += OS_BYTE;
  257. break;
  258. case OT_D: // Doubleword, regardless of operand-size attribute.
  259. operand_bytes_ += OS_DOUBLE_WORD;
  260. break;
  261. case OT_DQ: // Double-quadword, regardless of operand-size attribute.
  262. operand_bytes_ += OS_DOUBLE_QUAD_WORD;
  263. break;
  264. case OT_P: // 32-bit or 48-bit pointer, depending on operand-size
  265. // attribute.
  266. if (operand_is_32_bits_)
  267. operand_bytes_ += OS_48_BIT_POINTER;
  268. else
  269. operand_bytes_ += OS_32_BIT_POINTER;
  270. break;
  271. case OT_PS: // 128-bit packed single-precision floating-point data.
  272. operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
  273. break;
  274. case OT_Q: // Quadword, regardless of operand-size attribute.
  275. operand_bytes_ += OS_QUAD_WORD;
  276. break;
  277. case OT_S: // 6-byte pseudo-descriptor.
  278. operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
  279. break;
  280. case OT_SD: // Scalar Double-Precision Floating-Point Value
  281. case OT_PD: // Unaligned packed double-precision floating point value
  282. operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
  283. break;
  284. case OT_SS:
  285. // Scalar element of a 128-bit packed single-precision
  286. // floating data.
  287. // We simply return enItUnknown since we don't have to support
  288. // floating point
  289. succeeded = false;
  290. break;
  291. case OT_V: // Word, doubleword or quadword, depending on operand-size
  292. // attribute.
  293. if (operand_is_64_bits_ && flag_operand & AM_I &&
  294. flag_operand & IOS_64)
  295. operand_bytes_ += OS_QUAD_WORD;
  296. else if (operand_is_32_bits_)
  297. operand_bytes_ += OS_DOUBLE_WORD;
  298. else
  299. operand_bytes_ += OS_WORD;
  300. break;
  301. case OT_W: // Word, regardless of operand-size attribute.
  302. operand_bytes_ += OS_WORD;
  303. break;
  304. // Can safely ignore these.
  305. case OT_A: // Two one-word operands in memory or two double-word
  306. // operands in memory
  307. case OT_PI: // Quadword MMX technology register (e.g. mm0)
  308. case OT_SI: // Doubleword integer register (e.g., eax)
  309. break;
  310. default:
  311. break;
  312. }
  313. break;
  314. default:
  315. break;
  316. }
  317. return succeeded;
  318. }
  319. bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
  320. unsigned int& size) {
  321. // If we don't need to decode, we just return the size of the ModR/M
  322. // byte (there is never a SIB byte in this case).
  323. if (!should_decode_modrm_) {
  324. size++;
  325. return true;
  326. }
  327. // We never care about the reg field, only the combination of the mod
  328. // and r/m fields, so let's start by packing those fields together into
  329. // 5 bits.
  330. unsigned char modrm = (*start_byte);
  331. unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field
  332. modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field
  333. mod = mod >> 3; // shift the mod field to the right place
  334. modrm = mod | modrm; // combine the r/m and mod fields as discussed
  335. mod = mod >> 3; // shift the mod field to bits 2..0
  336. // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
  337. // in bits 2..0, and mod contains the mod field in bits 2..0
  338. const ModrmEntry* modrm_entry = 0;
  339. if (address_is_32_bits_)
  340. modrm_entry = &s_ia32_modrm_map_[modrm];
  341. else
  342. modrm_entry = &s_ia16_modrm_map_[modrm];
  343. // Invariant: modrm_entry points to information that we need to decode
  344. // the ModR/M byte.
  345. // Add to the count of operand bytes, if the ModR/M byte indicates
  346. // that some operands are encoded in the instruction.
  347. if (modrm_entry->is_encoded_in_instruction_)
  348. operand_bytes_ += modrm_entry->operand_size_;
  349. // Process the SIB byte if necessary, and return the count
  350. // of ModR/M and SIB bytes.
  351. if (modrm_entry->use_sib_byte_) {
  352. size++;
  353. return ProcessSib(start_byte + 1, mod, size);
  354. } else {
  355. size++;
  356. return true;
  357. }
  358. }
  359. bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
  360. unsigned char mod,
  361. unsigned int& size) {
  362. // get the mod field from the 2..0 bits of the SIB byte
  363. unsigned char sib_base = (*start_byte) & 0x07;
  364. if (0x05 == sib_base) {
  365. switch (mod) {
  366. case 0x00: // mod == 00
  367. case 0x02: // mod == 10
  368. operand_bytes_ += OS_DOUBLE_WORD;
  369. break;
  370. case 0x01: // mod == 01
  371. operand_bytes_ += OS_BYTE;
  372. break;
  373. case 0x03: // mod == 11
  374. // According to the IA-32 docs, there does not seem to be a disp
  375. // value for this value of mod
  376. default:
  377. break;
  378. }
  379. }
  380. size++;
  381. return true;
  382. }
  383. }; // namespace sidestep