2 * Copyright 2008, Google Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
9 * * Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following disclaimer
13 * in the documentation and/or other materials provided with the
15 * * Neither the name of Google Inc. nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 * ncdis.c - disassemble using NaCl decoder.
42 #include "native_client/include/nacl_elf.h"
43 #include "native_client/ncv/ncfileutil.h"
44 #include "native_client/ncv/ncdecode.h"
45 /* NOTE: Generated! */
46 #include "native_client/ncv/ncdisasmtab.h"
/* Program name used as the prefix of diagnostics written to stderr. */
static const char *progname;
/*
 * Report a fatal error on stderr.
 *
 * Writes "<progname>: fatal error: " and then the caller's printf-style
 * message, built from fmt and the variadic arguments that follow it.
 */
50 void fatal(const char *fmt
, ...)
53 fprintf(stderr
, "%s: fatal error: ", progname
);
/* The caller's format string is forwarded unchanged together with ap. */
55 vfprintf(stderr
, fmt
, ap
);
61 /* later this will make decoding x87 instructions a bit more concise. */
/*
 * Table of x87 format-string tables.  DisFmt indexes it first with
 * (opcode byte - 0xd8) and then with the instruction's ModRM byte.
 */
62 static const char **kDisasmX87Op
[8] = { kDisasm87D8
,
71 /* disassembler stuff */
72 static const char *DisFmt(const struct NCDecoderState
*mstate
) {
73 static const uint8_t kWAITOp
= 0x9b;
74 uint8_t *opbyte
= &mstate
->inst
.maddr
[mstate
->inst
.prefixbytes
];
75 uint8_t pm
= mstate
->inst
.prefixmask
;
77 if (mstate
->opinfo
->insttype
== NACLi_X87
) {
78 if (opbyte
[0] != kWAITOp
) {
79 return kDisasmX87Op
[opbyte
[0]-0xd8][mstate
->inst
.mrm
];
82 if (mstate
->opinfo
->insttype
== NACLi_FCMOV
) {
83 return kDisasmX87Op
[opbyte
[0]-0xd8][mstate
->inst
.mrm
];
85 if (*opbyte
!= kTwoByteOpcodeByte1
) return kDisasm1ByteOp
[opbyte
[0]];
86 if (opbyte
[1] == 0x0f) return kDisasm0F0FOp
[opbyte
[mstate
->inst
.length
- 1]];
87 if (opbyte
[1] == 0x38) return kDisasm0F38Op
[opbyte
[2]];
88 if (opbyte
[1] == 0x3A) return kDisasm0F3AOp
[opbyte
[2]];
89 if (! (pm
& (kPrefixDATA16
| kPrefixREPNE
| kPrefixREP
))) {
90 return kDisasm0FXXOp
[opbyte
[1]];
92 if (pm
& kPrefixDATA16
) return kDisasm660FXXOp
[opbyte
[1]];
93 if (pm
& kPrefixREPNE
) return kDisasmF20FXXOp
[opbyte
[1]];
94 if (pm
& kPrefixREP
) return kDisasmF30FXXOp
[opbyte
[1]];
96 /* no update; should be invalid */
97 return "internal error";
/*
 * Read a one-byte immediate and sign-extend it to int.
 *
 * byte_array must point to at least one readable byte.  Returns a value in
 * the range -128..127.
 *
 * The sign extension is done arithmetically rather than with a cast to
 * `char`, whose signedness is implementation-defined.
 */
static int ByteImmediate(const uint8_t* byte_array) {
  int value = byte_array[0];
  return (value & 0x80) ? value - 0x100 : value;
}
/*
 * Read a two-byte little-endian immediate and sign-extend it to int.
 *
 * byte_array must point to at least two readable bytes.  Returns a value in
 * the range -32768..32767.
 *
 * The sign extension is done arithmetically so the result does not depend on
 * the width of `short` or on implementation-defined narrowing conversions.
 */
static int WordImmediate(const uint8_t* byte_array) {
  int value = byte_array[0] | (byte_array[1] << 8);
  return (value & 0x8000) ? value - 0x10000 : value;
}
/*
 * Read a four-byte little-endian immediate as a signed 32-bit value.
 *
 * byte_array must point to at least four readable bytes.
 *
 * The bytes are assembled in an unsigned 32-bit accumulator: shifting a byte
 * >= 0x80 left by 24 in plain `int` would overflow, which is undefined
 * behaviour.  The two's-complement value is then produced arithmetically
 * (assumes int is at least 32 bits wide).
 */
static int DwordImmediate(const uint8_t* byte_array) {
  uint32_t bits = (uint32_t) byte_array[0] |
                  ((uint32_t) byte_array[1] << 8) |
                  ((uint32_t) byte_array[2] << 16) |
                  ((uint32_t) byte_array[3] << 24);

  if (bits & 0x80000000u) {
    /* Negative value: rebuild it without an out-of-range conversion. */
    return (int) (bits & 0x7fffffffu) - 0x7fffffff - 1;
  }
  return (int) bits;
}
/*
 * Register name tables.  The printing code indexes them with register
 * numbers extracted from the ModRM and SIB bytes.
 */

/* General-purpose registers. */
static const char* gp_regs[] = {
  "%eax",
  "%ecx",
  "%edx",
  "%ebx",
  "%esp",
  "%ebp",
  "%esi",
  "%edi"
};

/* MMX registers. */
static const char* mmx_regs[] = {
  "%mm0",
  "%mm1",
  "%mm2",
  "%mm3",
  "%mm4",
  "%mm5",
  "%mm6",
  "%mm7"
};

/* XMM registers. */
static const char* xmm_regs[] = {
  "%xmm0",
  "%xmm1",
  "%xmm2",
  "%xmm3",
  "%xmm4",
  "%xmm5",
  "%xmm6",
  "%xmm7"
};

/*
 * Segment registers.
 * NOTE(review): this table has six entries while the others have eight -
 * confirm that no caller can index it with 6 or 7.
 */
static const char* seg_regs[] = {
  "%es",
  "%cs",
  "%ss",
  "%ds",
  "%fs",
  "%gs"
};
/*
 * Print, to stdout, the bracketed memory operand described by a SIB byte.
 *
 * mstate     - decoder state; the instruction bytes are read from
 *              mstate->inst.maddr.
 * sib_offset - offset of the SIB byte from the start of the instruction.
 *
 * Output is one of "[0x<disp>]", "[base]", "[base + index]" or
 * "[base + scale * index]", using the names in gp_regs.
 */
131 static void SibPrint(const struct NCDecoderState
*mstate
, uint32_t sib_offset
) {
132 uint8_t sib
= mstate
->inst
.maddr
[sib_offset
];
/* Scale field of zero: any index register is printed without a multiplier. */
134 if (sib_ss(sib
) == 0) {
/* Base field 5: print the four bytes that follow the SIB byte as an address. */
135 if (sib_base(sib
) == 5) {
136 const uint8_t* disp_addr
= mstate
->inst
.maddr
+ sib_offset
+ 1;
137 fprintf(stdout
, "[0x%x]", DwordImmediate(disp_addr
));
139 /* Has a base register */
/* Index field 4: only the base register is printed. */
140 if (sib_index(sib
) == 4) {
142 fprintf(stdout
, "[%s]", gp_regs
[sib_base(sib
)]);
144 fprintf(stdout
, "[%s + %s]",
145 gp_regs
[sib_base(sib
)],
146 gp_regs
[sib_index(sib
)]);
/* Non-zero scale field; index field 4 again prints only the base register. */
150 if (sib_index(sib
) == 4) {
152 fprintf(stdout
, "[%s]", gp_regs
[sib_base(sib
)]);
154 fprintf(stdout
, "[%s + %d * %s]",
155 gp_regs
[sib_base(sib
)],
157 gp_regs
[sib_index(sib
)]);
162 static void SegPrefixPrint(const struct NCDecoderState
*mstate
) {
163 uint8_t pm
= mstate
->inst
.prefixmask
;
164 if (pm
& kPrefixSEGCS
) {
165 fprintf(stdout
, "cs:");
166 } else if (pm
& kPrefixSEGSS
) {
167 fprintf(stdout
, "ss:");
168 } else if (pm
& kPrefixSEGFS
) {
169 fprintf(stdout
, "fs:");
170 } else if (pm
& kPrefixSEGGS
) {
171 fprintf(stdout
, "gs:");
/*
 * Print, to stdout, the operand selected by the instruction's ModRM byte.
 *
 * mstate    - decoder state for the instruction being printed.
 * reg_names - register name table used when the operand is a register
 *             (the last branch of the switch below).
 *
 * The switch on modrm_mod() has four branches.  The first three print a
 * memory operand: any segment prefix, then a displacement and/or a SIB
 * operand or "[base]".  The last one prints reg_names[rm].
 * NOTE(review): presumably these are mod 0, 1, 2 and 3 respectively -
 * confirm against the case labels.
 */
175 static void RegMemPrint(const struct NCDecoderState
*mstate
,
176 const char* reg_names
[]) {
/* Offset of the SIB byte, built from the prefix and opcode byte counts. */
177 uint32_t sib_offset
=
178 mstate
->inst
.prefixbytes
+
180 mstate
->inst
.hasopbyte2
+
181 mstate
->inst
.hasopbyte3
+
/* Address of the displacement bytes; it accounts for a SIB byte if present. */
183 const uint8_t* disp_addr
= mstate
->inst
.maddr
+
185 mstate
->inst
.hassibbyte
;
187 switch (modrm_mod(mstate
->inst
.mrm
)) {
/* No displacement printed first: rm 4 uses the SIB byte, rm 5 is an absolute address. */
189 SegPrefixPrint(mstate
);
190 if (4 == modrm_rm(mstate
->inst
.mrm
)) {
191 SibPrint(mstate
, sib_offset
);
192 } else if (5 == modrm_rm(mstate
->inst
.mrm
)) {
193 fprintf(stdout
, "[0x%x]", DwordImmediate(disp_addr
));
195 fprintf(stdout
, "[%s]", gp_regs
[modrm_rm(mstate
->inst
.mrm
)]);
/* One-byte displacement printed ahead of the SIB operand or base register. */
200 SegPrefixPrint(mstate
);
201 fprintf(stdout
, "0x%x", ByteImmediate(disp_addr
));
202 if (4 == modrm_rm(mstate
->inst
.mrm
)) {
203 SibPrint(mstate
, sib_offset
);
205 fprintf(stdout
, "[%s]", gp_regs
[modrm_rm(mstate
->inst
.mrm
)]);
/* Four-byte displacement printed ahead of the SIB operand or base register. */
211 SegPrefixPrint(mstate
);
212 fprintf(stdout
, "0x%x", DwordImmediate(disp_addr
));
213 if (4 == modrm_rm(mstate
->inst
.mrm
)) {
214 SibPrint(mstate
, sib_offset
);
216 fprintf(stdout
, "[%s]", gp_regs
[modrm_rm(mstate
->inst
.mrm
)]);
/* Register operand: the name comes from the caller-supplied table. */
221 fprintf(stdout
, "%s", reg_names
[modrm_rm(mstate
->inst
.mrm
)]);
/*
 * Print one instruction to stdout by expanding a format template.
 *
 * format - template string; it is copied into token_buf and split with
 *          strtok on spaces, commas and newlines.
 * mstate - decoder state supplying the instruction bytes and ModRM fields.
 *
 * Each token is handled in one of three ways:
 *   - "$group..." tokens are replaced by the kDisasmModRMOp entry for that
 *     group, indexed by the ModRM reg field;
 *   - other tokens beginning with '$' are expanded into an operand
 *     (register name, memory operand, immediate, jump target, ...);
 *   - everything else is printed unchanged.
 * Tokens are separated on output by " " or ", " depending on pos.
 */
226 static void InstFormat(const char* format
,
227 const struct NCDecoderState
*mstate
) {
229 char* fmt
= token_buf
;
/*
 * NOTE(review): strncpy leaves token_buf without a terminating NUL when
 * format is at least sizeof(token_buf) characters long - confirm that every
 * template fits.
 */
232 strncpy(token_buf
, format
, sizeof(token_buf
));
/* strtok modifies the buffer it scans, hence the private copy above. */
235 char* token
= strtok(fmt
, " ,\n");
240 fprintf(stdout
, ", ");
241 } else if (pos
> 0) {
242 fprintf(stdout
, " ");
/* "$group<N>" tokens: look the mnemonic up by the ModRM reg field. */
244 if (('$' == token
[0]) && !strncmp(token
, "$group", 6)) {
245 int mrm
= modrm_reg(mstate
->inst
.mrm
);
246 if (!strcmp(token
, "$group1")) {
247 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP1
][mrm
]);
248 } else if (!strcmp(token
, "$group2")) {
249 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP2
][mrm
]);
250 } else if (!strcmp(token
, "$group3")) {
251 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP3
][mrm
]);
252 } else if (!strcmp(token
, "$group4")) {
253 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP4
][mrm
]);
254 } else if (!strcmp(token
, "$group5")) {
255 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP5
][mrm
]);
256 } else if (!strcmp(token
, "$group6")) {
257 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP6
][mrm
]);
258 } else if (!strcmp(token
, "$group7")) {
259 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP7
][mrm
]);
260 } else if (!strcmp(token
, "$group8")) {
261 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP8
][mrm
]);
262 } else if (!strcmp(token
, "$group9")) {
263 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP9
][mrm
]);
264 } else if (!strcmp(token
, "$group10")) {
265 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP10
][mrm
]);
266 } else if (!strcmp(token
, "$group11")) {
267 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP11
][mrm
]);
268 } else if (!strcmp(token
, "$group12")) {
269 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP12
][mrm
]);
270 } else if (!strcmp(token
, "$group13")) {
271 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP13
][mrm
]);
272 } else if (!strcmp(token
, "$group14")) {
273 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP14
][mrm
]);
274 } else if (!strcmp(token
, "$group15")) {
275 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP15
][mrm
]);
276 } else if (!strcmp(token
, "$group16")) {
277 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP16
][mrm
]);
278 } else if (!strcmp(token
, "$group17")) {
279 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP17
][mrm
]);
280 } else if (!strcmp(token
, "$group1a")) {
281 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUP1A
][mrm
]);
282 } else if (!strcmp(token
, "$groupp")) {
283 fprintf(stdout
, "%s", kDisasmModRMOp
[GROUPP
][mrm
]);
/* Unrecognised "$group..." name: print the token itself. */
285 fprintf(stdout
, "%s", token
);
287 } else if ('$' == token
[0]) {
288 /* Tokens starting with a $ but not $group need formatting */
291 fprintf(stdout
, "$A");
/* Control and debug registers are numbered by the ModRM reg field. */
294 fprintf(stdout
, "%%cr%d", modrm_reg(mstate
->inst
.mrm
));
297 fprintf(stdout
, "%%dr%d", modrm_reg(mstate
->inst
.mrm
));
300 case 'M': /* mod should never be 3 for 'M' */
301 /* TODO: byte and word accesses */
302 RegMemPrint(mstate
, gp_regs
);
305 fprintf(stdout
, "eflags");
308 fprintf(stdout
, "%s", gp_regs
[modrm_reg(mstate
->inst
.mrm
)]);
/*
 * Immediate operand: imm_addr is computed from the prefix, opcode, ModRM,
 * SIB and displacement byte counts; token[2] selects byte ('b'), word ('w')
 * or dword width.
 */
312 const uint8_t* imm_addr
= mstate
->inst
.maddr
+
313 mstate
->inst
.prefixbytes
+
315 mstate
->inst
.hasopbyte2
+
316 mstate
->inst
.hasopbyte3
+
317 mstate
->opinfo
->hasmrmbyte
+
318 mstate
->inst
.hassibbyte
+
319 mstate
->inst
.dispbytes
;
320 if ('b' == token
[2]) {
321 fprintf(stdout
, "0x%x", ByteImmediate(imm_addr
));
322 } else if ('w' == token
[2]) {
323 fprintf(stdout
, "0x%x", WordImmediate(imm_addr
));
325 fprintf(stdout
, "0x%x", DwordImmediate(imm_addr
));
/*
 * Relative target: the printed address is vaddr + instruction length + the
 * signed byte or dword offset read from imm_addr.
 */
331 const uint8_t* imm_addr
= mstate
->inst
.maddr
+
332 mstate
->inst
.prefixbytes
+
334 mstate
->inst
.hasopbyte2
+
335 mstate
->inst
.hasopbyte3
+
336 mstate
->opinfo
->hasmrmbyte
+
337 mstate
->inst
.hassibbyte
+
338 mstate
->inst
.dispbytes
;
339 if ('b' == token
[2]) {
340 fprintf(stdout
, "0x%x",
341 mstate
->inst
.vaddr
+ mstate
->inst
.length
+
342 ByteImmediate(imm_addr
));
344 fprintf(stdout
, "0x%x",
345 mstate
->inst
.vaddr
+ mstate
->inst
.length
+
346 DwordImmediate(imm_addr
));
/* Absolute memory offset: a dword read from imm_addr, printed in brackets. */
352 const uint8_t* imm_addr
= mstate
->inst
.maddr
+
353 mstate
->inst
.prefixbytes
+
355 mstate
->inst
.hasopbyte2
+
356 mstate
->inst
.hasopbyte3
;
357 fprintf(stdout
, "[0x%x]", DwordImmediate(imm_addr
));
/* MMX register: token[2] == 'R' selects the ModRM rm field, otherwise reg. */
361 if ('R' == token
[2]) {
362 fprintf(stdout
, "%%mm%d", modrm_rm(mstate
->inst
.mrm
));
364 fprintf(stdout
, "%%mm%d", modrm_reg(mstate
->inst
.mrm
));
368 RegMemPrint(mstate
, mmx_regs
);
371 fprintf(stdout
, "%s", gp_regs
[modrm_rm(mstate
->inst
.mrm
)]);
374 fprintf(stdout
, "%s", seg_regs
[modrm_reg(mstate
->inst
.mrm
)]);
/* XMM register: token[2] == 'R' selects the ModRM rm field, otherwise reg. */
377 if ('R' == token
[2]) {
378 fprintf(stdout
, "%%xmm%d", modrm_rm(mstate
->inst
.mrm
));
380 fprintf(stdout
, "%%xmm%d", modrm_reg(mstate
->inst
.mrm
));
384 RegMemPrint(mstate
, xmm_regs
);
387 fprintf(stdout
, "ds:[esi]");
390 fprintf(stdout
, "es:[edi]");
/* Unknown '$' token: print it wrapped in token('...') so it stands out. */
393 fprintf(stdout
, "token('%s')", token
);
397 /* Print the token as is */
398 fprintf(stdout
, "%s", token
);
405 static void PrintInst(const struct NCDecoderState
*mstate
) {
407 fprintf(stdout
, " %x:\t%02x", mstate
->inst
.vaddr
,
408 mstate
->inst
.maddr
[0]);
409 for (i
= 1; i
< mstate
->inst
.length
; i
++) {
410 fprintf(stdout
, " %02x", mstate
->inst
.maddr
[i
]);
412 for (i
= mstate
->inst
.length
; i
< 7; i
++) fprintf(stdout
, " ");
413 fprintf(stdout
, "\t");
414 InstFormat(DisFmt(mstate
), mstate
);
415 fprintf(stdout
, "\n");
/*
 * Walk the section headers of a loaded file and decode its code sections.
 *
 * For each of the ncf->shnum section headers this prints the section's
 * address, file offset and flags, tests the SHF_EXECINSTR flag, and passes
 * the section's bytes to NCDecodeSegment.
 *
 * AnalyzeCodeSegments treats a negative return value as failure.
 */
418 int AnalyzeSections(ncfile
*ncf
) {
421 Elf32_Shdr
*shdr
= ncf
->sheaders
;
423 for (ii
= 0; ii
< ncf
->shnum
; ii
++) {
424 printf("section %d sh_addr %x offset %x flags %x\n",
425 ii
, (uint32_t)shdr
[ii
].sh_addr
,
426 (uint32_t)shdr
[ii
].sh_offset
, (uint32_t)shdr
[ii
].sh_flags
);
/* NOTE(review): presumably sections without SHF_EXECINSTR are skipped here - confirm. */
427 if ((shdr
[ii
].sh_flags
& SHF_EXECINSTR
) != SHF_EXECINSTR
)
429 printf("parsing section %d\n", ii
);
/* The section's bytes are located in ncf->data at (sh_addr - vbase). */
430 NCDecodeSegment(ncf
->data
+ (shdr
[ii
].sh_addr
- ncf
->vbase
),
431 shdr
[ii
].sh_addr
, shdr
[ii
].sh_size
, NULL
);
437 void AnalyzeCodeSegments(ncfile
*ncf
, const char *fname
) {
438 if (AnalyzeSections(ncf
) < 0) {
439 fprintf(stderr
, "%s: text validate failed\n", fname
);
/*
 * Interpret the command line.
 *
 * main passes the returned string to nc_loadfile as the file to load, and
 * treats a NULL return as a request to run the decoder unit tests.
 */
443 static const char* GrockArgv(int argc
, const char *argv
[]) {
447 fprintf(stderr
, "Running %s unit tests\n", progname
);
453 /* TODO: try to avoid this */
/*
 * Reference generated tables that nothing else in this file uses, so the
 * compiler does not warn about them.  Writes "malformed tables???" to
 * stderr if the first entry of any of the three tables is NULL.
 */
454 static void PseudeUseToAvoidCompilerWarnings() {
455 if (kDisasm660F38Op
[0] == NULL
||
456 kDisasmF20F38Op
[0] == NULL
||
457 kDisasm660F3AOp
[0] == NULL
) {
458 fprintf(stderr
, "malformed tables???\n");
/*
 * Entry point.  Registers PrintInst as the decoder callback, then either
 * runs the decoder unit tests (when GrockArgv returns NULL) or loads the
 * named file with nc_loadfile and disassembles it via AnalyzeCodeSegments.
 */
463 int main(int argc
, const char *argv
[]) {
464 const char *loadname
= GrockArgv(argc
, argv
);
467 PseudeUseToAvoidCompilerWarnings();
/* PrintInst is the only callback supplied; the other three slots are NULL. */
468 NCDecodeRegisterCallbacks(PrintInst
, NULL
, NULL
, NULL
);
469 if (loadname
== NULL
) {
470 extern void ncdecode_unittests();
472 ncdecode_unittests();
476 ncf
= nc_loadfile(loadname
);
/*
 * NOTE(review): the format string below has two %s conversions but only one
 * argument (strerror(errno)) is passed; the file name argument appears to
 * be missing, which makes the call undefined behaviour.
 */
478 fatal("nc_loadfile(%s): %s\n", strerror(errno
));
480 AnalyzeCodeSegments(ncf
, loadname
);