2 * Copyright 2008, Google Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
9 * * Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following disclaimer
13 * in the documentation and/or other materials provided with the
15 * * Neither the name of Google Inc. nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 * ncdecode.c - table driven decoder for Native Client
35 * Most x86 decoders I've looked at are big case statements. While
36 * this organization is fairly transparent and obvious, it tends to
37 * lead to messy control flow (gotos, etc.) that make the decoder
38 * more complicated, hence harder to maintain and harder to validate.
40 * This decoder is table driven, which will hopefully result in
41 * substantially less code. Although the code+tables may be more
42 * lines of code than a decoder built around a switch statement,
43 * the smaller amount of actual procedural code and the regular
44 * structure of the tables should make it easier to understand,
45 * debug, and easier to become confident the decoder is correct.
47 * As it is specialized to Native Client, this decoder can also
48 * benefit from any exclusions or simplifications we decide to
49 * make in the dialect of x86 machine code accepted by Native
50 * Client. Any such simplifications should ultimately be easily
51 * recognized by inspection of the decoder configuration tables.
52 * ALSO, the decoder mostly needs to worry about accurate
53 * instruction lengths and finding opcodes. It does not need
54 * to completely resolve the operands of all instructions.
58 #include "native_client/ncv/ncdecode.h"
59 #include "native_client/ncv/ncdecodetab.h"
63 #define dprint(s) fprintf s
67 #define eprint(s) fprintf s
69 static const uint32_t kPrefixTable
[256] = {
71 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
73 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
75 0, 0, 0, 0, 0, 0, kPrefixSEGES
, 0, 0, 0, 0, 0, 0, 0, kPrefixSEGCS
, 0,
77 0, 0, 0, 0, 0, 0, kPrefixSEGSS
, 0, 0, 0, 0, 0, 0, 0, kPrefixSEGDS
, 0,
79 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
81 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
83 0, 0, 0, 0, kPrefixSEGFS
, kPrefixSEGGS
, kPrefixDATA16
, kPrefixADDR16
,
84 0, 0, 0, 0, 0, 0, 0, 0,
86 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
88 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
90 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
92 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
94 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
96 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
98 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
100 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
102 kPrefixLOCK
, 0, kPrefixREPNE
, kPrefixREP
, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
105 /* later this will make decoding x87 instructions a bit more concise. */
106 static const struct OpInfo
*kDecodeX87Op
[8] = { kDecode87D8
,
/*
 * Do-nothing decoder action.  Installed as the initial value of
 * g_DecoderAction; it ignores the decoder state it is handed.
 */
static void NullDecoderAction(const struct NCDecoderState *mstate) {
  UNREFERENCED_PARAMETER(mstate);
}
/*
 * Do-nothing statistics/notification callback.  Installed as the initial
 * value of g_NewSegment and g_SegFault; it ignores the validator state.
 */
static void NullDecoderStats(struct NCValidatorState *vstate) {
  UNREFERENCED_PARAMETER(vstate);
}
/*
 * Default internal-error callback.  Installed as the initial value of
 * g_InternalError; it takes no action and ignores the validator state.
 */
static void DefaultInternalError(struct NCValidatorState *vstate) {
  UNREFERENCED_PARAMETER(vstate);
}
125 NCDecoderAction g_DecoderAction
= NullDecoderAction
;
126 NCDecoderStats g_NewSegment
= NullDecoderStats
;
127 NCDecoderStats g_InternalError
= DefaultInternalError
;
128 NCDecoderStats g_SegFault
= NullDecoderStats
;
130 /* Error Condition Handling */
/*
 * Reports a segmentation error (NCDecodeSegment calls this when an
 * instruction would extend past the end of the segment).
 *
 * Prints a diagnostic to stdout and then forwards to the registered
 * g_SegFault callback, mirroring how ErrorInternal forwards to
 * g_InternalError.
 *
 * vstate: validator state handed through to the callback.
 */
static void ErrorSegmentation(struct NCValidatorState *vstate) {
  eprint((stdout, "ErrorSegmentation\n"));
  /* When the decoder is used by the NaCl validator,  */
  /* the validator provides an error handler that does */
  /* the necessary bookkeeping to track these errors.  */
  g_SegFault(vstate);
}
/*
 * Reports an internal decoder error: prints a diagnostic to stdout, then
 * hands vstate to whatever handler is currently stored in g_InternalError.
 *
 * vstate: validator state handed through to the callback.
 */
static void ErrorInternal(struct NCValidatorState *vstate) {
  eprint((stdout, "ErrorInternal\n"));
  /*
   * When the decoder runs under the NaCl validator, the validator registers
   * a handler that does the bookkeeping needed to track these errors.
   */
  g_InternalError(vstate);
}
147 void InitDecoder(struct NCDecoderState
*mstate
) {
148 mstate
->inst
.vaddr
= mstate
->vpc
;
149 mstate
->inst
.maddr
= mstate
->mpc
;
150 mstate
->inst
.prefixbytes
= 0;
151 mstate
->inst
.prefixmask
= 0;
152 mstate
->inst
.hasopbyte2
= 0;
153 mstate
->inst
.hasopbyte3
= 0;
154 mstate
->inst
.hassibbyte
= 0;
155 mstate
->inst
.mrm
= 0;
156 mstate
->inst
.immtype
= IMM_UNKNOWN
;
157 mstate
->inst
.dispbytes
= 0;
158 mstate
->inst
.length
= 0;
159 mstate
->opinfo
= NULL
;
162 /* at most four prefix bytes are allowed */
163 void ConsumePrefixBytes(struct NCDecoderState
*mstate
) {
167 for (ii
= 0; ii
< kMaxPrefixBytes
; ++ii
) {
168 nb
= *mstate
->nextbyte
;
169 if (kPrefixTable
[nb
] == 0) return;
170 mstate
->inst
.prefixmask
|= kPrefixTable
[nb
];
171 mstate
->inst
.prefixbytes
+= 1;
172 mstate
->nextbyte
+= 1;
176 static const struct OpInfo
*GetExtendedOpInfo(struct NCDecoderState
*mstate
,
179 pm
= mstate
->inst
.prefixmask
;
180 if ((pm
& (kPrefixDATA16
| kPrefixREPNE
| kPrefixREP
)) == 0) {
181 return &kDecode0FXXOp
[opbyte2
];
182 } else if (pm
& kPrefixDATA16
) {
183 return &kDecode660FXXOp
[opbyte2
];
184 } else if (pm
& kPrefixREPNE
) {
185 return &kDecodeF20FXXOp
[opbyte2
];
186 } else if (pm
& kPrefixREP
) {
187 return &kDecodeF30FXXOp
[opbyte2
];
189 ErrorInternal(mstate
->vstate
);
190 return mstate
->opinfo
;
193 static void GetX87OpInfo(struct NCDecoderState
*mstate
) {
194 /* WAIT is an x87 instruction but not in the coproc opcode space. */
195 const uint8_t kWAITOp
= 0x9b;
196 uint8_t kFirstX87Opcode
= 0xd8;
197 uint8_t kLastX87Opcode
= 0xdf;
198 uint8_t op1
= mstate
->inst
.maddr
[mstate
->inst
.prefixbytes
];
199 if (op1
< kFirstX87Opcode
|| op1
> kLastX87Opcode
) {
200 if (op1
!= kWAITOp
) ErrorInternal(mstate
->vstate
);
203 mstate
->opinfo
= &kDecodeX87Op
[op1
- kFirstX87Opcode
][mstate
->inst
.mrm
];
206 void ConsumeOpcodeBytes(struct NCDecoderState
*mstate
) {
207 uint8_t opcode
= *mstate
->nextbyte
;
208 mstate
->opinfo
= &kDecode1ByteOp
[opcode
];
209 mstate
->nextbyte
+= 1;
210 if (opcode
== kTwoByteOpcodeByte1
) {
211 uint8_t opcode2
= *mstate
->nextbyte
;
212 mstate
->opinfo
= GetExtendedOpInfo(mstate
, opcode2
);
213 mstate
->inst
.hasopbyte2
= 1;
214 mstate
->nextbyte
+= 1;
215 if (mstate
->opinfo
->insttype
== NACLi_3BYTE
) {
216 uint8_t opcode3
= *mstate
->nextbyte
;
218 pm
= mstate
->inst
.prefixmask
;
219 mstate
->nextbyte
+= 1;
220 mstate
->inst
.hasopbyte3
= 1;
222 dprint(("NACLi_3BYTE\n"));
224 case 0x38: /* SSSE3, SSE4 */
225 if (pm
& kPrefixDATA16
) {
226 mstate
->opinfo
= &kDecode660F38Op
[opcode3
];
227 } else if (pm
& kPrefixREPNE
) {
228 mstate
->opinfo
= &kDecodeF20F38Op
[opcode3
];
229 } else if (pm
== 0) {
230 mstate
->opinfo
= &kDecode0F38Op
[opcode3
];
232 /* Other prefixes like F3 cause an undefined instruction error. */
233 /* Note from decoder table that NACLi_3BYTE is only used with */
234 /* data16 and repne prefixes. */
235 ErrorInternal(mstate
->vstate
);
238 case 0x3A: /* SSSE3, SSE4 */
239 if (pm
& kPrefixDATA16
) {
240 mstate
->opinfo
= &kDecode660F3AOp
[opcode3
];
241 } else if (pm
== 0) {
242 mstate
->opinfo
= &kDecode0F3AOp
[opcode3
];
244 /* Other prefixes like F3 cause an undefined instruction error. */
245 /* Note from decoder table that NACLi_3BYTE is only used with */
246 /* data16 and repne prefixes. */
247 ErrorInternal(mstate
->vstate
);
251 /* if this happens there is a decoding table bug */
252 ErrorInternal(mstate
->vstate
);
257 mstate
->inst
.immtype
= mstate
->opinfo
->immtype
;
260 void ConsumeModRM(struct NCDecoderState
*mstate
) {
261 if (mstate
->opinfo
->hasmrmbyte
!= 0) {
262 const uint8_t mrm
= *mstate
->nextbyte
;
263 mstate
->inst
.mrm
= mrm
;
264 mstate
->nextbyte
+= 1;
265 if (mstate
->opinfo
->insttype
== NACLi_X87
) {
266 GetX87OpInfo(mstate
);
268 if (mstate
->opinfo
->opinmrm
) {
269 const struct OpInfo
*mopinfo
=
270 &kDecodeModRMOp
[mstate
->opinfo
->opinmrm
][modrm_opcode(mrm
)];
271 mstate
->opinfo
= mopinfo
;
272 if (mstate
->inst
.immtype
== IMM_UNKNOWN
) {
274 mstate
->inst
.immtype
= mopinfo
->immtype
;
276 /* handle weird case for 0xff TEST Ib/Iv */
277 if (modrm_opcode(mrm
) == 0) {
278 if (mstate
->inst
.immtype
== IMM_GROUP3_F6
) {
279 mstate
->inst
.immtype
= IMM_FIXED1
;
281 if (mstate
->inst
.immtype
== IMM_GROUP3_F7
) {
282 mstate
->inst
.immtype
= IMM_DATAV
;
286 if (mstate
->inst
.prefixmask
& kPrefixADDR16
) {
287 switch (modrm_mod(mrm
)) {
289 if (modrm_rm(mrm
) == 0x06) mstate
->inst
.dispbytes
= 2; /* disp16 */
290 else mstate
->inst
.dispbytes
= 0;
293 mstate
->inst
.dispbytes
= 1; /* disp8 */
296 mstate
->inst
.dispbytes
= 2; /* disp16 */
299 mstate
->inst
.dispbytes
= 0; /* no disp */
302 ErrorInternal(mstate
->vstate
);
304 mstate
->inst
.hassibbyte
= 0;
306 switch (modrm_mod(mrm
)) {
308 if (modrm_rm(mrm
) == 0x05) mstate
->inst
.dispbytes
= 4; /* disp32 */
309 else mstate
->inst
.dispbytes
= 0;
312 mstate
->inst
.dispbytes
= 1; /* disp8 */
315 mstate
->inst
.dispbytes
= 4; /* disp32 */
318 mstate
->inst
.dispbytes
= 0; /* no disp */
321 ErrorInternal(mstate
->vstate
);
323 mstate
->inst
.hassibbyte
= ((modrm_rm(mrm
) == 0x04) &&
324 (modrm_mod(mrm
) != 3));
329 void ConsumeSIB(struct NCDecoderState
*mstate
) {
330 if (mstate
->inst
.hassibbyte
!= 0) {
331 const uint8_t sib
= *mstate
->nextbyte
;
332 mstate
->nextbyte
+= 1;
333 if (sib_base(sib
) == 0x05) {
334 switch (modrm_mod(mstate
->inst
.mrm
)) {
335 case 0: mstate
->inst
.dispbytes
= 4; break;
336 case 1: mstate
->inst
.dispbytes
= 1; break;
337 case 2: mstate
->inst
.dispbytes
= 4; break;
340 ErrorInternal(mstate
->vstate
);
346 void ConsumeID(struct NCDecoderState
*mstate
) {
347 if (mstate
->inst
.immtype
== IMM_UNKNOWN
) {
348 ErrorInternal(mstate
->vstate
);
350 /* NOTE: NaCl allows at most one prefix byte */
351 if (mstate
->inst
.prefixmask
& kPrefixDATA16
) {
352 mstate
->nextbyte
+= kImmTypeToSize66
[mstate
->inst
.immtype
];
353 } else if (mstate
->inst
.prefixmask
& kPrefixADDR16
) {
354 mstate
->nextbyte
+= kImmTypeToSize67
[mstate
->inst
.immtype
];
356 mstate
->nextbyte
+= kImmTypeToSize
[mstate
->inst
.immtype
];
358 mstate
->nextbyte
+= mstate
->inst
.dispbytes
;
359 mstate
->inst
.length
= mstate
->nextbyte
- mstate
->mpc
;
362 /* Actually this routine is special for 3DNow instructions */
363 void MaybeGet3ByteOpInfo(struct NCDecoderState
*mstate
) {
364 if (mstate
->opinfo
->insttype
== NACLi_3DNOW
) {
365 uint8_t opbyte1
= mstate
->mpc
[mstate
->inst
.prefixbytes
];
366 uint8_t opbyte2
= mstate
->mpc
[mstate
->inst
.prefixbytes
+ 1];
367 uint8_t immbyte
= mstate
->mpc
[mstate
->inst
.length
- 1];
368 if (opbyte1
== kTwoByteOpcodeByte1
&&
369 opbyte2
== k3DNowOpcodeByte2
) {
370 mstate
->opinfo
= &kDecode0F0FOp
[immbyte
];
375 void NCDecodeRegisterCallbacks(NCDecoderAction decoderaction
,
376 NCDecoderStats newsegment
,
377 NCDecoderStats segfault
,
378 NCDecoderStats internalerror
) {
379 if (decoderaction
!= NULL
) g_DecoderAction
= decoderaction
;
380 if (newsegment
!= NULL
) g_NewSegment
= newsegment
;
381 if (segfault
!= NULL
) g_SegFault
= segfault
;
382 if (internalerror
!= NULL
) g_InternalError
= internalerror
;
385 struct NCDecoderState
*PreviousInst(const struct NCDecoderState
*mstate
,
387 int index
= (mstate
->dbindex
+ nindex
+ kDecodeBufferSize
)
388 & (kDecodeBufferSize
- 1);
389 return &mstate
->decodebuffer
[index
];
392 /* The actual decoder */
393 void NCDecodeSegment(uint8_t *mbase
, uint32_t vbase
, size_t size
,
394 struct NCValidatorState
*vstate
) {
395 const uint32_t vlimit
= vbase
+ size
;
396 struct NCDecoderState decodebuffer
[kDecodeBufferSize
];
397 struct NCDecoderState
*mstate
;
399 for (dbindex
= 0; dbindex
< kDecodeBufferSize
; ++dbindex
) {
400 decodebuffer
[dbindex
].vstate
= vstate
;
401 decodebuffer
[dbindex
].decodebuffer
= decodebuffer
;
402 decodebuffer
[dbindex
].dbindex
= dbindex
;
403 decodebuffer
[dbindex
].inst
.length
= 0; /* indicates no instruction */
404 decodebuffer
[dbindex
].vpc
= 0;
405 decodebuffer
[dbindex
].mpc
= 0;
407 mstate
= &decodebuffer
[0];
408 mstate
->mpc
= (uint8_t *)mbase
;
409 mstate
->nextbyte
= mbase
;
412 dprint(("DecodeSegment(%x-%x)\n", vbase
, vlimit
));
413 g_NewSegment(mstate
->vstate
);
414 while (mstate
->vpc
< vlimit
) {
417 ConsumePrefixBytes(mstate
);
418 ConsumeOpcodeBytes(mstate
);
419 ConsumeModRM(mstate
);
422 MaybeGet3ByteOpInfo(mstate
);
423 /* now scrutinize this instruction */
424 newpc
= mstate
->vpc
+ mstate
->inst
.length
;
425 if (newpc
> vlimit
) {
426 eprint((stdout
, "%x > %x\n", newpc
, vlimit
));
427 ErrorSegmentation(vstate
);
430 g_DecoderAction(mstate
);
431 /* get read for next round */
432 dbindex
= (dbindex
+ 1) & (kDecodeBufferSize
- 1);
433 decodebuffer
[dbindex
].vpc
= newpc
;
434 decodebuffer
[dbindex
].mpc
= mstate
->mpc
+ mstate
->inst
.length
;
435 decodebuffer
[dbindex
].nextbyte
= mstate
->nextbyte
;
436 mstate
= &decodebuffer
[dbindex
];