1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "slicer/dex_bytecode.h"
18 #include "slicer/common.h"
19
20 #include <assert.h>
21 #include <array>
22
23 namespace dex {
24
OpcodeFromBytecode(u2 bytecode)25 Opcode OpcodeFromBytecode(u2 bytecode) {
26 Opcode opcode = Opcode(bytecode & 0xff);
27 return opcode;
28 }
29
30 // Table that maps each opcode to the index type implied by that opcode
31 static constexpr std::array<InstructionDescriptor, kNumPackedOpcodes>
32 gInstructionDescriptors = {{
33 #define INSTRUCTION_DESCR(o, c, p, format, index, flags, e, vflags) \
34 { \
35 vflags, \
36 format, \
37 index, \
38 flags, \
39 },
40 #include "export/slicer/dex_instruction_list.h"
41 DEX_INSTRUCTION_LIST(INSTRUCTION_DESCR)
42 #undef DEX_INSTRUCTION_LIST
43 #undef INSTRUCTION_DESCR
44 }};
45
GetIndexTypeFromOpcode(Opcode opcode)46 InstructionIndexType GetIndexTypeFromOpcode(Opcode opcode) {
47 return gInstructionDescriptors[opcode].index_type;
48 }
49
GetFormatFromOpcode(Opcode opcode)50 InstructionFormat GetFormatFromOpcode(Opcode opcode) {
51 return gInstructionDescriptors[opcode].format;
52 }
53
GetFlagsFromOpcode(Opcode opcode)54 OpcodeFlags GetFlagsFromOpcode(Opcode opcode) {
55 return gInstructionDescriptors[opcode].flags;
56 }
57
GetVerifyFlagsFromOpcode(Opcode opcode)58 VerifyFlags GetVerifyFlagsFromOpcode(Opcode opcode) {
59 return gInstructionDescriptors[opcode].verify_flags;
60 }
61
GetWidthFromFormat(InstructionFormat format)62 size_t GetWidthFromFormat(InstructionFormat format) {
63 switch (format) {
64 case k10x:
65 case k12x:
66 case k11n:
67 case k11x:
68 case k10t:
69 return 1;
70 case k20t:
71 case k20bc:
72 case k21c:
73 case k22x:
74 case k21s:
75 case k21t:
76 case k21h:
77 case k23x:
78 case k22b:
79 case k22s:
80 case k22t:
81 case k22c:
82 case k22cs:
83 return 2;
84 case k30t:
85 case k31t:
86 case k31c:
87 case k32x:
88 case k31i:
89 case k35c:
90 case k35ms:
91 case k35mi:
92 case k3rc:
93 case k3rms:
94 case k3rmi:
95 return 3;
96 case k45cc:
97 case k4rcc:
98 return 4;
99 case k51l:
100 return 5;
101 }
102 }
103
GetWidthFromBytecode(const u2 * bytecode)104 size_t GetWidthFromBytecode(const u2* bytecode) {
105 size_t width = 0;
106 if (*bytecode == kPackedSwitchSignature) {
107 width = 4 + bytecode[1] * 2;
108 } else if (*bytecode == kSparseSwitchSignature) {
109 width = 2 + bytecode[1] * 4;
110 } else if (*bytecode == kArrayDataSignature) {
111 u2 elemWidth = bytecode[1];
112 u4 len = bytecode[2] | (((u4)bytecode[3]) << 16);
113 // The plus 1 is to round up for odd size and width.
114 width = 4 + (elemWidth * len + 1) / 2;
115 } else {
116 width = GetWidthFromFormat(
117 GetFormatFromOpcode(OpcodeFromBytecode(bytecode[0])));
118 }
119 return width;
120 }
121
122 // Dalvik opcode names.
123 static constexpr std::array<const char*, kNumPackedOpcodes> gOpcodeNames = {
124 #define INSTRUCTION_NAME(o, c, pname, f, i, a, e, v) pname,
125 #include "export/slicer/dex_instruction_list.h"
126 DEX_INSTRUCTION_LIST(INSTRUCTION_NAME)
127 #undef DEX_INSTRUCTION_LIST
128 #undef INSTRUCTION_NAME
129 };
130
GetOpcodeName(Opcode opcode)131 const char* GetOpcodeName(Opcode opcode) { return gOpcodeNames[opcode]; }
132
133 // Helpers for DecodeInstruction()
InstA(u2 inst)134 static u4 InstA(u2 inst) { return (inst >> 8) & 0x0f; }
InstB(u2 inst)135 static u4 InstB(u2 inst) { return inst >> 12; }
InstAA(u2 inst)136 static u4 InstAA(u2 inst) { return inst >> 8; }
137
138 // Helper for DecodeInstruction()
FetchU4(const u2 * ptr)139 static u4 FetchU4(const u2* ptr) { return ptr[0] | (u4(ptr[1]) << 16); }
140
141 // Helper for DecodeInstruction()
FetchU8(const u2 * ptr)142 static u8 FetchU8(const u2* ptr) {
143 return FetchU4(ptr) | (u8(FetchU4(ptr + 2)) << 32);
144 }
145
146 // Decode a Dalvik bytecode and extract the individual fields
DecodeInstruction(const u2 * bytecode)147 Instruction DecodeInstruction(const u2* bytecode) {
148 u2 inst = bytecode[0];
149 Opcode opcode = OpcodeFromBytecode(inst);
150 InstructionFormat format = GetFormatFromOpcode(opcode);
151
152 Instruction dec = {};
153 dec.opcode = opcode;
154
155 switch (format) {
156 case k10x: // op
157 return dec;
158 case k12x: // op vA, vB
159 dec.vA = InstA(inst);
160 dec.vB = InstB(inst);
161 return dec;
162 case k11n: // op vA, #+B
163 dec.vA = InstA(inst);
164 dec.vB = s4(InstB(inst) << 28) >> 28; // sign extend 4-bit value
165 return dec;
166 case k11x: // op vAA
167 dec.vA = InstAA(inst);
168 return dec;
169 case k10t: // op +AA
170 dec.vA = s1(InstAA(inst)); // sign-extend 8-bit value
171 return dec;
172 case k20t: // op +AAAA
173 dec.vA = s2(bytecode[1]); // sign-extend 16-bit value
174 return dec;
175 case k20bc: // [opt] op AA, thing@BBBB
176 case k21c: // op vAA, thing@BBBB
177 case k22x: // op vAA, vBBBB
178 dec.vA = InstAA(inst);
179 dec.vB = bytecode[1];
180 return dec;
181 case k21s: // op vAA, #+BBBB
182 case k21t: // op vAA, +BBBB
183 dec.vA = InstAA(inst);
184 dec.vB = s2(bytecode[1]); // sign-extend 16-bit value
185 return dec;
186 case k21h: // op vAA, #+BBBB0000[00000000]
187 dec.vA = InstAA(inst);
188 // The value should be treated as right-zero-extended, but we don't
189 // actually do that here. Among other things, we don't know if it's
190 // the top bits of a 32- or 64-bit value.
191 dec.vB = bytecode[1];
192 return dec;
193 case k23x: // op vAA, vBB, vCC
194 dec.vA = InstAA(inst);
195 dec.vB = bytecode[1] & 0xff;
196 dec.vC = bytecode[1] >> 8;
197 return dec;
198 case k22b: // op vAA, vBB, #+CC
199 dec.vA = InstAA(inst);
200 dec.vB = bytecode[1] & 0xff;
201 dec.vC = s1(bytecode[1] >> 8); // sign-extend 8-bit value
202 return dec;
203 case k22s: // op vA, vB, #+CCCC
204 case k22t: // op vA, vB, +CCCC
205 dec.vA = InstA(inst);
206 dec.vB = InstB(inst);
207 dec.vC = s2(bytecode[1]); // sign-extend 16-bit value
208 return dec;
209 case k22c: // op vA, vB, thing@CCCC
210 case k22cs: // [opt] op vA, vB, field offset CCCC
211 dec.vA = InstA(inst);
212 dec.vB = InstB(inst);
213 dec.vC = bytecode[1];
214 return dec;
215 case k30t: // op +AAAAAAAA
216 dec.vA = FetchU4(bytecode + 1);
217 return dec;
218 case k31t: // op vAA, +BBBBBBBB
219 case k31c: // op vAA, string@BBBBBBBB
220 dec.vA = InstAA(inst);
221 dec.vB = FetchU4(bytecode + 1);
222 return dec;
223 case k32x: // op vAAAA, vBBBB
224 dec.vA = bytecode[1];
225 dec.vB = bytecode[2];
226 return dec;
227 case k31i: // op vAA, #+BBBBBBBB
228 dec.vA = InstAA(inst);
229 dec.vB = FetchU4(bytecode + 1);
230 return dec;
231 case k35c: // op {vC, vD, vE, vF, vG}, thing@BBBB
232 case k35ms: // [opt] invoke-virtual+super
233 case k35mi: { // [opt] inline invoke
234 dec.vA = InstB(inst); // This is labeled A in the spec.
235 dec.vB = bytecode[1];
236
237 u2 regList = bytecode[2];
238
239 // Copy the argument registers into the arg[] array, and
240 // also copy the first argument (if any) into vC. (The
241 // Instruction structure doesn't have separate
242 // fields for {vD, vE, vF, vG}, so there's no need to make
243 // copies of those.) Note that cases 5..2 fall through.
244 switch (dec.vA) {
245 case 5:
246 // A fifth arg is verboten for inline invokes
247 SLICER_CHECK(format != k35mi);
248
249 // Per note at the top of this format decoder, the
250 // fifth argument comes from the A field in the
251 // instruction, but it's labeled G in the spec.
252 dec.arg[4] = InstA(inst);
253 FALLTHROUGH_INTENDED;
254 case 4:
255 dec.arg[3] = (regList >> 12) & 0x0f;
256 FALLTHROUGH_INTENDED;
257 case 3:
258 dec.arg[2] = (regList >> 8) & 0x0f;
259 FALLTHROUGH_INTENDED;
260 case 2:
261 dec.arg[1] = (regList >> 4) & 0x0f;
262 FALLTHROUGH_INTENDED;
263 case 1:
264 dec.vC = dec.arg[0] = regList & 0x0f;
265 FALLTHROUGH_INTENDED;
266 case 0:
267 // Valid, but no need to do anything
268 return dec;
269 }
270 }
271 SLICER_CHECK(!"Invalid arg count in 35c/35ms/35mi");
272 case k3rc: // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB
273 case k3rms: // [opt] invoke-virtual+super/range
274 case k3rmi: // [opt] execute-inline/range
275 dec.vA = InstAA(inst);
276 dec.vB = bytecode[1];
277 dec.vC = bytecode[2];
278 return dec;
279 case k45cc: {
280 // AG op BBBB FEDC HHHH
281 dec.vA = InstB(inst); // This is labelled A in the spec.
282 dec.vB = bytecode[1]; // vB meth@BBBB
283
284 u2 regList = bytecode[2];
285 dec.vC = regList & 0xf;
286 dec.arg[0] = (regList >> 4) & 0xf; // vD
287 dec.arg[1] = (regList >> 8) & 0xf; // vE
288 dec.arg[2] = (regList >> 12); // vF
289 dec.arg[3] = InstA(inst); // vG
290 dec.arg[4] = bytecode[3]; // vH proto@HHHH
291 }
292 return dec;
293 case k4rcc:
294 // AA op BBBB CCCC HHHH
295 dec.vA = InstAA(inst);
296 dec.vB = bytecode[1];
297 dec.vC = bytecode[2];
298 dec.arg[4] = bytecode[3]; // vH proto@HHHH
299 return dec;
300 case k51l: // op vAA, #+BBBBBBBBBBBBBBBB
301 dec.vA = InstAA(inst);
302 dec.vB_wide = FetchU8(bytecode + 1);
303 return dec;
304 }
305 SLICER_FATAL("Can't decode unexpected format 0x%02x (op=0x%02x)", format,
306 opcode);
307 }
308
309 } // namespace dex
310