1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "slicer/dex_bytecode.h"
18 #include "slicer/common.h"
19 
20 #include <assert.h>
21 #include <array>
22 
23 namespace dex {
24 
OpcodeFromBytecode(u2 bytecode)25 Opcode OpcodeFromBytecode(u2 bytecode) {
26   Opcode opcode = Opcode(bytecode & 0xff);
27   return opcode;
28 }
29 
30 // Table that maps each opcode to the index type implied by that opcode
31 static constexpr std::array<InstructionDescriptor, kNumPackedOpcodes>
32     gInstructionDescriptors = {{
33 #define INSTRUCTION_DESCR(o, c, p, format, index, flags, e, vflags) \
34   {                                                                 \
35       vflags,                                                       \
36       format,                                                       \
37       index,                                                        \
38       flags,                                                        \
39   },
40 #include "export/slicer/dex_instruction_list.h"
41         DEX_INSTRUCTION_LIST(INSTRUCTION_DESCR)
42 #undef DEX_INSTRUCTION_LIST
43 #undef INSTRUCTION_DESCR
44     }};
45 
GetIndexTypeFromOpcode(Opcode opcode)46 InstructionIndexType GetIndexTypeFromOpcode(Opcode opcode) {
47   return gInstructionDescriptors[opcode].index_type;
48 }
49 
GetFormatFromOpcode(Opcode opcode)50 InstructionFormat GetFormatFromOpcode(Opcode opcode) {
51   return gInstructionDescriptors[opcode].format;
52 }
53 
GetFlagsFromOpcode(Opcode opcode)54 OpcodeFlags GetFlagsFromOpcode(Opcode opcode) {
55   return gInstructionDescriptors[opcode].flags;
56 }
57 
GetVerifyFlagsFromOpcode(Opcode opcode)58 VerifyFlags GetVerifyFlagsFromOpcode(Opcode opcode) {
59   return gInstructionDescriptors[opcode].verify_flags;
60 }
61 
GetWidthFromFormat(InstructionFormat format)62 size_t GetWidthFromFormat(InstructionFormat format) {
63   switch (format) {
64     case k10x:
65     case k12x:
66     case k11n:
67     case k11x:
68     case k10t:
69       return 1;
70     case k20t:
71     case k20bc:
72     case k21c:
73     case k22x:
74     case k21s:
75     case k21t:
76     case k21h:
77     case k23x:
78     case k22b:
79     case k22s:
80     case k22t:
81     case k22c:
82     case k22cs:
83       return 2;
84     case k30t:
85     case k31t:
86     case k31c:
87     case k32x:
88     case k31i:
89     case k35c:
90     case k35ms:
91     case k35mi:
92     case k3rc:
93     case k3rms:
94     case k3rmi:
95       return 3;
96     case k45cc:
97     case k4rcc:
98       return 4;
99     case k51l:
100       return 5;
101   }
102 }
103 
GetWidthFromBytecode(const u2 * bytecode)104 size_t GetWidthFromBytecode(const u2* bytecode) {
105   size_t width = 0;
106   if (*bytecode == kPackedSwitchSignature) {
107     width = 4 + bytecode[1] * 2;
108   } else if (*bytecode == kSparseSwitchSignature) {
109     width = 2 + bytecode[1] * 4;
110   } else if (*bytecode == kArrayDataSignature) {
111     u2 elemWidth = bytecode[1];
112     u4 len = bytecode[2] | (((u4)bytecode[3]) << 16);
113     // The plus 1 is to round up for odd size and width.
114     width = 4 + (elemWidth * len + 1) / 2;
115   } else {
116     width = GetWidthFromFormat(
117         GetFormatFromOpcode(OpcodeFromBytecode(bytecode[0])));
118   }
119   return width;
120 }
121 
122 // Dalvik opcode names.
123 static constexpr std::array<const char*, kNumPackedOpcodes> gOpcodeNames = {
124 #define INSTRUCTION_NAME(o, c, pname, f, i, a, e, v) pname,
125 #include "export/slicer/dex_instruction_list.h"
126     DEX_INSTRUCTION_LIST(INSTRUCTION_NAME)
127 #undef DEX_INSTRUCTION_LIST
128 #undef INSTRUCTION_NAME
129 };
130 
GetOpcodeName(Opcode opcode)131 const char* GetOpcodeName(Opcode opcode) { return gOpcodeNames[opcode]; }
132 
133 // Helpers for DecodeInstruction()
InstA(u2 inst)134 static u4 InstA(u2 inst) { return (inst >> 8) & 0x0f; }
InstB(u2 inst)135 static u4 InstB(u2 inst) { return inst >> 12; }
InstAA(u2 inst)136 static u4 InstAA(u2 inst) { return inst >> 8; }
137 
138 // Helper for DecodeInstruction()
FetchU4(const u2 * ptr)139 static u4 FetchU4(const u2* ptr) { return ptr[0] | (u4(ptr[1]) << 16); }
140 
141 // Helper for DecodeInstruction()
FetchU8(const u2 * ptr)142 static u8 FetchU8(const u2* ptr) {
143   return FetchU4(ptr) | (u8(FetchU4(ptr + 2)) << 32);
144 }
145 
146 // Decode a Dalvik bytecode and extract the individual fields
DecodeInstruction(const u2 * bytecode)147 Instruction DecodeInstruction(const u2* bytecode) {
148   u2 inst = bytecode[0];
149   Opcode opcode = OpcodeFromBytecode(inst);
150   InstructionFormat format = GetFormatFromOpcode(opcode);
151 
152   Instruction dec = {};
153   dec.opcode = opcode;
154 
155   switch (format) {
156     case k10x:  // op
157       return dec;
158     case k12x:  // op vA, vB
159       dec.vA = InstA(inst);
160       dec.vB = InstB(inst);
161       return dec;
162     case k11n:  // op vA, #+B
163       dec.vA = InstA(inst);
164       dec.vB = s4(InstB(inst) << 28) >> 28;  // sign extend 4-bit value
165       return dec;
166     case k11x:  // op vAA
167       dec.vA = InstAA(inst);
168       return dec;
169     case k10t:                    // op +AA
170       dec.vA = s1(InstAA(inst));  // sign-extend 8-bit value
171       return dec;
172     case k20t:                   // op +AAAA
173       dec.vA = s2(bytecode[1]);  // sign-extend 16-bit value
174       return dec;
175     case k20bc:  // [opt] op AA, thing@BBBB
176     case k21c:   // op vAA, thing@BBBB
177     case k22x:   // op vAA, vBBBB
178       dec.vA = InstAA(inst);
179       dec.vB = bytecode[1];
180       return dec;
181     case k21s:  // op vAA, #+BBBB
182     case k21t:  // op vAA, +BBBB
183       dec.vA = InstAA(inst);
184       dec.vB = s2(bytecode[1]);  // sign-extend 16-bit value
185       return dec;
186     case k21h:  // op vAA, #+BBBB0000[00000000]
187       dec.vA = InstAA(inst);
188       // The value should be treated as right-zero-extended, but we don't
189       // actually do that here. Among other things, we don't know if it's
190       // the top bits of a 32- or 64-bit value.
191       dec.vB = bytecode[1];
192       return dec;
193     case k23x:  // op vAA, vBB, vCC
194       dec.vA = InstAA(inst);
195       dec.vB = bytecode[1] & 0xff;
196       dec.vC = bytecode[1] >> 8;
197       return dec;
198     case k22b:  // op vAA, vBB, #+CC
199       dec.vA = InstAA(inst);
200       dec.vB = bytecode[1] & 0xff;
201       dec.vC = s1(bytecode[1] >> 8);  // sign-extend 8-bit value
202       return dec;
203     case k22s:  // op vA, vB, #+CCCC
204     case k22t:  // op vA, vB, +CCCC
205       dec.vA = InstA(inst);
206       dec.vB = InstB(inst);
207       dec.vC = s2(bytecode[1]);  // sign-extend 16-bit value
208       return dec;
209     case k22c:   // op vA, vB, thing@CCCC
210     case k22cs:  // [opt] op vA, vB, field offset CCCC
211       dec.vA = InstA(inst);
212       dec.vB = InstB(inst);
213       dec.vC = bytecode[1];
214       return dec;
215     case k30t:  // op +AAAAAAAA
216       dec.vA = FetchU4(bytecode + 1);
217       return dec;
218     case k31t:  // op vAA, +BBBBBBBB
219     case k31c:  // op vAA, string@BBBBBBBB
220       dec.vA = InstAA(inst);
221       dec.vB = FetchU4(bytecode + 1);
222       return dec;
223     case k32x:  // op vAAAA, vBBBB
224       dec.vA = bytecode[1];
225       dec.vB = bytecode[2];
226       return dec;
227     case k31i:  // op vAA, #+BBBBBBBB
228       dec.vA = InstAA(inst);
229       dec.vB = FetchU4(bytecode + 1);
230       return dec;
231     case k35c:               // op {vC, vD, vE, vF, vG}, thing@BBBB
232     case k35ms:              // [opt] invoke-virtual+super
233     case k35mi: {            // [opt] inline invoke
234       dec.vA = InstB(inst);  // This is labeled A in the spec.
235       dec.vB = bytecode[1];
236 
237       u2 regList = bytecode[2];
238 
239       // Copy the argument registers into the arg[] array, and
240       // also copy the first argument (if any) into vC. (The
241       // Instruction structure doesn't have separate
242       // fields for {vD, vE, vF, vG}, so there's no need to make
243       // copies of those.) Note that cases 5..2 fall through.
244       switch (dec.vA) {
245         case 5:
246           // A fifth arg is verboten for inline invokes
247           SLICER_CHECK(format != k35mi);
248 
249           // Per note at the top of this format decoder, the
250           // fifth argument comes from the A field in the
251           // instruction, but it's labeled G in the spec.
252           dec.arg[4] = InstA(inst);
253           FALLTHROUGH_INTENDED;
254         case 4:
255           dec.arg[3] = (regList >> 12) & 0x0f;
256           FALLTHROUGH_INTENDED;
257         case 3:
258           dec.arg[2] = (regList >> 8) & 0x0f;
259           FALLTHROUGH_INTENDED;
260         case 2:
261           dec.arg[1] = (regList >> 4) & 0x0f;
262           FALLTHROUGH_INTENDED;
263         case 1:
264           dec.vC = dec.arg[0] = regList & 0x0f;
265           FALLTHROUGH_INTENDED;
266         case 0:
267           // Valid, but no need to do anything
268           return dec;
269       }
270     }
271       SLICER_CHECK(!"Invalid arg count in 35c/35ms/35mi");
272     case k3rc:   // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB
273     case k3rms:  // [opt] invoke-virtual+super/range
274     case k3rmi:  // [opt] execute-inline/range
275       dec.vA = InstAA(inst);
276       dec.vB = bytecode[1];
277       dec.vC = bytecode[2];
278       return dec;
279     case k45cc: {
280       // AG op BBBB FEDC HHHH
281       dec.vA = InstB(inst);  // This is labelled A in the spec.
282       dec.vB = bytecode[1];  // vB meth@BBBB
283 
284       u2 regList = bytecode[2];
285       dec.vC = regList & 0xf;
286       dec.arg[0] = (regList >> 4) & 0xf;  // vD
287       dec.arg[1] = (regList >> 8) & 0xf;  // vE
288       dec.arg[2] = (regList >> 12);       // vF
289       dec.arg[3] = InstA(inst);           // vG
290       dec.arg[4] = bytecode[3];           // vH proto@HHHH
291     }
292       return dec;
293     case k4rcc:
294       // AA op BBBB CCCC HHHH
295       dec.vA = InstAA(inst);
296       dec.vB = bytecode[1];
297       dec.vC = bytecode[2];
298       dec.arg[4] = bytecode[3];  // vH proto@HHHH
299       return dec;
300     case k51l:  // op vAA, #+BBBBBBBBBBBBBBBB
301       dec.vA = InstAA(inst);
302       dec.vB_wide = FetchU8(bytecode + 1);
303       return dec;
304   }
305   SLICER_FATAL("Can't decode unexpected format 0x%02x (op=0x%02x)", format,
306                opcode);
307 }
308 
309 }  // namespace dex
310