1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "dex_format.h"
20 
21 #include <stddef.h>
22 
23 // .dex bytecode definitions and helpers:
24 // https://source.android.com/devices/tech/dalvik/dalvik-bytecode.html
25 
26 namespace dex {
27 
// Size of the Dalvik opcode space: one slot for each value an opcode byte
// can take (0x00 through 0xff).
constexpr size_t kNumPackedOpcodes = 256;
30 
31 // Switch table and array data signatures are a code unit consisting
32 // of "NOP" (0x00) in the low-order byte and a non-zero identifying
33 // code in the high-order byte. (A true NOP is 0x0000.)
34 constexpr u2 kPackedSwitchSignature = 0x0100;
35 constexpr u2 kSparseSwitchSignature = 0x0200;
36 constexpr u2 kArrayDataSignature = 0x0300;
37 
// Enumeration of all Dalvik opcodes.
//
// The enumerators are not spelled out here. DEX_INSTRUCTION_LIST (presumably
// defined by "dex_instruction_list.h", which is included inside the enum
// body) is expanded with INSTRUCTION_ENUM, producing one
// `OP_<cname> = (<opcode>),` enumerator per list entry.
enum Opcode : u1 {
// Only the first two fields of each list entry are used here; any remaining
// fields are absorbed by the variadic parameter.
#define INSTRUCTION_ENUM(opcode, cname, ...) OP_##cname = (opcode),
#include "dex_instruction_list.h"
  DEX_INSTRUCTION_LIST(INSTRUCTION_ENUM)
// Undefine both macros so they do not leak into code that includes this
// header.
#undef DEX_INSTRUCTION_LIST
#undef INSTRUCTION_ENUM
};
46 
47 // Instruction formats associated with Dalvik opcodes
48 enum InstructionFormat : u1 {
49   k10x,   // op
50   k12x,   // op vA, vB
51   k11n,   // op vA, #+B
52   k11x,   // op vAA
53   k10t,   // op +AA
54   k20t,   // op +AAAA
55   k20bc,  // [opt] op AA, thing@BBBB
56   k22x,   // op vAA, vBBBB
57   k21t,   // op vAA, +BBBB
58   k21s,   // op vAA, #+BBBB
59   k21h,   // op vAA, #+BBBB00000[00000000]
60   k21c,   // op vAA, thing@BBBB
61   k23x,   // op vAA, vBB, vCC
62   k22b,   // op vAA, vBB, #+CC
63   k22t,   // op vA, vB, +CCCC
64   k22s,   // op vA, vB, #+CCCC
65   k22c,   // op vA, vB, thing@CCCC
66   k22cs,  // [opt] op vA, vB, field offset CCCC
67   k30t,   // op +AAAAAAAA
68   k32x,   // op vAAAA, vBBBB
69   k31i,   // op vAA, #+BBBBBBBB
70   k31t,   // op vAA, +BBBBBBBB
71   k31c,   // op vAA, string@BBBBBBBB
72   k35c,   // op {vC,vD,vE,vF,vG}, thing@BBBB
73   k35ms,  // [opt] invoke-virtual+super
74   k3rc,   // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
75   k3rms,  // [opt] invoke-virtual+super/range
76   k35mi,  // [opt] inline invoke
77   k3rmi,  // [opt] inline invoke/range
78   k45cc,  // op {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH
79   k4rcc,  // op {VCCCC .. v(CCCC+AA-1)}, meth@BBBB, proto@HHHH
80   k51l,   // op vAA, #+BBBBBBBBBBBBBBBB
81 };
82 
83 using OpcodeFlags = u1;
84 enum : OpcodeFlags {
85   kBranch = 0x01,         // conditional or unconditional branch
86   kContinue = 0x02,       // flow can continue to next statement
87   kSwitch = 0x04,         // switch statement
88   kThrow = 0x08,          // could cause an exception to be thrown
89   kReturn = 0x10,         // returns, no additional statements
90   kInvoke = 0x20,         // a flavor of invoke
91   kUnconditional = 0x40,  // unconditional branch
92   kExperimental = 0x80,   // is an experimental opcode
93 };
94 
95 using VerifyFlags = u4;
96 enum : VerifyFlags {
97   kVerifyNothing = 0x0000000,
98   kVerifyRegA = 0x0000001,
99   kVerifyRegAWide = 0x0000002,
100   kVerifyRegB = 0x0000004,
101   kVerifyRegBField = 0x0000008,
102   kVerifyRegBMethod = 0x0000010,
103   kVerifyRegBNewInstance = 0x0000020,
104   kVerifyRegBString = 0x0000040,
105   kVerifyRegBType = 0x0000080,
106   kVerifyRegBWide = 0x0000100,
107   kVerifyRegC = 0x0000200,
108   kVerifyRegCField = 0x0000400,
109   kVerifyRegCNewArray = 0x0000800,
110   kVerifyRegCType = 0x0001000,
111   kVerifyRegCWide = 0x0002000,
112   kVerifyArrayData = 0x0004000,
113   kVerifyBranchTarget = 0x0008000,
114   kVerifySwitchTargets = 0x0010000,
115   kVerifyVarArg = 0x0020000,
116   kVerifyVarArgNonZero = 0x0040000,
117   kVerifyVarArgRange = 0x0080000,
118   kVerifyVarArgRangeNonZero = 0x0100000,
119   kVerifyRuntimeOnly = 0x0200000,
120   kVerifyError = 0x0400000,
121   kVerifyRegHPrototype = 0x0800000,
122   kVerifyRegBCallSite = 0x1000000,
123   kVerifyRegBMethodHandle = 0x2000000,
124   kVerifyRegBPrototype = 0x4000000,
125 };
126 
127 // Types of indexed reference that are associated with opcodes whose
128 // formats include such an indexed reference (e.g., 21c and 35c).
129 enum InstructionIndexType : u1 {
130   kIndexUnknown = 0,
131   kIndexNone,               // has no index
132   kIndexVaries,             // "It depends." Used for throw-verification-error
133   kIndexTypeRef,            // type reference index
134   kIndexStringRef,          // string reference index
135   kIndexMethodRef,          // method reference index
136   kIndexFieldRef,           // field reference index
137   kIndexInlineMethod,       // inline method index (for inline linked methods)
138   kIndexVtableOffset,       // vtable offset (for static linked methods)
139   kIndexFieldOffset,        // field offset (for static linked fields)
140   kIndexMethodAndProtoRef,  // method index and proto index
141   kIndexCallSiteRef,        // call site index
142   kIndexMethodHandleRef,    // constant method handle reference index
143   kIndexProtoRef,           // constant prototype reference index
144 };
145 
// Holds the contents of a decoded instruction; this is the type returned by
// DecodeInstruction(). Judging by the per-field notes, which fields carry
// meaningful data depends on the instruction (e.g. vB_wide is for k51l, arg[]
// for invoke / filled-new-array).
// NOTE(review): members have no default initializers, so a default-constructed
// Instruction has indeterminate field values.
struct Instruction {
  u4 vA;          // the A field of the instruction
  u4 vB;          // the B field of the instruction
  u8 vB_wide;     // 64-bit version of the B field (for k51l)
  u4 vC;          // the C field of the instruction
  u4 arg[5];      // vC/D/E/F/G in invoke or filled-new-array
  Opcode opcode;  // instruction opcode
};
155 
// "packed-switch-payload" format.
// Field notes follow the Dalvik bytecode reference for this payload; they are
// not derived from code in this header.
struct PackedSwitchPayload {
  u2 ident;      // payload signature (presumably kPackedSwitchSignature)
  u2 size;       // number of entries in targets[]
  s4 first_key;  // first (lowest) switch case value
  s4 targets[];  // `size` branch targets; trailing array with no fixed bound
};
163 
// "sparse-switch-payload" format.
// Field notes follow the Dalvik bytecode reference for this payload; they are
// not derived from code in this header.
struct SparseSwitchPayload {
  u2 ident;   // payload signature (presumably kSparseSwitchSignature)
  u2 size;    // number of key/target pairs
  s4 data[];  // per the reference: `size` keys followed by `size` targets;
              // trailing array with no fixed bound
};
170 
// "fill-array-data-payload" format.
// Field notes follow the Dalvik bytecode reference for this payload; they are
// not derived from code in this header.
struct ArrayData {
  u2 ident;          // payload signature (presumably kArrayDataSignature)
  u2 element_width;  // number of bytes in each element
  u4 size;           // number of elements
  u1 data[];         // raw element bytes; trailing array with no fixed bound
};
178 
// Collect the enums in a struct for better locality.
// NOTE(review): verify_flags and flags appear to hold VerifyFlags and
// OpcodeFlags bit sets respectively; they are declared with the raw u4 / u1
// types rather than those aliases.
struct InstructionDescriptor {
  u4 verify_flags;  // Set of VerifyFlags bits.
  InstructionFormat format;  // Format of the instruction.
  InstructionIndexType index_type;  // Kind of indexed reference, if any.
  u1 flags;  // Set of OpcodeFlags bits.
};
186 
// Extracts the opcode from a Dalvik code unit (bytecode).
// `bytecode` is a single code unit, passed by value.
// NOTE(review): presumably the opcode is the low-order byte of the code unit,
// as in the Dalvik bytecode format — confirm against the definition.
Opcode OpcodeFromBytecode(u2 bytecode);
189 
// Returns the name of an opcode as a C string.
// NOTE(review): ownership and lifetime of the returned pointer are not visible
// from this declaration; presumably it refers to static storage — confirm.
const char* GetOpcodeName(Opcode opcode);
192 
// Returns the index type (kind of indexed reference) associated with the
// specified opcode.
InstructionIndexType GetIndexTypeFromOpcode(Opcode opcode);
195 
// Returns the instruction format associated with the specified opcode.
InstructionFormat GetFormatFromOpcode(Opcode opcode);
198 
// Returns the flags for the specified opcode, as an OpcodeFlags value
// (presumably a combination of kBranch, kContinue, ... bits).
OpcodeFlags GetFlagsFromOpcode(Opcode opcode);
201 
// Returns the verify flags for the specified opcode, as a VerifyFlags value
// (presumably a combination of kVerify... bits).
VerifyFlags GetVerifyFlagsFromOpcode(Opcode opcode);
204 
// Returns the instruction width for the specified opcode format.
// NOTE(review): the unit is not stated here; presumably 16-bit code units —
// confirm against the definition.
size_t GetWidthFromFormat(InstructionFormat format);
207 
// Returns the width of the specified instruction, or 0 if not defined.  Also
// works for special OP_NOP entries, including switch statement data tables
// and array data.
// `bytecode` points at the code unit(s) of the instruction or payload to
// measure. NOTE(review): the unit of the result is not stated here;
// presumably 16-bit code units — confirm against the definition.
size_t GetWidthFromBytecode(const u2* bytecode);
212 
// Decodes the .dex instruction that `bytecode` points at and returns its
// fields by value as an Instruction.
// NOTE(review): how many code units are read is not visible from this
// declaration; presumably it depends on the opcode's format.
Instruction DecodeInstruction(const u2* bytecode);
215 
216 }  // namespace dex
217