1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 #include "dex_format.h" 20 21 #include <stddef.h> 22 23 // .dex bytecode definitions and helpers: 24 // https://source.android.com/devices/tech/dalvik/dalvik-bytecode.html 25 26 namespace dex { 27 28 // The number of Dalvik opcodes 29 constexpr size_t kNumPackedOpcodes = 0x100; 30 31 // Switch table and array data signatures are a code unit consisting 32 // of "NOP" (0x00) in the low-order byte and a non-zero identifying 33 // code in the high-order byte. (A true NOP is 0x0000.) 34 constexpr u2 kPackedSwitchSignature = 0x0100; 35 constexpr u2 kSparseSwitchSignature = 0x0200; 36 constexpr u2 kArrayDataSignature = 0x0300; 37 38 // Enumeration of all Dalvik opcodes 39 enum Opcode : u1 { 40 #define INSTRUCTION_ENUM(opcode, cname, ...) OP_##cname = (opcode), 41 #include "dex_instruction_list.h" 42 DEX_INSTRUCTION_LIST(INSTRUCTION_ENUM) 43 #undef DEX_INSTRUCTION_LIST 44 #undef INSTRUCTION_ENUM 45 }; 46 47 // Instruction formats associated with Dalvik opcodes 48 enum InstructionFormat : u1 { 49 k10x, // op 50 k12x, // op vA, vB 51 k11n, // op vA, #+B 52 k11x, // op vAA 53 k10t, // op +AA 54 k20t, // op +AAAA 55 k20bc, // [opt] op AA, thing@BBBB 56 k22x, // op vAA, vBBBB 57 k21t, // op vAA, +BBBB 58 k21s, // op vAA, #+BBBB 59 k21h, // op vAA, #+BBBB00000[00000000] 60 k21c, // op vAA, thing@BBBB 61 k23x, // op vAA, vBB, vCC 62 k22b, // op vAA, vBB, #+CC 63 k22t, // op vA, vB, +CCCC 64 k22s, // op vA, vB, #+CCCC 65 k22c, // op vA, vB, thing@CCCC 66 k22cs, // [opt] op vA, vB, field offset CCCC 67 k30t, // op +AAAAAAAA 68 k32x, // op vAAAA, vBBBB 69 k31i, // op vAA, #+BBBBBBBB 70 k31t, // op vAA, +BBBBBBBB 71 k31c, // op vAA, string@BBBBBBBB 72 k35c, // op {vC,vD,vE,vF,vG}, thing@BBBB 73 k35ms, // [opt] invoke-virtual+super 74 k3rc, // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB 75 k3rms, // [opt] invoke-virtual+super/range 76 k35mi, // [opt] inline invoke 77 k3rmi, // [opt] inline invoke/range 78 k45cc, // op {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH 79 k4rcc, // op {VCCCC .. v(CCCC+AA-1)}, meth@BBBB, proto@HHHH 80 k51l, // op vAA, #+BBBBBBBBBBBBBBBB 81 }; 82 83 using OpcodeFlags = u1; 84 enum : OpcodeFlags { 85 kBranch = 0x01, // conditional or unconditional branch 86 kContinue = 0x02, // flow can continue to next statement 87 kSwitch = 0x04, // switch statement 88 kThrow = 0x08, // could cause an exception to be thrown 89 kReturn = 0x10, // returns, no additional statements 90 kInvoke = 0x20, // a flavor of invoke 91 kUnconditional = 0x40, // unconditional branch 92 kExperimental = 0x80, // is an experimental opcode 93 }; 94 95 using VerifyFlags = u4; 96 enum : VerifyFlags { 97 kVerifyNothing = 0x0000000, 98 kVerifyRegA = 0x0000001, 99 kVerifyRegAWide = 0x0000002, 100 kVerifyRegB = 0x0000004, 101 kVerifyRegBField = 0x0000008, 102 kVerifyRegBMethod = 0x0000010, 103 kVerifyRegBNewInstance = 0x0000020, 104 kVerifyRegBString = 0x0000040, 105 kVerifyRegBType = 0x0000080, 106 kVerifyRegBWide = 0x0000100, 107 kVerifyRegC = 0x0000200, 108 kVerifyRegCField = 0x0000400, 109 kVerifyRegCNewArray = 0x0000800, 110 kVerifyRegCType = 0x0001000, 111 kVerifyRegCWide = 0x0002000, 112 kVerifyArrayData = 0x0004000, 113 kVerifyBranchTarget = 0x0008000, 114 kVerifySwitchTargets = 0x0010000, 115 kVerifyVarArg = 0x0020000, 116 kVerifyVarArgNonZero = 0x0040000, 117 kVerifyVarArgRange = 0x0080000, 118 kVerifyVarArgRangeNonZero = 0x0100000, 119 kVerifyRuntimeOnly = 0x0200000, 120 kVerifyError = 0x0400000, 121 kVerifyRegHPrototype = 0x0800000, 122 kVerifyRegBCallSite = 0x1000000, 123 kVerifyRegBMethodHandle = 0x2000000, 124 kVerifyRegBPrototype = 0x4000000, 125 }; 126 127 // Types of indexed reference that are associated with opcodes whose 128 // formats include such an indexed reference (e.g., 21c and 35c). 129 enum InstructionIndexType : u1 { 130 kIndexUnknown = 0, 131 kIndexNone, // has no index 132 kIndexVaries, // "It depends." Used for throw-verification-error 133 kIndexTypeRef, // type reference index 134 kIndexStringRef, // string reference index 135 kIndexMethodRef, // method reference index 136 kIndexFieldRef, // field reference index 137 kIndexInlineMethod, // inline method index (for inline linked methods) 138 kIndexVtableOffset, // vtable offset (for static linked methods) 139 kIndexFieldOffset, // field offset (for static linked fields) 140 kIndexMethodAndProtoRef, // method index and proto index 141 kIndexCallSiteRef, // call site index 142 kIndexMethodHandleRef, // constant method handle reference index 143 kIndexProtoRef, // constant prototype reference index 144 }; 145 146 // Holds the contents of a decoded instruction. 147 struct Instruction { 148 u4 vA; // the A field of the instruction 149 u4 vB; // the B field of the instruction 150 u8 vB_wide; // 64bit version of the B field (for k51l) 151 u4 vC; // the C field of the instruction 152 u4 arg[5]; // vC/D/E/F/G in invoke or filled-new-array 153 Opcode opcode; // instruction opcode 154 }; 155 156 // "packed-switch-payload" format 157 struct PackedSwitchPayload { 158 u2 ident; 159 u2 size; 160 s4 first_key; 161 s4 targets[]; 162 }; 163 164 // "sparse-switch-payload" format 165 struct SparseSwitchPayload { 166 u2 ident; 167 u2 size; 168 s4 data[]; 169 }; 170 171 // "fill-array-data-payload" format 172 struct ArrayData { 173 u2 ident; 174 u2 element_width; 175 u4 size; 176 u1 data[]; 177 }; 178 179 // Collect the enums in a struct for better locality. 180 struct InstructionDescriptor { 181 u4 verify_flags; // Set of VerifyFlag. 182 InstructionFormat format; 183 InstructionIndexType index_type; 184 u1 flags; // Set of Flags. 185 }; 186 187 // Extracts the opcode from a Dalvik code unit (bytecode) 188 Opcode OpcodeFromBytecode(u2 bytecode); 189 190 // Returns the name of an opcode 191 const char* GetOpcodeName(Opcode opcode); 192 193 // Returns the index type associated with the specified opcode 194 InstructionIndexType GetIndexTypeFromOpcode(Opcode opcode); 195 196 // Returns the format associated with the specified opcode 197 InstructionFormat GetFormatFromOpcode(Opcode opcode); 198 199 // Returns the flags for the specified opcode 200 OpcodeFlags GetFlagsFromOpcode(Opcode opcode); 201 202 // Returns the verify flags for the specified opcode 203 VerifyFlags GetVerifyFlagsFromOpcode(Opcode opcode); 204 205 // Returns the instruction width for the specified opcode format 206 size_t GetWidthFromFormat(InstructionFormat format); 207 208 // Return the width of the specified instruction, or 0 if not defined. Also 209 // works for special OP_NOP entries, including switch statement data tables 210 // and array data. 211 size_t GetWidthFromBytecode(const u2* bytecode); 212 213 // Decode a .dex bytecode 214 Instruction DecodeInstruction(const u2* bytecode); 215 216 } // namespace dex 217