/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_INLINER_H_
#define ART_COMPILER_OPTIMIZING_INLINER_H_

#include "dex/dex_file_types.h"
#include "dex/invoke_type.h"
#include "optimization.h"
#include "profile/profile_compilation_info.h"

namespace art {

class CodeGenerator;
class DexCompilationUnit;
class HGraph;
class HInvoke;
class OptimizingCompilerStats;

class HInliner : public HOptimization {
 public:
  HInliner(HGraph* outer_graph,
           HGraph* outermost_graph,
           CodeGenerator* codegen,
           const DexCompilationUnit& outer_compilation_unit,
           const DexCompilationUnit& caller_compilation_unit,
           OptimizingCompilerStats* stats,
           size_t total_number_of_dex_registers,
           size_t total_number_of_instructions,
           HInliner* parent,
           size_t depth = 0,
           const char* name = kInlinerPassName)
      : HOptimization(outer_graph, name, stats),
        outermost_graph_(outermost_graph),
        outer_compilation_unit_(outer_compilation_unit),
        caller_compilation_unit_(caller_compilation_unit),
        codegen_(codegen),
        total_number_of_dex_registers_(total_number_of_dex_registers),
        total_number_of_instructions_(total_number_of_instructions),
        parent_(parent),
        depth_(depth),
        inlining_budget_(0),
        inline_stats_(nullptr) {}

  bool Run() override;

  static constexpr const char* kInlinerPassName = "inliner";

 private:
  enum InlineCacheType {
    kInlineCacheNoData = 0,
    kInlineCacheUninitialized = 1,
    kInlineCacheMonomorphic = 2,
    kInlineCachePolymorphic = 3,
    kInlineCacheMegamorphic = 4,
    kInlineCacheMissingTypes = 5
  };

  // Try to inline `invoke_instruction`.
  bool TryInline(HInvoke* invoke_instruction);

  // Attempt to resolve the target of the invoke instruction to an actual call
  // target.
  //
  // Returns the target directly in the case of static or direct invokes.
  // Otherwise, uses CHA devirtualization or other methods to try to find the
  // call target.
  ArtMethod* FindActualCallTarget(HInvoke* invoke_instruction, bool* cha_devirtualize)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
  // reference type propagation can run after the inlining. If the inlining is successful, this
  // method will replace and remove the `invoke_instruction`. If `cha_devirtualize` is true,
  // a CHA guard needs to be added for the inlining.
  bool TryInlineAndReplace(HInvoke* invoke_instruction,
                           ArtMethod* resolved_method,
                           ReferenceTypeInfo receiver_type,
                           bool do_rtp,
                           bool cha_devirtualize)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Try to inline the body of `resolved_method` at `invoke_instruction`, storing the value
  // that should replace uses of the invoke in `return_replacement`.
  bool TryBuildAndInline(HInvoke* invoke_instruction,
                         ArtMethod* resolved_method,
                         ReferenceTypeInfo receiver_type,
                         HInstruction** return_replacement)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Helper for TryBuildAndInline: builds the callee graph, runs simple optimizations on it
  // and, if its body can be inlined, merges it into the caller graph.
  bool TryBuildAndInlineHelper(HInvoke* invoke_instruction,
                               ArtMethod* resolved_method,
                               ReferenceTypeInfo receiver_type,
                               HInstruction** return_replacement)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Substitutes parameters in the callee graph with their values from the caller.
  void SubstituteArguments(HGraph* callee_graph,
                           HInvoke* invoke_instruction,
                           ReferenceTypeInfo receiver_type,
                           const DexCompilationUnit& dex_compilation_unit)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Run simple optimizations on `callee_graph`.
  void RunOptimizations(HGraph* callee_graph,
                        const dex::CodeItem* code_item,
                        const DexCompilationUnit& dex_compilation_unit)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Try to recognize known simple patterns and replace the invoke call with appropriate
  // instructions.
  bool TryPatternSubstitution(HInvoke* invoke_instruction,
                              ArtMethod* resolved_method,
                              HInstruction** return_replacement)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Returns whether inlining is allowed based on ART semantics.
  bool IsInliningAllowed(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Returns whether ART supports inlining this method.
  //
  // Some methods are not supported because they have features for which inlining
  // is not implemented. For example, we do not currently support inlining throw
  // instructions into a try block.
  bool IsInliningSupported(const HInvoke* invoke_instruction,
                           art::ArtMethod* method,
                           const CodeItemDataAccessor& accessor) const
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Returns whether the inlining budget allows inlining `method`.
  //
  // For example, this checks whether the function has grown too large and
  // inlining should be prevented.
  bool IsInliningBudgetAvailable(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Inspects the body of a method (callee_graph) and returns whether it can be
  // inlined.
  //
  // This checks for instructions and constructs that we do not support
  // inlining, such as inlining a throw instruction into a try block.
  bool CanInlineBody(const HGraph* callee_graph,
                     const HBasicBlock* target_block,
                     size_t* out_number_of_instructions) const
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Create a new HInstanceFieldGet.
  HInstanceFieldGet* CreateInstanceFieldGet(uint32_t field_index,
                                            ArtMethod* referrer,
                                            HInstruction* obj);
  // Create a new HInstanceFieldSet.
  HInstanceFieldSet* CreateInstanceFieldSet(uint32_t field_index,
                                            ArtMethod* referrer,
                                            HInstruction* obj,
                                            HInstruction* value,
                                            bool* is_final = nullptr);

  // Try inlining the invoke instruction using inline caches.
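  // The inline cache is obtained either from the JIT code cache or from the AOT offline
  // profile (see GetInlineCacheJIT / GetInlineCacheAOT below); its shape (see InlineCacheType)
  // then selects between the monomorphic and polymorphic inlining paths.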
  bool TryInlineFromInlineCache(
      const DexFile& caller_dex_file,
      HInvoke* invoke_instruction,
      ArtMethod* resolved_method)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Try getting the inline cache from the JIT code cache.
  // Returns the resulting inline cache state; kInlineCacheNoData is returned when the inline
  // cache could not be allocated or no invoke info was found in the profiling info.
  InlineCacheType GetInlineCacheJIT(
      HInvoke* invoke_instruction,
      StackHandleScope<1>* hs,
      /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Try getting the inline cache from the AOT offline profile.
  // Returns the resulting inline cache state; kInlineCacheNoData is returned when the inline
  // cache could not be allocated or no invoke info was found in the profiling info.
  InlineCacheType GetInlineCacheAOT(const DexFile& caller_dex_file,
                                    HInvoke* invoke_instruction,
                                    StackHandleScope<1>* hs,
                                    /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Extract the mirror classes from the offline profile and add them to the `inline_cache`.
  // Note that even if we have profile data for the invoke, the `inline_cache` might contain
  // only null entries if the types cannot be resolved.
  InlineCacheType ExtractClassesFromOfflineProfile(
      const HInvoke* invoke_instruction,
      const ProfileCompilationInfo::OfflineProfileMethodInfo& offline_profile,
      /*out*/Handle<mirror::ObjectArray<mirror::Class>> inline_cache)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Compute the inline cache type.
  InlineCacheType GetInlineCacheType(
      const Handle<mirror::ObjectArray<mirror::Class>>& classes)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Try to inline the target of a monomorphic call. If successful, the code
  // in the graph will look like:
  // if (receiver.getClass() != ic.GetMonomorphicType()) deopt
  // ... // inlined code
  bool TryInlineMonomorphicCall(HInvoke* invoke_instruction,
                                ArtMethod* resolved_method,
                                Handle<mirror::ObjectArray<mirror::Class>> classes)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Try to inline targets of a polymorphic call.
  bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
                                ArtMethod* resolved_method,
                                Handle<mirror::ObjectArray<mirror::Class>> classes)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Try to inline a polymorphic call whose inline cache entries all resolve to the same method.
  bool TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
                                            ArtMethod* resolved_method,
                                            Handle<mirror::ObjectArray<mirror::Class>> classes)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Returns whether or not we should use only polymorphic inlining with no deoptimizations.
  bool UseOnlyPolymorphicInliningWithNoDeopt();

  // Try CHA-based devirtualization to change virtual method calls into
  // direct calls.
  // Returns the actual method that `resolved_method` can be devirtualized to.
  ArtMethod* TryCHADevirtualization(ArtMethod* resolved_method)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Add a CHA guard for a CHA-based devirtualized call. A CHA guard checks a
  // should_deoptimize flag and, if it is set, deoptimizes.
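  // Conceptually, the guard added before the devirtualized call looks like:
  //   if (should_deoptimize) deopt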
  void AddCHAGuard(HInstruction* invoke_instruction,
                   uint32_t dex_pc,
                   HInstruction* cursor,
                   HBasicBlock* bb_cursor);

  // Build a HInstanceFieldGet that reads the class of `receiver`.
  HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
                                           HInstruction* receiver,
                                           uint32_t dex_pc) const
      REQUIRES_SHARED(Locks::mutator_lock_);

  void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement)
      REQUIRES_SHARED(Locks::mutator_lock_);

  bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method)
      REQUIRES_SHARED(Locks::mutator_lock_);

  bool ReturnTypeMoreSpecific(HInvoke* invoke_instruction, HInstruction* return_replacement)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Add a type guard on the given `receiver`. This will add to the graph:
  // i0 = HFieldGet(receiver, klass)
  // i1 = HLoadClass(class_index, is_referrer)
  // i2 = HNotEqual(i0, i1)
  //
  // And if `with_deoptimization` is true:
  // HDeoptimize(i2)
  //
  // The method returns the `HNotEqual` that will be used for polymorphic inlining.
  HInstruction* AddTypeGuard(HInstruction* receiver,
                             HInstruction* cursor,
                             HBasicBlock* bb_cursor,
                             dex::TypeIndex class_index,
                             Handle<mirror::Class> klass,
                             HInstruction* invoke_instruction,
                             bool with_deoptimization)
      REQUIRES_SHARED(Locks::mutator_lock_);

  /*
   * Ad-hoc implementation of a diamond pattern in the graph for
   * polymorphic inlining:
   * 1) `compare` becomes the input of the new `HIf`.
   * 2) Everything up until `invoke_instruction` is in the then branch (could
   *    contain multiple blocks).
   * 3) `invoke_instruction` is moved to the otherwise block.
   * 4) If `return_replacement` is not null, the merge block will have
   *    a phi whose inputs are `return_replacement` and `invoke_instruction`.
   *
   * Before:
   *                 Block1
   *                 compare
   *                  ...
   *            invoke_instruction
   *
   * After:
   *                 Block1
   *                 compare
   *                   if
   *               /        \
   *              /          \
   *       Then block    Otherwise block
   *          ...        invoke_instruction
   *              \          /
   *               \        /
   *               Merge block
   *    phi(return_replacement, invoke_instruction)
   */
  void CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
                                                HInstruction* return_replacement,
                                                HInstruction* invoke_instruction);

  // Update the inlining budget based on `total_number_of_instructions_`.
  void UpdateInliningBudget();

  // Count the number of calls of `method` being inlined recursively.
  size_t CountRecursiveCallsOf(ArtMethod* method) const;

  // Pretty-print for spaces during logging.
  std::string DepthString(int line) const;

  HGraph* const outermost_graph_;
  const DexCompilationUnit& outer_compilation_unit_;
  const DexCompilationUnit& caller_compilation_unit_;
  CodeGenerator* const codegen_;
  const size_t total_number_of_dex_registers_;
  size_t total_number_of_instructions_;

  // The 'parent' inliner, that is, the inlining optimization that requested
  // `graph_` to be inlined.
  const HInliner* const parent_;
  const size_t depth_;

  // The budget left for inlining, in number of instructions.
  size_t inlining_budget_;

  // Used to record stats about optimizations on the inlined graph.
  // If the inlining is successful, these stats are merged into the caller graph's stats.
  OptimizingCompilerStats* inline_stats_;

  DISALLOW_COPY_AND_ASSIGN(HInliner);
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_INLINER_H_