1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_OPTIMIZING_INLINER_H_
18 #define ART_COMPILER_OPTIMIZING_INLINER_H_
19 
20 #include "dex/dex_file_types.h"
21 #include "dex/invoke_type.h"
22 #include "optimization.h"
23 #include "profile/profile_compilation_info.h"
24 
25 namespace art {
26 
27 class CodeGenerator;
28 class DexCompilationUnit;
29 class HGraph;
30 class HInvoke;
31 class OptimizingCompilerStats;
32 
33 class HInliner : public HOptimization {
34  public:
35   HInliner(HGraph* outer_graph,
36            HGraph* outermost_graph,
37            CodeGenerator* codegen,
38            const DexCompilationUnit& outer_compilation_unit,
39            const DexCompilationUnit& caller_compilation_unit,
40            OptimizingCompilerStats* stats,
41            size_t total_number_of_dex_registers,
42            size_t total_number_of_instructions,
43            HInliner* parent,
44            size_t depth = 0,
45            const char* name = kInlinerPassName)
HOptimization(outer_graph,name,stats)46       : HOptimization(outer_graph, name, stats),
47         outermost_graph_(outermost_graph),
48         outer_compilation_unit_(outer_compilation_unit),
49         caller_compilation_unit_(caller_compilation_unit),
50         codegen_(codegen),
51         total_number_of_dex_registers_(total_number_of_dex_registers),
52         total_number_of_instructions_(total_number_of_instructions),
53         parent_(parent),
54         depth_(depth),
55         inlining_budget_(0),
56         inline_stats_(nullptr) {}
57 
58   bool Run() override;
59 
60   static constexpr const char* kInlinerPassName = "inliner";
61 
62  private:
63   enum InlineCacheType {
64     kInlineCacheNoData = 0,
65     kInlineCacheUninitialized = 1,
66     kInlineCacheMonomorphic = 2,
67     kInlineCachePolymorphic = 3,
68     kInlineCacheMegamorphic = 4,
69     kInlineCacheMissingTypes = 5
70   };
71 
72   bool TryInline(HInvoke* invoke_instruction);
73 
74   // Attempt to resolve the target of the invoke instruction to an acutal call
75   // target.
76   //
77   // Returns the target directly in the case of static or direct invokes.
78   // Otherwise, uses CHA devirtualization or other methods to try to find the
79   // call target.
80   ArtMethod* FindActualCallTarget(HInvoke* invoke_instruction, bool* cha_devirtualize)
81     REQUIRES_SHARED(Locks::mutator_lock_);
82 
83   // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
84   // reference type propagation can run after the inlining. If the inlining is successful, this
85   // method will replace and remove the `invoke_instruction`. If `cha_devirtualize` is true,
86   // a CHA guard needs to be added for the inlining.
87   bool TryInlineAndReplace(HInvoke* invoke_instruction,
88                            ArtMethod* resolved_method,
89                            ReferenceTypeInfo receiver_type,
90                            bool do_rtp,
91                            bool cha_devirtualize)
92     REQUIRES_SHARED(Locks::mutator_lock_);
93 
94   bool TryBuildAndInline(HInvoke* invoke_instruction,
95                          ArtMethod* resolved_method,
96                          ReferenceTypeInfo receiver_type,
97                          HInstruction** return_replacement)
98     REQUIRES_SHARED(Locks::mutator_lock_);
99 
100   bool TryBuildAndInlineHelper(HInvoke* invoke_instruction,
101                                ArtMethod* resolved_method,
102                                ReferenceTypeInfo receiver_type,
103                                HInstruction** return_replacement)
104     REQUIRES_SHARED(Locks::mutator_lock_);
105 
106   // Substitutes parameters in the callee graph with their values from the caller.
107   void SubstituteArguments(HGraph* callee_graph,
108                            HInvoke* invoke_instruction,
109                            ReferenceTypeInfo receiver_type,
110                            const DexCompilationUnit& dex_compilation_unit)
111     REQUIRES_SHARED(Locks::mutator_lock_);
112 
113   // Run simple optimizations on `callee_graph`.
114   void RunOptimizations(HGraph* callee_graph,
115                         const dex::CodeItem* code_item,
116                         const DexCompilationUnit& dex_compilation_unit)
117     REQUIRES_SHARED(Locks::mutator_lock_);
118 
119   // Try to recognize known simple patterns and replace invoke call with appropriate instructions.
120   bool TryPatternSubstitution(HInvoke* invoke_instruction,
121                               ArtMethod* resolved_method,
122                               HInstruction** return_replacement)
123     REQUIRES_SHARED(Locks::mutator_lock_);
124 
125   // Returns whether inlining is allowed based on ART semantics.
126   bool IsInliningAllowed(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const
127     REQUIRES_SHARED(Locks::mutator_lock_);
128 
129 
130   // Returns whether ART supports inlining this method.
131   //
132   // Some methods are not supported because they have features for which inlining
133   // is not implemented. For example, we do not currently support inlining throw
134   // instructions into a try block.
135   bool IsInliningSupported(const HInvoke* invoke_instruction,
136                            art::ArtMethod* method,
137                            const CodeItemDataAccessor& accessor) const
138     REQUIRES_SHARED(Locks::mutator_lock_);
139 
140   // Returns whether the inlining budget allows inlining method.
141   //
142   // For example, this checks whether the function has grown too large and
143   // inlining should be prevented.
144   bool IsInliningBudgetAvailable(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const
145     REQUIRES_SHARED(Locks::mutator_lock_);
146 
147   // Inspects the body of a method (callee_graph) and returns whether it can be
148   // inlined.
149   //
150   // This checks for instructions and constructs that we do not support
151   // inlining, such as inlining a throw instruction into a try block.
152   bool CanInlineBody(const HGraph* callee_graph,
153                      const HBasicBlock* target_block,
154                      size_t* out_number_of_instructions) const
155     REQUIRES_SHARED(Locks::mutator_lock_);
156 
157   // Create a new HInstanceFieldGet.
158   HInstanceFieldGet* CreateInstanceFieldGet(uint32_t field_index,
159                                             ArtMethod* referrer,
160                                             HInstruction* obj);
161   // Create a new HInstanceFieldSet.
162   HInstanceFieldSet* CreateInstanceFieldSet(uint32_t field_index,
163                                             ArtMethod* referrer,
164                                             HInstruction* obj,
165                                             HInstruction* value,
166                                             bool* is_final = nullptr);
167 
168   // Try inlining the invoke instruction using inline caches.
169   bool TryInlineFromInlineCache(
170       const DexFile& caller_dex_file,
171       HInvoke* invoke_instruction,
172       ArtMethod* resolved_method)
173     REQUIRES_SHARED(Locks::mutator_lock_);
174 
175   // Try getting the inline cache from JIT code cache.
176   // Return true if the inline cache was successfully allocated and the
177   // invoke info was found in the profile info.
178   InlineCacheType GetInlineCacheJIT(
179       HInvoke* invoke_instruction,
180       StackHandleScope<1>* hs,
181       /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
182     REQUIRES_SHARED(Locks::mutator_lock_);
183 
184   // Try getting the inline cache from AOT offline profile.
185   // Return true if the inline cache was successfully allocated and the
186   // invoke info was found in the profile info.
187   InlineCacheType GetInlineCacheAOT(const DexFile& caller_dex_file,
188       HInvoke* invoke_instruction,
189       StackHandleScope<1>* hs,
190       /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
191     REQUIRES_SHARED(Locks::mutator_lock_);
192 
193   // Extract the mirror classes from the offline profile and add them to the `inline_cache`.
194   // Note that even if we have profile data for the invoke the inline_cache might contain
195   // only null entries if the types cannot be resolved.
196   InlineCacheType ExtractClassesFromOfflineProfile(
197       const HInvoke* invoke_instruction,
198       const ProfileCompilationInfo::OfflineProfileMethodInfo& offline_profile,
199       /*out*/Handle<mirror::ObjectArray<mirror::Class>> inline_cache)
200     REQUIRES_SHARED(Locks::mutator_lock_);
201 
202   // Compute the inline cache type.
203   InlineCacheType GetInlineCacheType(
204       const Handle<mirror::ObjectArray<mirror::Class>>& classes)
205     REQUIRES_SHARED(Locks::mutator_lock_);
206 
207   // Try to inline the target of a monomorphic call. If successful, the code
208   // in the graph will look like:
209   // if (receiver.getClass() != ic.GetMonomorphicType()) deopt
210   // ... // inlined code
211   bool TryInlineMonomorphicCall(HInvoke* invoke_instruction,
212                                 ArtMethod* resolved_method,
213                                 Handle<mirror::ObjectArray<mirror::Class>> classes)
214     REQUIRES_SHARED(Locks::mutator_lock_);
215 
216   // Try to inline targets of a polymorphic call.
217   bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
218                                 ArtMethod* resolved_method,
219                                 Handle<mirror::ObjectArray<mirror::Class>> classes)
220     REQUIRES_SHARED(Locks::mutator_lock_);
221 
222   bool TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
223                                             ArtMethod* resolved_method,
224                                             Handle<mirror::ObjectArray<mirror::Class>> classes)
225     REQUIRES_SHARED(Locks::mutator_lock_);
226 
227   // Returns whether or not we should use only polymorphic inlining with no deoptimizations.
228   bool UseOnlyPolymorphicInliningWithNoDeopt();
229 
230   // Try CHA-based devirtualization to change virtual method calls into
231   // direct calls.
232   // Returns the actual method that resolved_method can be devirtualized to.
233   ArtMethod* TryCHADevirtualization(ArtMethod* resolved_method)
234     REQUIRES_SHARED(Locks::mutator_lock_);
235 
236   // Add a CHA guard for a CHA-based devirtualized call. A CHA guard checks a
237   // should_deoptimize flag and if it's true, does deoptimization.
238   void AddCHAGuard(HInstruction* invoke_instruction,
239                    uint32_t dex_pc,
240                    HInstruction* cursor,
241                    HBasicBlock* bb_cursor);
242 
243   HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
244                                            HInstruction* receiver,
245                                            uint32_t dex_pc) const
246     REQUIRES_SHARED(Locks::mutator_lock_);
247 
248   void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement)
249     REQUIRES_SHARED(Locks::mutator_lock_);
250 
251   bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method)
252     REQUIRES_SHARED(Locks::mutator_lock_);
253 
254   bool ReturnTypeMoreSpecific(HInvoke* invoke_instruction, HInstruction* return_replacement)
255     REQUIRES_SHARED(Locks::mutator_lock_);
256 
257   // Add a type guard on the given `receiver`. This will add to the graph:
258   // i0 = HFieldGet(receiver, klass)
259   // i1 = HLoadClass(class_index, is_referrer)
260   // i2 = HNotEqual(i0, i1)
261   //
262   // And if `with_deoptimization` is true:
263   // HDeoptimize(i2)
264   //
265   // The method returns the `HNotEqual`, that will be used for polymorphic inlining.
266   HInstruction* AddTypeGuard(HInstruction* receiver,
267                              HInstruction* cursor,
268                              HBasicBlock* bb_cursor,
269                              dex::TypeIndex class_index,
270                              Handle<mirror::Class> klass,
271                              HInstruction* invoke_instruction,
272                              bool with_deoptimization)
273     REQUIRES_SHARED(Locks::mutator_lock_);
274 
275   /*
276    * Ad-hoc implementation for implementing a diamond pattern in the graph for
277    * polymorphic inlining:
278    * 1) `compare` becomes the input of the new `HIf`.
279    * 2) Everything up until `invoke_instruction` is in the then branch (could
280    *    contain multiple blocks).
281    * 3) `invoke_instruction` is moved to the otherwise block.
282    * 4) If `return_replacement` is not null, the merge block will have
283    *    a phi whose inputs are `return_replacement` and `invoke_instruction`.
284    *
285    * Before:
286    *             Block1
287    *             compare
288    *              ...
289    *         invoke_instruction
290    *
291    * After:
292    *            Block1
293    *            compare
294    *              if
295    *          /        \
296    *         /          \
297    *   Then block    Otherwise block
298    *      ...       invoke_instruction
299    *       \              /
300    *        \            /
301    *          Merge block
302    *  phi(return_replacement, invoke_instruction)
303    */
304   void CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
305                                                 HInstruction* return_replacement,
306                                                 HInstruction* invoke_instruction);
307 
308   // Update the inlining budget based on `total_number_of_instructions_`.
309   void UpdateInliningBudget();
310 
311   // Count the number of calls of `method` being inlined recursively.
312   size_t CountRecursiveCallsOf(ArtMethod* method) const;
313 
314   // Pretty-print for spaces during logging.
315   std::string DepthString(int line) const;
316 
317   HGraph* const outermost_graph_;
318   const DexCompilationUnit& outer_compilation_unit_;
319   const DexCompilationUnit& caller_compilation_unit_;
320   CodeGenerator* const codegen_;
321   const size_t total_number_of_dex_registers_;
322   size_t total_number_of_instructions_;
323 
324   // The 'parent' inliner, that means the inlinigng optimization that requested
325   // `graph_` to be inlined.
326   const HInliner* const parent_;
327   const size_t depth_;
328 
329   // The budget left for inlining, in number of instructions.
330   size_t inlining_budget_;
331 
332   // Used to record stats about optimizations on the inlined graph.
333   // If the inlining is successful, these stats are merged to the caller graph's stats.
334   OptimizingCompilerStats* inline_stats_;
335 
336   DISALLOW_COPY_AND_ASSIGN(HInliner);
337 };
338 
339 }  // namespace art
340 
341 #endif  // ART_COMPILER_OPTIMIZING_INLINER_H_
342