/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string>

#include "scheduler.h"

#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "data_type-inl.h"
#include "prepare_for_register_allocation.h"

#ifdef ART_ENABLE_CODEGEN_arm64
#include "scheduler_arm64.h"
#endif

#ifdef ART_ENABLE_CODEGEN_arm
#include "scheduler_arm.h"
#endif

namespace art {

void SchedulingGraph::AddDependency(SchedulingNode* node,
                                    SchedulingNode* dependency,
                                    bool is_data_dependency) {
  if (node == nullptr || dependency == nullptr) {
    // A `nullptr` node indicates an instruction out of scheduling range (e.g. in
    // another block), so we do not need to add a dependency edge to the graph.
    return;
  }

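  // Data predecessors contribute to the critical-path computation; "other"
  // predecessors only enforce ordering (see HScheduler::Schedule below).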
  if (is_data_dependency) {
    node->AddDataPredecessor(dependency);
  } else {
    node->AddOtherPredecessor(dependency);
  }
}

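// Returns true if the side effects of `instr1` and `instr2` prevent reordering
// them: read-after-write, write-after-read, or write-after-write.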
bool SideEffectDependencyAnalysis::HasReorderingDependency(const HInstruction* instr1,
                                                           const HInstruction* instr2) {
  SideEffects instr1_side_effects = instr1->GetSideEffects();
  SideEffects instr2_side_effects = instr2->GetSideEffects();

  // Read after write.
  if (instr1_side_effects.MayDependOn(instr2_side_effects)) {
    return true;
  }

  // Write after read.
  if (instr2_side_effects.MayDependOn(instr1_side_effects)) {
    return true;
  }

  // Memory write after write.
  if (instr1_side_effects.DoesAnyWrite() && instr2_side_effects.DoesAnyWrite()) {
    return true;
  }

  return false;
}

size_t SideEffectDependencyAnalysis::MemoryDependencyAnalysis::ArrayAccessHeapLocation(
    HInstruction* instruction) const {
  DCHECK(heap_location_collector_ != nullptr);
  size_t heap_loc = heap_location_collector_->GetArrayHeapLocation(instruction);
  // This array access should have been analyzed and added to the HeapLocationCollector already.
  DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);
  return heap_loc;
}

bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::ArrayAccessMayAlias(
    HInstruction* instr1, HInstruction* instr2) const {
  DCHECK(heap_location_collector_ != nullptr);
  size_t instr1_heap_loc = ArrayAccessHeapLocation(instr1);
  size_t instr2_heap_loc = ArrayAccessHeapLocation(instr2);

  // For example: arr[0] and arr[0]
  if (instr1_heap_loc == instr2_heap_loc) {
    return true;
  }

  // For example: arr[0] and arr[i]
  if (heap_location_collector_->MayAlias(instr1_heap_loc, instr2_heap_loc)) {
    return true;
  }

  return false;
}

static bool IsArrayAccess(const HInstruction* instruction) {
  return instruction->IsArrayGet() || instruction->IsArraySet();
}

static bool IsInstanceFieldAccess(const HInstruction* instruction) {
  return instruction->IsInstanceFieldGet() ||
         instruction->IsInstanceFieldSet() ||
         instruction->IsUnresolvedInstanceFieldGet() ||
         instruction->IsUnresolvedInstanceFieldSet();
}

static bool IsStaticFieldAccess(const HInstruction* instruction) {
  return instruction->IsStaticFieldGet() ||
         instruction->IsStaticFieldSet() ||
         instruction->IsUnresolvedStaticFieldGet() ||
         instruction->IsUnresolvedStaticFieldSet();
}

static bool IsResolvedFieldAccess(const HInstruction* instruction) {
  return instruction->IsInstanceFieldGet() ||
         instruction->IsInstanceFieldSet() ||
         instruction->IsStaticFieldGet() ||
         instruction->IsStaticFieldSet();
}

static bool IsUnresolvedFieldAccess(const HInstruction* instruction) {
  return instruction->IsUnresolvedInstanceFieldGet() ||
         instruction->IsUnresolvedInstanceFieldSet() ||
         instruction->IsUnresolvedStaticFieldGet() ||
         instruction->IsUnresolvedStaticFieldSet();
}

static bool IsFieldAccess(const HInstruction* instruction) {
  return IsResolvedFieldAccess(instruction) || IsUnresolvedFieldAccess(instruction);
}

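// Returns the FieldInfo of a resolved field access. Must only be called for
// resolved instance/static field gets and sets.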
static const FieldInfo* GetFieldInfo(const HInstruction* instruction) {
  if (instruction->IsInstanceFieldGet()) {
    return &instruction->AsInstanceFieldGet()->GetFieldInfo();
  } else if (instruction->IsInstanceFieldSet()) {
    return &instruction->AsInstanceFieldSet()->GetFieldInfo();
  } else if (instruction->IsStaticFieldGet()) {
    return &instruction->AsStaticFieldGet()->GetFieldInfo();
  } else if (instruction->IsStaticFieldSet()) {
    return &instruction->AsStaticFieldSet()->GetFieldInfo();
  } else {
    LOG(FATAL) << "Unexpected field access type";
    UNREACHABLE();
  }
}

size_t SideEffectDependencyAnalysis::MemoryDependencyAnalysis::FieldAccessHeapLocation(
    const HInstruction* instr) const {
  DCHECK(instr != nullptr);
  DCHECK(GetFieldInfo(instr) != nullptr);
  DCHECK(heap_location_collector_ != nullptr);

  size_t heap_loc = heap_location_collector_->GetFieldHeapLocation(instr->InputAt(0),
                                                                   GetFieldInfo(instr));
  // This field access should have been analyzed and added to the HeapLocationCollector already.
  DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);

  return heap_loc;
}

bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::FieldAccessMayAlias(
    const HInstruction* instr1, const HInstruction* instr2) const {
  DCHECK(heap_location_collector_ != nullptr);

  // Static and instance field accesses should not alias.
  if ((IsInstanceFieldAccess(instr1) && IsStaticFieldAccess(instr2)) ||
      (IsStaticFieldAccess(instr1) && IsInstanceFieldAccess(instr2))) {
    return false;
  }

  // If either of the field accesses is unresolved.
  if (IsUnresolvedFieldAccess(instr1) || IsUnresolvedFieldAccess(instr2)) {
    // Conservatively assume that the two accesses may alias.
    return true;
  }

  // Both field accesses are resolved.
  size_t instr1_field_access_heap_loc = FieldAccessHeapLocation(instr1);
  size_t instr2_field_access_heap_loc = FieldAccessHeapLocation(instr2);

  if (instr1_field_access_heap_loc == instr2_field_access_heap_loc) {
    return true;
  }

  if (!heap_location_collector_->MayAlias(instr1_field_access_heap_loc,
                                          instr2_field_access_heap_loc)) {
    return false;
  }

  return true;
}

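// Returns true if the two instructions have a memory dependency that prevents
// reordering: they fail the side-effect reordering check and alias analysis
// cannot prove that they access disjoint heap locations.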
bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::HasMemoryDependency(
    HInstruction* instr1, HInstruction* instr2) const {
  if (!HasReorderingDependency(instr1, instr2)) {
    return false;
  }

  if (heap_location_collector_ == nullptr ||
      heap_location_collector_->GetNumberOfHeapLocations() == 0) {
    // Without HeapLocation information from load-store analysis, we cannot do
    // any further disambiguation of these two instructions, so conservatively
    // report a memory dependency between them.
    return true;
  }

  if (IsArrayAccess(instr1) && IsArrayAccess(instr2)) {
    return ArrayAccessMayAlias(instr1, instr2);
  }
  if (IsFieldAccess(instr1) && IsFieldAccess(instr2)) {
    return FieldAccessMayAlias(instr1, instr2);
  }

  // TODO(xueliang): LSA to support alias analysis among HVecLoad, HVecStore and ArrayAccess
  if (instr1->IsVecMemoryOperation() && instr2->IsVecMemoryOperation()) {
    return true;
  }
  if (instr1->IsVecMemoryOperation() && IsArrayAccess(instr2)) {
    return true;
  }
  if (IsArrayAccess(instr1) && instr2->IsVecMemoryOperation()) {
    return true;
  }

  // Heap accesses of different kinds should not alias.
  if (IsArrayAccess(instr1) && IsFieldAccess(instr2)) {
    return false;
  }
  if (IsFieldAccess(instr1) && IsArrayAccess(instr2)) {
    return false;
  }
  if (instr1->IsVecMemoryOperation() && IsFieldAccess(instr2)) {
    return false;
  }
  if (IsFieldAccess(instr1) && instr2->IsVecMemoryOperation()) {
    return false;
  }

  // We conservatively treat all other cases as having a dependency,
  // for example, Invoke and ArrayGet.
  return true;
}

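// Returns true if reordering `instr1` and `instr2` could change observable
// behaviour with respect to exceptions: a memory write must not be moved across
// a potentially throwing instruction (in either direction), and two throwing
// instructions must keep their relative order.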
bool SideEffectDependencyAnalysis::HasExceptionDependency(const HInstruction* instr1,
                                                          const HInstruction* instr2) {
  if (instr2->CanThrow() && instr1->GetSideEffects().DoesAnyWrite()) {
    return true;
  }
  if (instr2->GetSideEffects().DoesAnyWrite() && instr1->CanThrow()) {
    return true;
  }
  if (instr2->CanThrow() && instr1->CanThrow()) {
    return true;
  }

  // The checks above should cover all cases where we cannot reorder two
  // instructions which may throw an exception.
  return false;
}

// Check whether the specified instruction is a better candidate, i.e. one that is
// more likely to have other instructions depending on it.
static bool IsBetterCandidateWithMoreLikelyDependencies(HInstruction* new_candidate,
                                                        HInstruction* old_candidate) {
  if (!new_candidate->GetSideEffects().Includes(old_candidate->GetSideEffects())) {
    // Weaker side effects.
    return false;
  }
  if (old_candidate->GetSideEffects().Includes(new_candidate->GetSideEffects())) {
    // Same side effects, check if `new_candidate` has stronger `CanThrow()`.
    return new_candidate->CanThrow() && !old_candidate->CanThrow();
  } else {
    // Stronger side effects, check if `new_candidate` has at least as strong `CanThrow()`.
    return new_candidate->CanThrow() || !old_candidate->CanThrow();
  }
}

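// Add dependencies due to cross-iteration data flow through loop header phis.
// Such phis bring values from the previous iteration into the current one, which
// constrains how instructions in the loop body may be reordered.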
void SchedulingGraph::AddCrossIterationDependencies(SchedulingNode* node) {
  for (HInstruction* instruction : node->GetInstruction()->GetInputs()) {
    // Having a phi-function from a loop header as an input means the current node of the
    // scheduling graph has a cross-iteration dependency because such phi-functions bring values
    // from the previous iteration to the current iteration.
    if (!instruction->IsLoopHeaderPhi()) {
      continue;
    }
    for (HInstruction* phi_input : instruction->GetInputs()) {
      // As the scheduling graph of the current basic block is built by
      // processing instructions bottom-up, nullptr returned by GetNode means
      // an instruction defining a value for the phi is either before the
      // instruction represented by node or it is in a different basic block.
      SchedulingNode* def_node = GetNode(phi_input);

      // We don't create a dependency if there are uses besides the use in phi.
      // In such cases a register to hold phi_input is usually allocated and
      // a MOV instruction is generated. In cases with multiple uses and no MOV
      // instruction, a reordering that creates a MOV instruction can improve
      // performance more than an attempt to avoid the MOV instruction.
      if (def_node != nullptr && def_node != node && phi_input->GetUses().HasExactlyOneElement()) {
        // We have an implicit data dependency between node and def_node.
        // AddDataDependency cannot be used because it is for explicit data dependencies.
        // So AddOtherDependency is used.
        AddOtherDependency(def_node, node);
      }
    }
  }
}

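// Build all dependency edges for `instruction_node`: define-use (data) dependencies,
// scheduling barrier dependencies, side effect dependencies, environment
// dependencies and cross-iteration dependencies.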
void SchedulingGraph::AddDependencies(SchedulingNode* instruction_node,
                                      bool is_scheduling_barrier) {
  HInstruction* instruction = instruction_node->GetInstruction();

  // Define-use dependencies.
  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
    AddDataDependency(GetNode(use.GetUser()), instruction_node);
  }

  // Scheduling barrier dependencies.
  DCHECK(!is_scheduling_barrier || contains_scheduling_barrier_);
  if (contains_scheduling_barrier_) {
    // A barrier depends on the instructions after it, and instructions before the
    // barrier depend on it.
    for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
      SchedulingNode* other_node = GetNode(other);
      CHECK(other_node != nullptr)
          << other->DebugName()
          << " is in block " << other->GetBlock()->GetBlockId()
          << ", and expected in block " << instruction->GetBlock()->GetBlockId();
      bool other_is_barrier = other_node->IsSchedulingBarrier();
      if (is_scheduling_barrier || other_is_barrier) {
        AddOtherDependency(other_node, instruction_node);
      }
      if (other_is_barrier) {
        // This other scheduling barrier guarantees ordering of instructions after
        // it, so avoid creating additional useless dependencies in the graph.
        // For example if we have
        //     instr_1
        //     barrier_2
        //     instr_3
        //     barrier_4
        //     instr_5
        // we only create the following non-data dependencies
        //     1 -> 2
        //     2 -> 3
        //     2 -> 4
        //     3 -> 4
        //     4 -> 5
        // and do not create
        //     1 -> 4
        //     2 -> 5
        // Note that in this example we could also avoid creating the dependency
        // `2 -> 4`.  But if we remove `instr_3` that dependency is required to
        // order the barriers. So we generate it to avoid a special case.
        break;
      }
    }
  }

  // Side effect dependencies.
  if (!instruction->GetSideEffects().DoesNothing() || instruction->CanThrow()) {
    HInstruction* dep_chain_candidate = nullptr;
    for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
      SchedulingNode* other_node = GetNode(other);
      if (other_node->IsSchedulingBarrier()) {
        // We have reached a scheduling barrier so we can stop further
        // processing.
        //
        // As an "other" dependency is not set up if a data dependency exists, we need to
        // check that at least one of the two exists.
        DCHECK(other_node->HasOtherDependency(instruction_node)
               || other_node->HasDataDependency(instruction_node));
        break;
      }
      if (side_effect_dependency_analysis_.HasSideEffectDependency(other, instruction)) {
        if (dep_chain_candidate != nullptr &&
            side_effect_dependency_analysis_.HasSideEffectDependency(other, dep_chain_candidate)) {
          // Skip an explicit dependency to reduce memory usage, rely on the transitive dependency.
        } else {
          AddOtherDependency(other_node, instruction_node);
        }
        // Check if `other` is a better candidate which more likely will have other instructions
        // depending on it.
        if (dep_chain_candidate == nullptr ||
            IsBetterCandidateWithMoreLikelyDependencies(other, dep_chain_candidate)) {
          dep_chain_candidate = other;
        }
      }
    }
  }

  // Environment dependencies.
  // We do not need to process those if the instruction is a scheduling barrier,
  // since the barrier already has non-data dependencies on all following
  // instructions.
  if (!is_scheduling_barrier) {
    for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
      // Note that here we could stop processing if the environment holder is
      // across a scheduling barrier. But checking this would likely require
      // more work than simply iterating through environment uses.
      AddOtherDependency(GetNode(use.GetUser()->GetHolder()), instruction_node);
    }
  }

  AddCrossIterationDependencies(instruction_node);
}

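// Returns an identifier used for a node in the dot output, combining the data
// type prefix of the instruction with its instruction id (e.g. "i20").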
static const std::string InstructionTypeId(const HInstruction* instruction) {
  return DataType::TypeId(instruction->GetType()) + std::to_string(instruction->GetId());
}

// Ideally we would reuse the graph visualizer code, but it is not available
// from here and it is not worth moving all that code only for our use.
static void DumpAsDotNode(std::ostream& output, const SchedulingNode* node) {
  const HInstruction* instruction = node->GetInstruction();
  // Use the instruction typed id as the node identifier.
  std::string instruction_id = InstructionTypeId(instruction);
  output << instruction_id << "[shape=record, label=\""
      << instruction_id << ' ' << instruction->DebugName() << " [";
  // List the instruction's inputs in its description. When visualizing the
  // graph this helps differentiating data inputs from other dependencies.
  const char* separator = "";
  for (const HInstruction* input : instruction->GetInputs()) {
    output << separator << InstructionTypeId(input);
    separator = ",";
  }
  output << "]";
  // Other properties of the node.
  output << "\\ninternal_latency: " << node->GetInternalLatency();
  output << "\\ncritical_path: " << node->GetCriticalPath();
  if (node->IsSchedulingBarrier()) {
    output << "\\n(barrier)";
  }
  output << "\"];\n";
  // We want program order to go from top to bottom in the graph output, so we
  // reverse the edges and specify `dir=back`.
  for (const SchedulingNode* predecessor : node->GetDataPredecessors()) {
    const HInstruction* predecessor_instruction = predecessor->GetInstruction();
    output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
        << "[label=\"" << predecessor->GetLatency() << "\",dir=back]\n";
  }
  for (const SchedulingNode* predecessor : node->GetOtherPredecessors()) {
    const HInstruction* predecessor_instruction = predecessor->GetInstruction();
    output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
        << "[dir=back,color=blue]\n";
  }
}

void SchedulingGraph::DumpAsDotGraph(const std::string& description,
                                     const ScopedArenaVector<SchedulingNode*>& initial_candidates) {
  // TODO(xueliang): ideally we should move scheduling information into HInstruction, after that
  // we should move this dotty graph dump feature to visualizer, and have a compiler option for it.
  std::ofstream output("scheduling_graphs.dot", std::ofstream::out | std::ofstream::app);
  // Description of this graph, as a comment.
  output << "// " << description << "\n";
  // Start the dot graph. Use an increasing index for easier differentiation.
  output << "digraph G {\n";
  for (const auto& entry : nodes_map_) {
    SchedulingNode* node = entry.second.get();
    DumpAsDotNode(output, node);
  }
  // Create a fake 'end_of_scheduling' node to help visualization of critical_paths.
  for (SchedulingNode* node : initial_candidates) {
    const HInstruction* instruction = node->GetInstruction();
    output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n "
      << "[label=\"" << node->GetLatency() << "\",dir=back]\n";
  }
  // End of the dot graph.
  output << "}\n";
  output.close();
}

SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition(
    ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) const {
  // Schedule condition inputs that can be materialized immediately before their use.
  // In the following example, after we've scheduled HSelect, we want LessThan to be scheduled
  // immediately, because it is a materialized condition, and will be emitted right before HSelect
  // in the codegen phase.
  //
  // i20 HLessThan [...]                  HLessThan    HAdd      HAdd
  // i21 HAdd [...]                ===>      |          |         |
  // i22 HAdd [...]                          +----------+---------+
  // i23 HSelect [i21, i22, i20]                     HSelect

  if (prev_select_ == nullptr) {
    return nullptr;
  }

  const HInstruction* instruction = prev_select_->GetInstruction();
  const HCondition* condition = nullptr;
  DCHECK(instruction != nullptr);

  if (instruction->IsIf()) {
    condition = instruction->AsIf()->InputAt(0)->AsCondition();
  } else if (instruction->IsSelect()) {
    condition = instruction->AsSelect()->GetCondition()->AsCondition();
  }

  SchedulingNode* condition_node = (condition != nullptr) ? graph.GetNode(condition) : nullptr;

  if ((condition_node != nullptr) &&
      condition->HasOnlyOneNonEnvironmentUse() &&
      ContainsElement(*nodes, condition_node)) {
    DCHECK(!condition_node->HasUnscheduledSuccessors());
    // Remove the condition from the list of candidates and schedule it.
    RemoveElement(*nodes, condition_node);
    return condition_node;
  }

  return nullptr;
}

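// Pop and return the next node to schedule: prefer a materialized condition that
// must be emitted right before its user, otherwise pick the candidate with the
// longest critical path.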
SchedulingNode* CriticalPathSchedulingNodeSelector::PopHighestPriorityNode(
    ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) {
  DCHECK(!nodes->empty());
  SchedulingNode* select_node = nullptr;

  // Optimize for a materialized condition and its emit-before-use scenario.
  select_node = SelectMaterializedCondition(nodes, graph);

  if (select_node == nullptr) {
    // Get highest priority node based on critical path information.
    select_node = (*nodes)[0];
    size_t select = 0;
    for (size_t i = 1, e = nodes->size(); i < e; i++) {
      SchedulingNode* check = (*nodes)[i];
      SchedulingNode* candidate = (*nodes)[select];
      select_node = GetHigherPrioritySchedulingNode(candidate, check);
      if (select_node == check) {
        select = i;
      }
    }
    DeleteNodeAtIndex(nodes, select);
  }

  prev_select_ = select_node;
  return select_node;
}

SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingNode(
    SchedulingNode* candidate, SchedulingNode* check) const {
  uint32_t candidate_path = candidate->GetCriticalPath();
  uint32_t check_path = check->GetCriticalPath();
  // First look at the critical_path.
  if (check_path != candidate_path) {
    return check_path < candidate_path ? check : candidate;
  }
  // If both critical paths are equal, schedule instructions with a higher latency
  // first in program order.
  return check->GetLatency() < candidate->GetLatency() ? check : candidate;
}

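// Schedule the whole graph: run load-store analysis (unless we only optimize loop
// blocks and the graph has no loops), then schedule every schedulable block in
// reverse post order.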
void HScheduler::Schedule(HGraph* graph) {
  // We run LSA here instead of in a separate pass to better control whether we
  // should run the analysis or not.
  const HeapLocationCollector* heap_location_collector = nullptr;
  ScopedArenaAllocator allocator(graph->GetArenaStack());
  LoadStoreAnalysis lsa(graph, &allocator);
  if (!only_optimize_loop_blocks_ || graph->HasLoops()) {
    lsa.Run();
    heap_location_collector = &lsa.GetHeapLocationCollector();
  }

  for (HBasicBlock* block : graph->GetReversePostOrder()) {
    if (IsSchedulable(block)) {
      Schedule(block, heap_location_collector);
    }
  }
}

void HScheduler::Schedule(HBasicBlock* block,
                          const HeapLocationCollector* heap_location_collector) {
  ScopedArenaAllocator allocator(block->GetGraph()->GetArenaStack());
  ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator.Adapter(kArenaAllocScheduler));

  // Build the scheduling graph.
  SchedulingGraph scheduling_graph(&allocator, heap_location_collector);
  for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
    HInstruction* instruction = it.Current();
    CHECK_EQ(instruction->GetBlock(), block)
        << instruction->DebugName()
        << " is in block " << instruction->GetBlock()->GetBlockId()
        << ", and expected in block " << block->GetBlockId();
    SchedulingNode* node = scheduling_graph.AddNode(instruction, IsSchedulingBarrier(instruction));
    CalculateLatency(node);
    scheduling_nodes.push_back(node);
  }

  if (scheduling_graph.Size() <= 1) {
    return;
  }

  cursor_ = block->GetLastInstruction();

  // The list of candidates for scheduling. A node becomes a candidate when all
  // its successors have been scheduled.
  ScopedArenaVector<SchedulingNode*> candidates(allocator.Adapter(kArenaAllocScheduler));

  // Find the initial candidates for scheduling.
  for (SchedulingNode* node : scheduling_nodes) {
    if (!node->HasUnscheduledSuccessors()) {
      node->MaybeUpdateCriticalPath(node->GetLatency());
      candidates.push_back(node);
    }
  }

  ScopedArenaVector<SchedulingNode*> initial_candidates(allocator.Adapter(kArenaAllocScheduler));
  if (kDumpDotSchedulingGraphs) {
    // Remember the list of initial candidates for debug output purposes.
    initial_candidates.assign(candidates.begin(), candidates.end());
  }

  // Schedule all nodes.
  selector_->Reset();
  while (!candidates.empty()) {
    SchedulingNode* node = selector_->PopHighestPriorityNode(&candidates, scheduling_graph);
    Schedule(node, &candidates);
  }

  if (kDumpDotSchedulingGraphs) {
    // Dump the graph in `dot` format.
    HGraph* graph = block->GetGraph();
    std::stringstream description;
    description << graph->GetDexFile().PrettyMethod(graph->GetMethodIdx())
        << " B" << block->GetBlockId();
    scheduling_graph.DumpAsDotGraph(description.str(), initial_candidates);
  }
}

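// Schedule a single node: update the critical path of its data predecessors,
// decrement unscheduled-successor counts, add predecessors that became ready to
// `candidates`, and finally move the instruction to its new position.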
void HScheduler::Schedule(SchedulingNode* scheduling_node,
                          /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates) {
  // Check whether any of the node's predecessors will be valid candidates after
  // this node is scheduled.
  uint32_t path_to_node = scheduling_node->GetCriticalPath();
  for (SchedulingNode* predecessor : scheduling_node->GetDataPredecessors()) {
    predecessor->MaybeUpdateCriticalPath(
        path_to_node + predecessor->GetInternalLatency() + predecessor->GetLatency());
    predecessor->DecrementNumberOfUnscheduledSuccessors();
    if (!predecessor->HasUnscheduledSuccessors()) {
      candidates->push_back(predecessor);
    }
  }
  for (SchedulingNode* predecessor : scheduling_node->GetOtherPredecessors()) {
    // Do not update the critical path.
    // The 'other' (so 'non-data') dependencies (usually) do not represent a
    // 'material' dependency of nodes on others. They exist for program
    // correctness. So we do not use them to compute the critical path.
    predecessor->DecrementNumberOfUnscheduledSuccessors();
    if (!predecessor->HasUnscheduledSuccessors()) {
      candidates->push_back(predecessor);
    }
  }

  Schedule(scheduling_node->GetInstruction());
}

// Move an instruction after the cursor instruction inside one basic block.
static void MoveAfterInBlock(HInstruction* instruction, HInstruction* cursor) {
  DCHECK_EQ(instruction->GetBlock(), cursor->GetBlock());
  DCHECK_NE(cursor, cursor->GetBlock()->GetLastInstruction());
  DCHECK(!instruction->IsControlFlow());
  DCHECK(!cursor->IsControlFlow());
  instruction->MoveBefore(cursor->GetNext(), /* do_checks= */ false);
}

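// Place `instruction` at the current scheduling position: if it is already the
// cursor, just move the cursor up; otherwise move the instruction right after the cursor.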
void HScheduler::Schedule(HInstruction* instruction) {
  if (instruction == cursor_) {
    cursor_ = cursor_->GetPrevious();
  } else {
    MoveAfterInBlock(instruction, cursor_);
  }
}

bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
  // We want to avoid exhaustively listing all instructions, so we first check
  // for instruction categories that we know are safe.
  if (instruction->IsControlFlow() ||
      instruction->IsConstant()) {
    return true;
  }
  // Currently all unary and binary operations are safe to schedule, so avoid
  // checking for each of them individually.
  // Since nothing prevents a new scheduling-unsafe HInstruction from subclassing
  // HUnaryOperation (or HBinaryOperation), check in debug mode that we have
  // the exhaustive lists here.
  if (instruction->IsUnaryOperation()) {
    DCHECK(instruction->IsAbs() ||
           instruction->IsBooleanNot() ||
           instruction->IsNot() ||
           instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName();
    return true;
  }
  if (instruction->IsBinaryOperation()) {
    DCHECK(instruction->IsAdd() ||
           instruction->IsAnd() ||
           instruction->IsCompare() ||
           instruction->IsCondition() ||
           instruction->IsDiv() ||
           instruction->IsMin() ||
           instruction->IsMax() ||
           instruction->IsMul() ||
           instruction->IsOr() ||
           instruction->IsRem() ||
           instruction->IsRor() ||
           instruction->IsShl() ||
           instruction->IsShr() ||
           instruction->IsSub() ||
           instruction->IsUShr() ||
           instruction->IsXor()) << "unexpected instruction " << instruction->DebugName();
    return true;
  }
  // The scheduler should not see any of these.
  DCHECK(!instruction->IsParallelMove()) << "unexpected instruction " << instruction->DebugName();
  // List of instructions explicitly excluded:
  //    HClearException
  //    HClinitCheck
  //    HDeoptimize
  //    HLoadClass
  //    HLoadException
  //    HMemoryBarrier
  //    HMonitorOperation
  //    HNativeDebugInfo
  //    HThrow
  //    HTryBoundary
  // TODO: Some of the instructions above may be safe to schedule (maybe as
  // scheduling barriers).
  return instruction->IsArrayGet() ||
      instruction->IsArraySet() ||
      instruction->IsArrayLength() ||
      instruction->IsBoundType() ||
      instruction->IsBoundsCheck() ||
      instruction->IsCheckCast() ||
      instruction->IsClassTableGet() ||
      instruction->IsCurrentMethod() ||
      instruction->IsDivZeroCheck() ||
      (instruction->IsInstanceFieldGet() && !instruction->AsInstanceFieldGet()->IsVolatile()) ||
      (instruction->IsInstanceFieldSet() && !instruction->AsInstanceFieldSet()->IsVolatile()) ||
      instruction->IsInstanceOf() ||
      instruction->IsInvokeInterface() ||
      instruction->IsInvokeStaticOrDirect() ||
      instruction->IsInvokeUnresolved() ||
      instruction->IsInvokeVirtual() ||
      instruction->IsLoadString() ||
      instruction->IsNewArray() ||
      instruction->IsNewInstance() ||
      instruction->IsNullCheck() ||
      instruction->IsPackedSwitch() ||
      instruction->IsParameterValue() ||
      instruction->IsPhi() ||
      instruction->IsReturn() ||
      instruction->IsReturnVoid() ||
      instruction->IsSelect() ||
      (instruction->IsStaticFieldGet() && !instruction->AsStaticFieldGet()->IsVolatile()) ||
      (instruction->IsStaticFieldSet() && !instruction->AsStaticFieldSet()->IsVolatile()) ||
      instruction->IsSuspendCheck() ||
      instruction->IsTypeConversion();
}

bool HScheduler::IsSchedulable(const HBasicBlock* block) const {
  // We may only be interested in loop blocks.
  if (only_optimize_loop_blocks_ && !block->IsInLoop()) {
    return false;
  }
  if (block->GetTryCatchInformation() != nullptr) {
    // Do not schedule blocks that are part of try-catch, because the scheduler cannot
    // see whether the catch block has assumptions about the instruction order in the
    // try block. In the following example, if we enabled the scheduler for the try block,
    // MultiplyAccumulate might be scheduled before DivZeroCheck,
    // which could result in an incorrect value in the catch block.
    //   try {
    //     a = a/b;    // DivZeroCheck
    //                 // Div
    //     c = c*d+e;  // MultiplyAccumulate
    //   } catch {System.out.print(c); }
    return false;
  }
  // Check whether all instructions in this block are schedulable.
  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
    if (!IsSchedulable(it.Current())) {
      return false;
    }
  }
  return true;
}

bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const {
  return instr->IsControlFlow() ||
      // Don't break calling convention.
      instr->IsParameterValue() ||
      // Code generation of goto relies on SuspendCheck's position.
      instr->IsSuspendCheck();
}

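// Entry point of the scheduling pass: pick the node selector (random or critical
// path based) and dispatch to the scheduler for the target instruction set.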
bool HInstructionScheduling::Run(bool only_optimize_loop_blocks,
                                 bool schedule_randomly) {
#if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm)
  // Phase-local allocator that allocates scheduler internal data structures like
  // scheduling nodes, internal nodes map, dependencies, etc.
  CriticalPathSchedulingNodeSelector critical_path_selector;
  RandomSchedulingNodeSelector random_selector;
  SchedulingNodeSelector* selector = schedule_randomly
      ? static_cast<SchedulingNodeSelector*>(&random_selector)
      : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
#else
  // Avoid compilation error when compiling for unsupported instruction set.
  UNUSED(only_optimize_loop_blocks);
  UNUSED(schedule_randomly);
  UNUSED(codegen_);
#endif

  switch (instruction_set_) {
#ifdef ART_ENABLE_CODEGEN_arm64
    case InstructionSet::kArm64: {
      arm64::HSchedulerARM64 scheduler(selector);
      scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
      scheduler.Schedule(graph_);
      break;
    }
#endif
#if defined(ART_ENABLE_CODEGEN_arm)
    case InstructionSet::kThumb2:
    case InstructionSet::kArm: {
      arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_);
      arm::HSchedulerARM scheduler(selector, &arm_latency_visitor);
      scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
      scheduler.Schedule(graph_);
      break;
    }
#endif
    default:
      break;
  }
  return true;
}

}  // namespace art