/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string>

#include "scheduler.h"

#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "data_type-inl.h"
#include "prepare_for_register_allocation.h"

#ifdef ART_ENABLE_CODEGEN_arm64
#include "scheduler_arm64.h"
#endif

#ifdef ART_ENABLE_CODEGEN_arm
#include "scheduler_arm.h"
#endif

namespace art {

void SchedulingGraph::AddDependency(SchedulingNode* node,
                                    SchedulingNode* dependency,
                                    bool is_data_dependency) {
  if (node == nullptr || dependency == nullptr) {
    // A `nullptr` node indicates an instruction out of scheduling range (e.g. in
    // another block), so we do not need to add a dependency edge to the graph.
    return;
  }

  if (is_data_dependency) {
    node->AddDataPredecessor(dependency);
  } else {
    node->AddOtherPredecessor(dependency);
  }
}

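// Whether `instr1` and `instr2` must keep their relative order because of their side
// effects. This is the coarse check for the classic hazards at side-effect granularity:
// read-after-write, write-after-read and write-after-write. For example, a memory write
// followed by a read that may depend on it (e.g. an ArraySet then an ArrayGet) must not
// be reordered.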
bool SideEffectDependencyAnalysis::HasReorderingDependency(const HInstruction* instr1,
                                                           const HInstruction* instr2) {
  SideEffects instr1_side_effects = instr1->GetSideEffects();
  SideEffects instr2_side_effects = instr2->GetSideEffects();

  // Read after write.
  if (instr1_side_effects.MayDependOn(instr2_side_effects)) {
    return true;
  }

  // Write after read.
  if (instr2_side_effects.MayDependOn(instr1_side_effects)) {
    return true;
  }

  // Memory write after write.
  if (instr1_side_effects.DoesAnyWrite() && instr2_side_effects.DoesAnyWrite()) {
    return true;
  }

  return false;
}

size_t SideEffectDependencyAnalysis::MemoryDependencyAnalysis::ArrayAccessHeapLocation(
    HInstruction* instruction) const {
  DCHECK(heap_location_collector_ != nullptr);
  size_t heap_loc = heap_location_collector_->GetArrayHeapLocation(instruction);
  // This array access should have been analyzed and added to the HeapLocationCollector already.
  DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);
  return heap_loc;
}

bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::ArrayAccessMayAlias(
    HInstruction* instr1, HInstruction* instr2) const {
  DCHECK(heap_location_collector_ != nullptr);
  size_t instr1_heap_loc = ArrayAccessHeapLocation(instr1);
  size_t instr2_heap_loc = ArrayAccessHeapLocation(instr2);

  // For example: arr[0] and arr[0]
  if (instr1_heap_loc == instr2_heap_loc) {
    return true;
  }

  // For example: arr[0] and arr[i]
  if (heap_location_collector_->MayAlias(instr1_heap_loc, instr2_heap_loc)) {
    return true;
  }

  return false;
}

static bool IsArrayAccess(const HInstruction* instruction) {
  return instruction->IsArrayGet() || instruction->IsArraySet();
}

static bool IsInstanceFieldAccess(const HInstruction* instruction) {
  return instruction->IsInstanceFieldGet() ||
         instruction->IsInstanceFieldSet() ||
         instruction->IsUnresolvedInstanceFieldGet() ||
         instruction->IsUnresolvedInstanceFieldSet();
}

static bool IsStaticFieldAccess(const HInstruction* instruction) {
  return instruction->IsStaticFieldGet() ||
         instruction->IsStaticFieldSet() ||
         instruction->IsUnresolvedStaticFieldGet() ||
         instruction->IsUnresolvedStaticFieldSet();
}

static bool IsResolvedFieldAccess(const HInstruction* instruction) {
  return instruction->IsInstanceFieldGet() ||
         instruction->IsInstanceFieldSet() ||
         instruction->IsStaticFieldGet() ||
         instruction->IsStaticFieldSet();
}

static bool IsUnresolvedFieldAccess(const HInstruction* instruction) {
  return instruction->IsUnresolvedInstanceFieldGet() ||
         instruction->IsUnresolvedInstanceFieldSet() ||
         instruction->IsUnresolvedStaticFieldGet() ||
         instruction->IsUnresolvedStaticFieldSet();
}

static bool IsFieldAccess(const HInstruction* instruction) {
  return IsResolvedFieldAccess(instruction) || IsUnresolvedFieldAccess(instruction);
}

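// Returns the FieldInfo of a resolved field access. Unresolved field accesses carry no
// FieldInfo, so callers must filter them out (see IsUnresolvedFieldAccess) before calling.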
static const FieldInfo* GetFieldInfo(const HInstruction* instruction) {
  if (instruction->IsInstanceFieldGet()) {
    return &instruction->AsInstanceFieldGet()->GetFieldInfo();
  } else if (instruction->IsInstanceFieldSet()) {
    return &instruction->AsInstanceFieldSet()->GetFieldInfo();
  } else if (instruction->IsStaticFieldGet()) {
    return &instruction->AsStaticFieldGet()->GetFieldInfo();
  } else if (instruction->IsStaticFieldSet()) {
    return &instruction->AsStaticFieldSet()->GetFieldInfo();
  } else {
    LOG(FATAL) << "Unexpected field access type";
    UNREACHABLE();
  }
}

size_t SideEffectDependencyAnalysis::MemoryDependencyAnalysis::FieldAccessHeapLocation(
    const HInstruction* instr) const {
  DCHECK(instr != nullptr);
  DCHECK(GetFieldInfo(instr) != nullptr);
  DCHECK(heap_location_collector_ != nullptr);

  size_t heap_loc = heap_location_collector_->GetFieldHeapLocation(instr->InputAt(0),
                                                                   GetFieldInfo(instr));
  // This field access should have been analyzed and added to the HeapLocationCollector already.
  DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);

  return heap_loc;
}

bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::FieldAccessMayAlias(
    const HInstruction* instr1, const HInstruction* instr2) const {
  DCHECK(heap_location_collector_ != nullptr);

  // Static and instance field accesses should not alias.
  if ((IsInstanceFieldAccess(instr1) && IsStaticFieldAccess(instr2)) ||
      (IsStaticFieldAccess(instr1) && IsInstanceFieldAccess(instr2))) {
    return false;
  }

  // If either of the field accesses is unresolved.
  if (IsUnresolvedFieldAccess(instr1) || IsUnresolvedFieldAccess(instr2)) {
    // Conservatively assume that the two accesses may alias.
    return true;
  }

  // Both field accesses are resolved.
  size_t instr1_field_access_heap_loc = FieldAccessHeapLocation(instr1);
  size_t instr2_field_access_heap_loc = FieldAccessHeapLocation(instr2);

  if (instr1_field_access_heap_loc == instr2_field_access_heap_loc) {
    return true;
  }

  if (!heap_location_collector_->MayAlias(instr1_field_access_heap_loc,
                                          instr2_field_access_heap_loc)) {
    return false;
  }

  return true;
}

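// Whether a memory dependency edge must be kept between `instr1` and `instr2`. The check
// starts from the coarse side-effect test above, then, when load-store analysis results
// are available, tries to disambiguate array, field and vector accesses; anything it
// cannot reason about is conservatively reported as a dependency.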
bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::HasMemoryDependency(
    HInstruction* instr1, HInstruction* instr2) const {
  if (!HasReorderingDependency(instr1, instr2)) {
    return false;
  }

  if (heap_location_collector_ == nullptr ||
      heap_location_collector_->GetNumberOfHeapLocations() == 0) {
    // Without HeapLocation information from load-store analysis we cannot do any
    // further disambiguation of these two instructions, so simply report that they
    // have a memory dependency.
    return true;
  }

  if (IsArrayAccess(instr1) && IsArrayAccess(instr2)) {
    return ArrayAccessMayAlias(instr1, instr2);
  }
  if (IsFieldAccess(instr1) && IsFieldAccess(instr2)) {
    return FieldAccessMayAlias(instr1, instr2);
  }

  // TODO(xueliang): LSA to support alias analysis among HVecLoad, HVecStore and ArrayAccess
  if (instr1->IsVecMemoryOperation() && instr2->IsVecMemoryOperation()) {
    return true;
  }
  if (instr1->IsVecMemoryOperation() && IsArrayAccess(instr2)) {
    return true;
  }
  if (IsArrayAccess(instr1) && instr2->IsVecMemoryOperation()) {
    return true;
  }

  // Heap accesses of different kinds should not alias.
  if (IsArrayAccess(instr1) && IsFieldAccess(instr2)) {
    return false;
  }
  if (IsFieldAccess(instr1) && IsArrayAccess(instr2)) {
    return false;
  }
  if (instr1->IsVecMemoryOperation() && IsFieldAccess(instr2)) {
    return false;
  }
  if (IsFieldAccess(instr1) && instr2->IsVecMemoryOperation()) {
    return false;
  }

  // We conservatively treat all other cases as having a dependency,
  // for example, Invoke and ArrayGet.
  return true;
}

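// Whether `instr1` and `instr2` must keep their relative order because at least one of
// them may throw: a throwing instruction must not be moved across a memory write (an
// exception handler could otherwise observe the wrong state), and two throwing
// instructions must not be reordered with respect to each other (the wrong exception
// could be thrown).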
bool SideEffectDependencyAnalysis::HasExceptionDependency(const HInstruction* instr1,
                                                          const HInstruction* instr2) {
  if (instr2->CanThrow() && instr1->GetSideEffects().DoesAnyWrite()) {
    return true;
  }
  if (instr2->GetSideEffects().DoesAnyWrite() && instr1->CanThrow()) {
    return true;
  }
  if (instr2->CanThrow() && instr1->CanThrow()) {
    return true;
  }

  // The checks above should cover all cases where we cannot reorder two
  // instructions which may throw an exception.
  return false;
}

// Check whether the specified instruction is a better candidate, i.e. one that is more
// likely to have other instructions depending on it.
static bool IsBetterCandidateWithMoreLikelyDependencies(HInstruction* new_candidate,
                                                        HInstruction* old_candidate) {
  if (!new_candidate->GetSideEffects().Includes(old_candidate->GetSideEffects())) {
    // Weaker side effects.
    return false;
  }
  if (old_candidate->GetSideEffects().Includes(new_candidate->GetSideEffects())) {
    // Same side effects, check if `new_candidate` has stronger `CanThrow()`.
    return new_candidate->CanThrow() && !old_candidate->CanThrow();
  } else {
    // Stronger side effects, check if `new_candidate` has at least as strong `CanThrow()`.
    return new_candidate->CanThrow() || !old_candidate->CanThrow();
  }
}

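// Illustrative sketch (not taken from real compiler output) of the cross-iteration
// dependency handled below:
//   loop_header:
//     i = Phi [i0, i_next]
//   loop_body:
//     v      = ArrayGet [arr, i]   // `node`: uses the phi, i.e. the previous-iteration value.
//     i_next = Add [i, 1]          // `def_node`: produces the value for the next iteration.
// When `i_next` is used only by the phi, the register allocator can typically let the phi
// and `i_next` share a register, so hoisting `i_next` above `v` would clobber the value
// that `v` still needs. The "other" dependency added below keeps `i_next` after `v`.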
void SchedulingGraph::AddCrossIterationDependencies(SchedulingNode* node) {
  for (HInstruction* instruction : node->GetInstruction()->GetInputs()) {
    // Having a phi-function from a loop header as an input means the current node of the
    // scheduling graph has a cross-iteration dependency because such phi-functions bring values
    // from the previous iteration to the current iteration.
    if (!instruction->IsLoopHeaderPhi()) {
      continue;
    }
    for (HInstruction* phi_input : instruction->GetInputs()) {
      // As the scheduling graph of the current basic block is built by
      // processing instructions bottom-up, nullptr returned by GetNode means
      // an instruction defining a value for the phi is either before the
      // instruction represented by node or it is in a different basic block.
      SchedulingNode* def_node = GetNode(phi_input);

      // We don't create a dependency if there are uses besides the use in the phi.
      // In such cases a register to hold phi_input is usually allocated and
      // a MOV instruction is generated. In cases with multiple uses and no MOV
      // instruction, a reordering that ends up creating a MOV instruction can still
      // improve performance more than an attempt to avoid the MOV instruction.
      if (def_node != nullptr && def_node != node && phi_input->GetUses().HasExactlyOneElement()) {
        // We have an implicit data dependency between node and def_node.
        // AddDataDependency cannot be used because it is for explicit data dependencies.
        // So AddOtherDependency is used.
        AddOtherDependency(def_node, node);
      }
    }
  }
}

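// Adds all scheduling dependencies for `instruction_node`: define-use (data) dependencies,
// ordering dependencies around scheduling barriers, side-effect/exception dependencies,
// environment dependencies and cross-iteration dependencies.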
void SchedulingGraph::AddDependencies(SchedulingNode* instruction_node,
                                      bool is_scheduling_barrier) {
  HInstruction* instruction = instruction_node->GetInstruction();

  // Define-use dependencies.
  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
    AddDataDependency(GetNode(use.GetUser()), instruction_node);
  }

  // Scheduling barrier dependencies.
  DCHECK(!is_scheduling_barrier || contains_scheduling_barrier_);
  if (contains_scheduling_barrier_) {
    // A barrier depends on instructions after it. And instructions before the
    // barrier depend on it.
    for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
      SchedulingNode* other_node = GetNode(other);
      CHECK(other_node != nullptr)
          << other->DebugName()
          << " is in block " << other->GetBlock()->GetBlockId()
          << ", and expected in block " << instruction->GetBlock()->GetBlockId();
      bool other_is_barrier = other_node->IsSchedulingBarrier();
      if (is_scheduling_barrier || other_is_barrier) {
        AddOtherDependency(other_node, instruction_node);
      }
      if (other_is_barrier) {
        // This other scheduling barrier guarantees ordering of instructions after
        // it, so avoid creating additional useless dependencies in the graph.
        // For example, if we have
        //   instr_1
        //   barrier_2
        //   instr_3
        //   barrier_4
        //   instr_5
        // we only create the following non-data dependencies
        //   1 -> 2
        //   2 -> 3
        //   2 -> 4
        //   3 -> 4
        //   4 -> 5
        // and do not create
        //   1 -> 4
        //   2 -> 5
        // Note that in this example we could also avoid creating the dependency
        // `2 -> 4`. But if we remove `instr_3` that dependency is required to
        // order the barriers. So we generate it to avoid a special case.
        break;
      }
    }
  }

  // Side effect dependencies.
  if (!instruction->GetSideEffects().DoesNothing() || instruction->CanThrow()) {
    HInstruction* dep_chain_candidate = nullptr;
    for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
      SchedulingNode* other_node = GetNode(other);
      if (other_node->IsSchedulingBarrier()) {
        // We have reached a scheduling barrier so we can stop further
        // processing.
        //
        // As an "other" dependency is not set up if a data dependency exists, we need to
        // check that one of them exists.
        DCHECK(other_node->HasOtherDependency(instruction_node)
               || other_node->HasDataDependency(instruction_node));
        break;
      }
      if (side_effect_dependency_analysis_.HasSideEffectDependency(other, instruction)) {
        if (dep_chain_candidate != nullptr &&
            side_effect_dependency_analysis_.HasSideEffectDependency(other, dep_chain_candidate)) {
          // Skip an explicit dependency to reduce memory usage, rely on the transitive dependency.
        } else {
          AddOtherDependency(other_node, instruction_node);
        }
        // Check if `other` is a better candidate which more likely will have other instructions
        // depending on it.
        if (dep_chain_candidate == nullptr ||
            IsBetterCandidateWithMoreLikelyDependencies(other, dep_chain_candidate)) {
          dep_chain_candidate = other;
        }
      }
    }
  }

  // Environment dependencies.
  // We do not need to process those if the instruction is a scheduling barrier,
  // since the barrier already has non-data dependencies on all following
  // instructions.
  if (!is_scheduling_barrier) {
    for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
      // Note that here we could stop processing if the environment holder is
      // across a scheduling barrier. But checking this would likely require
      // more work than simply iterating through environment uses.
      AddOtherDependency(GetNode(use.GetUser()->GetHolder()), instruction_node);
    }
  }

  AddCrossIterationDependencies(instruction_node);
}

static const std::string InstructionTypeId(const HInstruction* instruction) {
  return DataType::TypeId(instruction->GetType()) + std::to_string(instruction->GetId());
}

// Ideally we would reuse the graph visualizer code, but it is not available
// from here and it is not worth moving all that code only for our use.
static void DumpAsDotNode(std::ostream& output, const SchedulingNode* node) {
  const HInstruction* instruction = node->GetInstruction();
  // Use the instruction typed id as the node identifier.
  std::string instruction_id = InstructionTypeId(instruction);
  output << instruction_id << "[shape=record, label=\""
         << instruction_id << ' ' << instruction->DebugName() << " [";
  // List the instruction's inputs in its description. When visualizing the
  // graph this helps differentiate data inputs from other dependencies.
  const char* separator = "";
  for (const HInstruction* input : instruction->GetInputs()) {
    output << separator << InstructionTypeId(input);
    separator = ",";
  }
  output << "]";
  // Other properties of the node.
  output << "\\ninternal_latency: " << node->GetInternalLatency();
  output << "\\ncritical_path: " << node->GetCriticalPath();
  if (node->IsSchedulingBarrier()) {
    output << "\\n(barrier)";
  }
  output << "\"];\n";
  // We want program order to go from top to bottom in the graph output, so we
  // reverse the edges and specify `dir=back`.
  for (const SchedulingNode* predecessor : node->GetDataPredecessors()) {
    const HInstruction* predecessor_instruction = predecessor->GetInstruction();
    output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
           << "[label=\"" << predecessor->GetLatency() << "\",dir=back]\n";
  }
  for (const SchedulingNode* predecessor : node->GetOtherPredecessors()) {
    const HInstruction* predecessor_instruction = predecessor->GetInstruction();
    output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
           << "[dir=back,color=blue]\n";
  }
}

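// Appends the current scheduling graph, in Graphviz dot format, to `scheduling_graphs.dot`
// in the compiler's current working directory. As a usage hint (assuming Graphviz is
// installed), the dump can be rendered with, e.g.:
//   dot -Tsvg -O scheduling_graphs.dot
// which generates one output file per dumped graph.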
void SchedulingGraph::DumpAsDotGraph(const std::string& description,
                                     const ScopedArenaVector<SchedulingNode*>& initial_candidates) {
  // TODO(xueliang): ideally we should move scheduling information into HInstruction, after that
  // we should move this dotty graph dump feature to visualizer, and have a compiler option for it.
  std::ofstream output("scheduling_graphs.dot", std::ofstream::out | std::ofstream::app);
  // Description of this graph, as a comment.
  output << "// " << description << "\n";
  // Start the dot graph. Use an increasing index for easier differentiation.
  output << "digraph G {\n";
  for (const auto& entry : nodes_map_) {
    SchedulingNode* node = entry.second.get();
    DumpAsDotNode(output, node);
  }
  // Create a fake 'end_of_scheduling' node to help visualization of critical_paths.
  for (SchedulingNode* node : initial_candidates) {
    const HInstruction* instruction = node->GetInstruction();
    output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n "
           << "[label=\"" << node->GetLatency() << "\",dir=back]\n";
  }
  // End of the dot graph.
  output << "}\n";
  output.close();
}

SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition(
    ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) const {
  // Schedule condition inputs that can be materialized immediately before their use.
  // In the following example, after we've scheduled HSelect, we want HLessThan to be scheduled
  // immediately, because it is a materialized condition, and will be emitted right before HSelect
  // in the codegen phase.
  //
  //   i20 HLessThan [...]            HLessThan    HAdd      HAdd
  //   i21 HAdd [...]        ===>         |          |         |
  //   i22 HAdd [...]                     +----------+---------+
  //   i23 HSelect [i21, i22, i20]                HSelect

  if (prev_select_ == nullptr) {
    return nullptr;
  }

  const HInstruction* instruction = prev_select_->GetInstruction();
  const HCondition* condition = nullptr;
  DCHECK(instruction != nullptr);

  if (instruction->IsIf()) {
    condition = instruction->AsIf()->InputAt(0)->AsCondition();
  } else if (instruction->IsSelect()) {
    condition = instruction->AsSelect()->GetCondition()->AsCondition();
  }

  SchedulingNode* condition_node = (condition != nullptr) ? graph.GetNode(condition) : nullptr;

  if ((condition_node != nullptr) &&
      condition->HasOnlyOneNonEnvironmentUse() &&
      ContainsElement(*nodes, condition_node)) {
    DCHECK(!condition_node->HasUnscheduledSuccessors());
    // Remove the condition from the list of candidates and schedule it.
    RemoveElement(*nodes, condition_node);
    return condition_node;
  }

  return nullptr;
}

SchedulingNode* CriticalPathSchedulingNodeSelector::PopHighestPriorityNode(
    ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) {
  DCHECK(!nodes->empty());
  SchedulingNode* select_node = nullptr;

  // Optimize for the materialized-condition, emit-right-before-use scenario.
  select_node = SelectMaterializedCondition(nodes, graph);

  if (select_node == nullptr) {
    // Get the highest priority node based on critical path information.
    select_node = (*nodes)[0];
    size_t select = 0;
    for (size_t i = 1, e = nodes->size(); i < e; i++) {
      SchedulingNode* check = (*nodes)[i];
      SchedulingNode* candidate = (*nodes)[select];
      select_node = GetHigherPrioritySchedulingNode(candidate, check);
      if (select_node == check) {
        select = i;
      }
    }
    DeleteNodeAtIndex(nodes, select);
  }

  prev_select_ = select_node;
  return select_node;
}

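// Note that node selection is effectively bottom-up (see HScheduler::Schedule): the node
// returned by the selector is placed closest to the end of the block, so the node with
// the smaller critical path (and, on a tie, the smaller latency) is preferred here, which
// leaves instructions with long critical paths or high latencies earlier in program order.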
SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingNode(
    SchedulingNode* candidate, SchedulingNode* check) const {
  uint32_t candidate_path = candidate->GetCriticalPath();
  uint32_t check_path = check->GetCriticalPath();
  // First look at the critical_path.
  if (check_path != candidate_path) {
    return check_path < candidate_path ? check : candidate;
  }
  // If both critical paths are equal, schedule instructions with a higher latency
  // first in program order.
  return check->GetLatency() < candidate->GetLatency() ? check : candidate;
}

void HScheduler::Schedule(HGraph* graph) {
  // We run load-store analysis (LSA) here instead of in a separate pass to better
  // control whether the analysis should run or not.
  const HeapLocationCollector* heap_location_collector = nullptr;
  ScopedArenaAllocator allocator(graph->GetArenaStack());
  LoadStoreAnalysis lsa(graph, &allocator);
  if (!only_optimize_loop_blocks_ || graph->HasLoops()) {
    lsa.Run();
    heap_location_collector = &lsa.GetHeapLocationCollector();
  }

  for (HBasicBlock* block : graph->GetReversePostOrder()) {
    if (IsSchedulable(block)) {
      Schedule(block, heap_location_collector);
    }
  }
}

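// Schedules a single basic block: builds the scheduling graph bottom-up while computing
// per-node latencies, then list-schedules it by repeatedly popping the highest priority
// candidate and placing it just below the moving cursor, starting from the block's last
// instruction.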
void HScheduler::Schedule(HBasicBlock* block,
                          const HeapLocationCollector* heap_location_collector) {
  ScopedArenaAllocator allocator(block->GetGraph()->GetArenaStack());
  ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator.Adapter(kArenaAllocScheduler));

  // Build the scheduling graph.
  SchedulingGraph scheduling_graph(&allocator, heap_location_collector);
  for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
    HInstruction* instruction = it.Current();
    CHECK_EQ(instruction->GetBlock(), block)
        << instruction->DebugName()
        << " is in block " << instruction->GetBlock()->GetBlockId()
        << ", and expected in block " << block->GetBlockId();
    SchedulingNode* node = scheduling_graph.AddNode(instruction, IsSchedulingBarrier(instruction));
    CalculateLatency(node);
    scheduling_nodes.push_back(node);
  }

  if (scheduling_graph.Size() <= 1) {
    return;
  }

  cursor_ = block->GetLastInstruction();

  // The list of candidates for scheduling. A node becomes a candidate when all
  // its successors (the nodes depending on it) have been scheduled.
  ScopedArenaVector<SchedulingNode*> candidates(allocator.Adapter(kArenaAllocScheduler));

  // Find the initial candidates for scheduling.
  for (SchedulingNode* node : scheduling_nodes) {
    if (!node->HasUnscheduledSuccessors()) {
      node->MaybeUpdateCriticalPath(node->GetLatency());
      candidates.push_back(node);
    }
  }

  ScopedArenaVector<SchedulingNode*> initial_candidates(allocator.Adapter(kArenaAllocScheduler));
  if (kDumpDotSchedulingGraphs) {
    // Remember the list of initial candidates for debug output purposes.
    initial_candidates.assign(candidates.begin(), candidates.end());
  }

  // Schedule all nodes.
  selector_->Reset();
  while (!candidates.empty()) {
    SchedulingNode* node = selector_->PopHighestPriorityNode(&candidates, scheduling_graph);
    Schedule(node, &candidates);
  }

  if (kDumpDotSchedulingGraphs) {
    // Dump the graph in `dot` format.
    HGraph* graph = block->GetGraph();
    std::stringstream description;
    description << graph->GetDexFile().PrettyMethod(graph->GetMethodIdx())
                << " B" << block->GetBlockId();
    scheduling_graph.DumpAsDotGraph(description.str(), initial_candidates);
  }
}

void HScheduler::Schedule(SchedulingNode* scheduling_node,
                          /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates) {
  // Check whether any of the node's predecessors will be valid candidates after
  // this node is scheduled.
  uint32_t path_to_node = scheduling_node->GetCriticalPath();
  for (SchedulingNode* predecessor : scheduling_node->GetDataPredecessors()) {
    predecessor->MaybeUpdateCriticalPath(
        path_to_node + predecessor->GetInternalLatency() + predecessor->GetLatency());
    predecessor->DecrementNumberOfUnscheduledSuccessors();
    if (!predecessor->HasUnscheduledSuccessors()) {
      candidates->push_back(predecessor);
    }
  }
  for (SchedulingNode* predecessor : scheduling_node->GetOtherPredecessors()) {
    // Do not update the critical path.
    // The 'other' (non-data) dependencies usually do not represent a 'material'
    // dependency of one node on another; they exist to preserve program
    // correctness. So we do not use them to compute the critical path.
    predecessor->DecrementNumberOfUnscheduledSuccessors();
    if (!predecessor->HasUnscheduledSuccessors()) {
      candidates->push_back(predecessor);
    }
  }

  Schedule(scheduling_node->GetInstruction());
}

// Move an instruction to right after the cursor instruction inside one basic block.
static void MoveAfterInBlock(HInstruction* instruction, HInstruction* cursor) {
  DCHECK_EQ(instruction->GetBlock(), cursor->GetBlock());
  DCHECK_NE(cursor, cursor->GetBlock()->GetLastInstruction());
  DCHECK(!instruction->IsControlFlow());
  DCHECK(!cursor->IsControlFlow());
  instruction->MoveBefore(cursor->GetNext(), /* do_checks= */ false);
}

void HScheduler::Schedule(HInstruction* instruction) {
  if (instruction == cursor_) {
    cursor_ = cursor_->GetPrevious();
  } else {
    MoveAfterInBlock(instruction, cursor_);
  }
}

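// Whether `instruction` is of a kind the scheduler knows how to reorder safely. Any
// instruction not recognized here makes its whole block unschedulable (see the
// HBasicBlock overload below).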
bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
  // We want to avoid exhaustively listing all instructions, so we first check
  // for instruction categories that we know are safe.
  if (instruction->IsControlFlow() ||
      instruction->IsConstant()) {
    return true;
  }
  // Currently all unary and binary operations are safe to schedule, so avoid
  // checking for each of them individually.
  // Since nothing prevents a new scheduling-unsafe HInstruction from subclassing
  // HUnaryOperation (or HBinaryOperation), check in debug mode that we have
  // the exhaustive lists here.
  if (instruction->IsUnaryOperation()) {
    DCHECK(instruction->IsAbs() ||
           instruction->IsBooleanNot() ||
           instruction->IsNot() ||
           instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName();
    return true;
  }
  if (instruction->IsBinaryOperation()) {
    DCHECK(instruction->IsAdd() ||
           instruction->IsAnd() ||
           instruction->IsCompare() ||
           instruction->IsCondition() ||
           instruction->IsDiv() ||
           instruction->IsMin() ||
           instruction->IsMax() ||
           instruction->IsMul() ||
           instruction->IsOr() ||
           instruction->IsRem() ||
           instruction->IsRor() ||
           instruction->IsShl() ||
           instruction->IsShr() ||
           instruction->IsSub() ||
           instruction->IsUShr() ||
           instruction->IsXor()) << "unexpected instruction " << instruction->DebugName();
    return true;
  }
  // The scheduler should not see any of these.
  DCHECK(!instruction->IsParallelMove()) << "unexpected instruction " << instruction->DebugName();
  // List of instructions explicitly excluded:
  //   HClearException
  //   HClinitCheck
  //   HDeoptimize
  //   HLoadClass
  //   HLoadException
  //   HMemoryBarrier
  //   HMonitorOperation
  //   HNativeDebugInfo
  //   HThrow
  //   HTryBoundary
  // TODO: Some of the instructions above may be safe to schedule (maybe as
  // scheduling barriers).
  return instruction->IsArrayGet() ||
         instruction->IsArraySet() ||
         instruction->IsArrayLength() ||
         instruction->IsBoundType() ||
         instruction->IsBoundsCheck() ||
         instruction->IsCheckCast() ||
         instruction->IsClassTableGet() ||
         instruction->IsCurrentMethod() ||
         instruction->IsDivZeroCheck() ||
         (instruction->IsInstanceFieldGet() && !instruction->AsInstanceFieldGet()->IsVolatile()) ||
         (instruction->IsInstanceFieldSet() && !instruction->AsInstanceFieldSet()->IsVolatile()) ||
         instruction->IsInstanceOf() ||
         instruction->IsInvokeInterface() ||
         instruction->IsInvokeStaticOrDirect() ||
         instruction->IsInvokeUnresolved() ||
         instruction->IsInvokeVirtual() ||
         instruction->IsLoadString() ||
         instruction->IsNewArray() ||
         instruction->IsNewInstance() ||
         instruction->IsNullCheck() ||
         instruction->IsPackedSwitch() ||
         instruction->IsParameterValue() ||
         instruction->IsPhi() ||
         instruction->IsReturn() ||
         instruction->IsReturnVoid() ||
         instruction->IsSelect() ||
         (instruction->IsStaticFieldGet() && !instruction->AsStaticFieldGet()->IsVolatile()) ||
         (instruction->IsStaticFieldSet() && !instruction->AsStaticFieldSet()->IsVolatile()) ||
         instruction->IsSuspendCheck() ||
         instruction->IsTypeConversion();
}

bool HScheduler::IsSchedulable(const HBasicBlock* block) const {
  // We may be only interested in loop blocks.
  if (only_optimize_loop_blocks_ && !block->IsInLoop()) {
    return false;
  }
  if (block->GetTryCatchInformation() != nullptr) {
    // Do not schedule blocks that are part of try-catch, because the scheduler cannot see
    // whether the catch block has assumptions about the instruction order in the try block.
    // In the following example, if we enabled the scheduler for the try block,
    // MultiplyAccumulate could be scheduled before DivZeroCheck,
    // which could result in an incorrect value in the catch block.
    //   try {
    //     a = a/b;    // DivZeroCheck
    //                 // Div
    //     c = c*d+e;  // MultiplyAccumulate
    //   } catch { System.out.print(c); }
    return false;
  }
  // Check whether all instructions in this block are schedulable.
  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
    if (!IsSchedulable(it.Current())) {
      return false;
    }
  }
  return true;
}

bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const {
  return instr->IsControlFlow() ||
         // Don't break calling convention.
         instr->IsParameterValue() ||
         // Code generation of goto relies on SuspendCheck's position.
         instr->IsSuspendCheck();
}

bool HInstructionScheduling::Run(bool only_optimize_loop_blocks,
                                 bool schedule_randomly) {
#if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm)
  // Phase-local allocator that allocates scheduler internal data structures like
  // scheduling nodes, internal nodes map, dependencies, etc.
  CriticalPathSchedulingNodeSelector critical_path_selector;
  RandomSchedulingNodeSelector random_selector;
  SchedulingNodeSelector* selector = schedule_randomly
      ? static_cast<SchedulingNodeSelector*>(&random_selector)
      : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
#else
  // Avoid compilation error when compiling for unsupported instruction set.
  UNUSED(only_optimize_loop_blocks);
  UNUSED(schedule_randomly);
  UNUSED(codegen_);
#endif

  switch (instruction_set_) {
#ifdef ART_ENABLE_CODEGEN_arm64
    case InstructionSet::kArm64: {
      arm64::HSchedulerARM64 scheduler(selector);
      scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
      scheduler.Schedule(graph_);
      break;
    }
#endif
#if defined(ART_ENABLE_CODEGEN_arm)
    case InstructionSet::kThumb2:
    case InstructionSet::kArm: {
      arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_);
      arm::HSchedulerARM scheduler(selector, &arm_latency_visitor);
      scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
      scheduler.Schedule(graph_);
      break;
    }
#endif
    default:
      break;
  }
  return true;
}

}  // namespace art