1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "parallel_move_resolver.h"
18 
19 #include "base/stl_util.h"
20 #include "nodes.h"
21 
22 namespace art {
23 
BuildInitialMoveList(HParallelMove * parallel_move)24 void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
25   // Perform a linear sweep of the moves to add them to the initial list of
26   // moves to perform, ignoring any move that is redundant (the source is
27   // the same as the destination, the destination is ignored and
28   // unallocated, or the move was already eliminated).
29   for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
30     MoveOperands* move = parallel_move->MoveOperandsAt(i);
31     if (!move->IsRedundant()) {
32       moves_.push_back(move);
33     }
34   }
35 }
36 
EmitNativeCode(HParallelMove * parallel_move)37 void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) {
38   DCHECK(moves_.empty());
39   // Build up a worklist of moves.
40   BuildInitialMoveList(parallel_move);
41 
42   // Move stack/stack slot to take advantage of a free register on constrained machines.
43   for (size_t i = 0; i < moves_.size(); ++i) {
44     const MoveOperands& move = *moves_[i];
45     // Ignore constants and moves already eliminated.
46     if (move.IsEliminated() || move.GetSource().IsConstant()) {
47       continue;
48     }
49 
50     if ((move.GetSource().IsStackSlot() || move.GetSource().IsDoubleStackSlot()) &&
51         (move.GetDestination().IsStackSlot() || move.GetDestination().IsDoubleStackSlot())) {
52       PerformMove(i);
53     }
54   }
55 
56   for (size_t i = 0; i < moves_.size(); ++i) {
57     const MoveOperands& move = *moves_[i];
58     // Skip constants to perform them last.  They don't block other moves
59     // and skipping such moves with register destinations keeps those
60     // registers free for the whole algorithm.
61     if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
62       PerformMove(i);
63     }
64   }
65 
66   // Perform the moves with constant sources.
67   for (size_t i = 0; i < moves_.size(); ++i) {
68     MoveOperands* move = moves_[i];
69     if (!move->IsEliminated()) {
70       DCHECK(move->GetSource().IsConstant());
71       EmitMove(i);
72       // Eliminate the move, in case following moves need a scratch register.
73       move->Eliminate();
74     }
75   }
76 
77   moves_.clear();
78 }
79 
LowOf(Location location)80 Location LowOf(Location location) {
81   if (location.IsRegisterPair()) {
82     return Location::RegisterLocation(location.low());
83   } else if (location.IsFpuRegisterPair()) {
84     return Location::FpuRegisterLocation(location.low());
85   } else if (location.IsDoubleStackSlot()) {
86     return Location::StackSlot(location.GetStackIndex());
87   } else {
88     return Location::NoLocation();
89   }
90 }
91 
HighOf(Location location)92 Location HighOf(Location location) {
93   if (location.IsRegisterPair()) {
94     return Location::RegisterLocation(location.high());
95   } else if (location.IsFpuRegisterPair()) {
96     return Location::FpuRegisterLocation(location.high());
97   } else if (location.IsDoubleStackSlot()) {
98     return Location::StackSlot(location.GetHighStackIndex(4));
99   } else {
100     return Location::NoLocation();
101   }
102 }
103 
104 // Update the source of `move`, knowing that `updated_location` has been swapped
105 // with `new_source`. Note that `updated_location` can be a pair, therefore if
106 // `move` is non-pair, we need to extract which register to use.
UpdateSourceOf(MoveOperands * move,Location updated_location,Location new_source)107 static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) {
108   Location source = move->GetSource();
109   if (LowOf(updated_location).Equals(source)) {
110     move->SetSource(LowOf(new_source));
111   } else if (HighOf(updated_location).Equals(source)) {
112     move->SetSource(HighOf(new_source));
113   } else {
114     DCHECK(updated_location.Equals(source)) << updated_location << " " << source;
115     move->SetSource(new_source);
116   }
117 }
118 
PerformMove(size_t index)119 MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
120   // Each call to this function performs a move and deletes it from the move
121   // graph.  We first recursively perform any move blocking this one.  We
122   // mark a move as "pending" on entry to PerformMove in order to detect
123   // cycles in the move graph.  We use operand swaps to resolve cycles,
124   // which means that a call to PerformMove could change any source operand
125   // in the move graph.
126 
127   MoveOperands* move = moves_[index];
128   DCHECK(!move->IsPending());
129   if (move->IsRedundant()) {
130     // Because we swap register pairs first, following, un-pending
131     // moves may become redundant.
132     move->Eliminate();
133     return nullptr;
134   }
135 
136   // Clear this move's destination to indicate a pending move.  The actual
137   // destination is saved in a stack-allocated local.  Recursion may allow
138   // multiple moves to be pending.
139   DCHECK(!move->GetSource().IsInvalid());
140   Location destination = move->MarkPending();
141 
142   // Perform a depth-first traversal of the move graph to resolve
143   // dependencies.  Any unperformed, unpending move with a source the same
144   // as this one's destination blocks this one so recursively perform all
145   // such moves.
146   MoveOperands* required_swap = nullptr;
147   for (size_t i = 0; i < moves_.size(); ++i) {
148     const MoveOperands& other_move = *moves_[i];
149     if (other_move.Blocks(destination) && !other_move.IsPending()) {
150       // Though PerformMove can change any source operand in the move graph,
151       // calling `PerformMove` cannot create a blocking move via a swap
152       // (this loop does not miss any).
153       // For example, assume there is a non-blocking move with source A
154       // and this move is blocked on source B and there is a swap of A and
155       // B.  Then A and B must be involved in the same cycle (or they would
156       // not be swapped).  Since this move's destination is B and there is
157       // only a single incoming edge to an operand, this move must also be
158       // involved in the same cycle.  In that case, the blocking move will
159       // be created but will be "pending" when we return from PerformMove.
160       required_swap = PerformMove(i);
161 
162       if (required_swap == move) {
163         // If this move is required to swap, we do so without looking
164         // at the next moves. Swapping is not blocked by anything, it just
165         // updates other moves's source.
166         break;
167       } else if (required_swap == moves_[i]) {
168         // If `other_move` was swapped, we iterate again to find a new
169         // potential cycle.
170         required_swap = nullptr;
171         i = -1;
172       } else if (required_swap != nullptr) {
173         // A move is required to swap. We walk back the cycle to find the
174         // move by just returning from this `PerformMove`.
175         moves_[index]->ClearPending(destination);
176         return required_swap;
177       }
178     }
179   }
180 
181   // We are about to resolve this move and don't need it marked as
182   // pending, so restore its destination.
183   move->ClearPending(destination);
184 
185   // This move's source may have changed due to swaps to resolve cycles and
186   // so it may now be the last move in the cycle.  If so remove it.
187   if (move->GetSource().Equals(destination)) {
188     move->Eliminate();
189     DCHECK(required_swap == nullptr);
190     return nullptr;
191   }
192 
193   // The move may be blocked on a (at most one) pending move, in which case
194   // we have a cycle.  Search for such a blocking move and perform a swap to
195   // resolve it.
196   bool do_swap = false;
197   if (required_swap != nullptr) {
198     DCHECK_EQ(required_swap, move);
199     do_swap = true;
200   } else {
201     for (MoveOperands* other_move : moves_) {
202       if (other_move->Blocks(destination)) {
203         DCHECK(other_move->IsPending()) << "move=" << *move << " other_move=" << *other_move;
204         if (!move->Is64BitMove() && other_move->Is64BitMove()) {
205           // We swap 64bits moves before swapping 32bits moves. Go back from the
206           // cycle by returning the move that must be swapped.
207           return other_move;
208         }
209         do_swap = true;
210         break;
211       }
212     }
213   }
214 
215   if (do_swap) {
216     EmitSwap(index);
217     // Any unperformed (including pending) move with a source of either
218     // this move's source or destination needs to have their source
219     // changed to reflect the state of affairs after the swap.
220     Location source = move->GetSource();
221     Location swap_destination = move->GetDestination();
222     move->Eliminate();
223     for (MoveOperands* other_move : moves_) {
224       if (other_move->Blocks(source)) {
225         UpdateSourceOf(other_move, source, swap_destination);
226       } else if (other_move->Blocks(swap_destination)) {
227         UpdateSourceOf(other_move, swap_destination, source);
228       }
229     }
230     // If the swap was required because of a 64bits move in the middle of a cycle,
231     // we return the swapped move, so that the caller knows it needs to re-iterate
232     // its dependency loop.
233     return required_swap;
234   } else {
235     // This move is not blocked.
236     EmitMove(index);
237     move->Eliminate();
238     DCHECK(required_swap == nullptr);
239     return nullptr;
240   }
241 }
242 
IsScratchLocation(Location loc)243 bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) {
244   for (MoveOperands* move : moves_) {
245     if (move->Blocks(loc)) {
246       return false;
247     }
248   }
249 
250   for (MoveOperands* move : moves_) {
251     if (move->GetDestination().Equals(loc)) {
252       return true;
253     }
254   }
255 
256   return false;
257 }
258 
AllocateScratchRegister(int blocked,int register_count,int if_scratch,bool * spilled)259 int ParallelMoveResolverWithSwap::AllocateScratchRegister(int blocked,
260                                                           int register_count,
261                                                           int if_scratch,
262                                                           bool* spilled) {
263   DCHECK_NE(blocked, if_scratch);
264   int scratch = -1;
265   for (int reg = 0; reg < register_count; ++reg) {
266     if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) {
267       scratch = reg;
268       break;
269     }
270   }
271 
272   if (scratch == -1) {
273     *spilled = true;
274     scratch = if_scratch;
275   } else {
276     *spilled = false;
277   }
278 
279   return scratch;
280 }
281 
282 
ScratchRegisterScope(ParallelMoveResolverWithSwap * resolver,int blocked,int if_scratch,int number_of_registers)283 ParallelMoveResolverWithSwap::ScratchRegisterScope::ScratchRegisterScope(
284     ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers)
285     : resolver_(resolver),
286       reg_(kNoRegister),
287       spilled_(false) {
288   reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, if_scratch, &spilled_);
289 
290   if (spilled_) {
291     resolver->SpillScratch(reg_);
292   }
293 }
294 
295 
~ScratchRegisterScope()296 ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() {
297   if (spilled_) {
298     resolver_->RestoreScratch(reg_);
299   }
300 }
301 
EmitNativeCode(HParallelMove * parallel_move)302 void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) {
303   DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
304   DCHECK(moves_.empty());
305   DCHECK(scratches_.empty());
306 
307   // Backend dependent initialization.
308   PrepareForEmitNativeCode();
309 
310   // Build up a worklist of moves.
311   BuildInitialMoveList(parallel_move);
312 
313   for (size_t i = 0; i < moves_.size(); ++i) {
314     const MoveOperands& move = *moves_[i];
315     // Skip constants to perform them last. They don't block other moves and
316     // skipping such moves with register destinations keeps those registers
317     // free for the whole algorithm.
318     if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
319       PerformMove(i);
320     }
321   }
322 
323   // Perform the moves with constant sources and register destinations with UpdateMoveSource()
324   // to reduce the number of literal loads. Stack destinations are skipped since we won't be benefit
325   // from changing the constant sources to stack locations.
326   for (size_t i = 0; i < moves_.size(); ++i) {
327     MoveOperands* move = moves_[i];
328     Location destination = move->GetDestination();
329     if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) {
330       Location source = move->GetSource();
331       EmitMove(i);
332       move->Eliminate();
333       // This may introduce additional instruction dependency, but reduce number
334       // of moves and possible literal loads. For example,
335       // Original moves:
336       //   1234.5678 -> D0
337       //   1234.5678 -> D1
338       // Updated moves:
339       //   1234.5678 -> D0
340       //   D0 -> D1
341       UpdateMoveSource(source, destination);
342     }
343   }
344 
345   // Perform the rest of the moves.
346   for (size_t i = 0; i < moves_.size(); ++i) {
347     MoveOperands* move = moves_[i];
348     if (!move->IsEliminated()) {
349       EmitMove(i);
350       move->Eliminate();
351     }
352   }
353 
354   // All pending moves that we have added for resolve cycles should be performed.
355   DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
356 
357   // Backend dependent cleanup.
358   FinishEmitNativeCode();
359 
360   moves_.clear();
361   scratches_.clear();
362 }
363 
GetScratchLocation(Location::Kind kind)364 Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) {
365   for (Location loc : scratches_) {
366     if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
367       return loc;
368     }
369   }
370   for (MoveOperands* move : moves_) {
371     Location loc = move->GetDestination();
372     if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
373       return loc;
374     }
375   }
376   return Location::NoLocation();
377 }
378 
AddScratchLocation(Location loc)379 void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) {
380   if (kIsDebugBuild) {
381     for (Location scratch : scratches_) {
382       CHECK(!loc.Equals(scratch));
383     }
384   }
385   scratches_.push_back(loc);
386 }
387 
RemoveScratchLocation(Location loc)388 void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) {
389   DCHECK(!IsBlockedByMoves(loc));
390   for (auto it = scratches_.begin(), end = scratches_.end(); it != end; ++it) {
391     if (loc.Equals(*it)) {
392       scratches_.erase(it);
393       break;
394     }
395   }
396 }
397 
PerformMove(size_t index)398 void ParallelMoveResolverNoSwap::PerformMove(size_t index) {
399   // Each call to this function performs a move and deletes it from the move
400   // graph. We first recursively perform any move blocking this one. We mark
401   // a move as "pending" on entry to PerformMove in order to detect cycles
402   // in the move graph. We use scratch location to resolve cycles, also
403   // additional pending moves might be added. After move has been performed,
404   // we will update source operand in the move graph to reduce dependencies in
405   // the graph.
406 
407   MoveOperands* move = moves_[index];
408   DCHECK(!move->IsPending());
409   DCHECK(!move->IsEliminated());
410   if (move->IsRedundant()) {
411     // Previous operations on the list of moves have caused this particular move
412     // to become a no-op, so we can safely eliminate it. Consider for example
413     // (0 -> 1) (1 -> 0) (1 -> 2). There is a cycle (0 -> 1) (1 -> 0), that we will
414     // resolve as (1 -> scratch) (0 -> 1) (scratch -> 0). If, by chance, '2' is
415     // used as the scratch location, the move (1 -> 2) will occur while resolving
416     // the cycle. When that move is emitted, the code will update moves with a '1'
417     // as their source to use '2' instead (see `UpdateMoveSource()`. In our example
418     // the initial move (1 -> 2) would then become the no-op (2 -> 2) that can be
419     // eliminated here.
420     move->Eliminate();
421     return;
422   }
423 
424   // Clear this move's destination to indicate a pending move. The actual
425   // destination is saved in a stack-allocated local. Recursion may allow
426   // multiple moves to be pending.
427   DCHECK(!move->GetSource().IsInvalid());
428   Location destination = move->MarkPending();
429 
430   // Perform a depth-first traversal of the move graph to resolve
431   // dependencies. Any unperformed, unpending move with a source the same
432   // as this one's destination blocks this one so recursively perform all
433   // such moves.
434   for (size_t i = 0; i < moves_.size(); ++i) {
435     const MoveOperands& other_move = *moves_[i];
436     if (other_move.Blocks(destination) && !other_move.IsPending()) {
437       PerformMove(i);
438     }
439   }
440 
441   // We are about to resolve this move and don't need it marked as
442   // pending, so restore its destination.
443   move->ClearPending(destination);
444 
445   // No one else should write to the move destination when the it is pending.
446   DCHECK(!move->IsRedundant());
447 
448   Location source = move->GetSource();
449   // The move may be blocked on several pending moves, in case we have a cycle.
450   if (IsBlockedByMoves(destination)) {
451     // For a cycle like: (A -> B) (B -> C) (C -> A), we change it to following
452     // sequence:
453     // (C -> scratch)     # Emit right now.
454     // (A -> B) (B -> C)  # Unblocked.
455     // (scratch -> A)     # Add to pending_moves_, blocked by (A -> B).
456     Location::Kind kind = source.GetKind();
457     DCHECK_NE(kind, Location::kConstant);
458     Location scratch = AllocateScratchLocationFor(kind);
459     // We only care about the move size.
460     DataType::Type type = move->Is64BitMove() ? DataType::Type::kInt64 : DataType::Type::kInt32;
461     // Perform (C -> scratch)
462     move->SetDestination(scratch);
463     EmitMove(index);
464     move->Eliminate();
465     UpdateMoveSource(source, scratch);
466     // Add (scratch -> A).
467     AddPendingMove(scratch, destination, type);
468   } else {
469     // This move is not blocked.
470     EmitMove(index);
471     move->Eliminate();
472     UpdateMoveSource(source, destination);
473   }
474 
475   // Moves in the pending list should not block any other moves. But performing
476   // unblocked moves in the pending list can free scratch registers, so we do this
477   // as early as possible.
478   MoveOperands* pending_move;
479   while ((pending_move = GetUnblockedPendingMove(source)) != nullptr) {
480     Location pending_source = pending_move->GetSource();
481     Location pending_destination = pending_move->GetDestination();
482     // We do not depend on the pending move index. So just delete the move instead
483     // of eliminating it to make the pending list cleaner.
484     DeletePendingMove(pending_move);
485     move->SetSource(pending_source);
486     move->SetDestination(pending_destination);
487     EmitMove(index);
488     move->Eliminate();
489     UpdateMoveSource(pending_source, pending_destination);
490     // Free any unblocked locations in the scratch location list.
491     // Note: Fetch size() on each iteration because scratches_ can be modified inside the loop.
492     // FIXME: If FreeScratchLocation() removes the location from scratches_,
493     // we skip the next location. This happens for arm64.
494     for (size_t i = 0; i < scratches_.size(); ++i) {
495       Location scratch = scratches_[i];
496       // Only scratch overlapping with performed move source can be unblocked.
497       if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) {
498         FreeScratchLocation(pending_source);
499       }
500     }
501   }
502 }
503 
UpdateMoveSource(Location from,Location to)504 void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
505   // This function is used to reduce the dependencies in the graph after
506   // (from -> to) has been performed. Since we ensure there is no move with the same
507   // destination, (to -> X) cannot be blocked while (from -> X) might still be
508   // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
509   // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
510   // a dependency between the two. If we update the source location from 1 to 2, we
511   // will get (0 -> 1) and (2 -> 3). There is no dependency between the two.
512   //
513   // This is not something we must do, but we can use fewer scratch locations with
514   // this trick. For example, we can avoid using additional scratch locations for
515   // moves (0 -> 1), (1 -> 2), (1 -> 0).
516   for (MoveOperands* move : moves_) {
517     if (move->GetSource().Equals(from)) {
518       move->SetSource(to);
519     }
520   }
521 }
522 
AddPendingMove(Location source,Location destination,DataType::Type type)523 void ParallelMoveResolverNoSwap::AddPendingMove(Location source,
524                                                 Location destination,
525                                                 DataType::Type type) {
526   pending_moves_.push_back(new (allocator_) MoveOperands(source, destination, type, nullptr));
527 }
528 
DeletePendingMove(MoveOperands * move)529 void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) {
530   RemoveElement(pending_moves_, move);
531 }
532 
GetUnblockedPendingMove(Location loc)533 MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) {
534   for (MoveOperands* move : pending_moves_) {
535     Location destination = move->GetDestination();
536     // Only moves with destination overlapping with input loc can be unblocked.
537     if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) {
538       return move;
539     }
540   }
541   return nullptr;
542 }
543 
IsBlockedByMoves(Location loc)544 bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) {
545   for (MoveOperands* move : pending_moves_) {
546     if (move->Blocks(loc)) {
547       return true;
548     }
549   }
550   for (MoveOperands* move : moves_) {
551     if (move->Blocks(loc)) {
552       return true;
553     }
554   }
555   return false;
556 }
557 
558 // So far it is only used for debugging purposes to make sure all pending moves
559 // have been performed.
GetNumberOfPendingMoves()560 size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() {
561   return pending_moves_.size();
562 }
563 
564 }  // namespace art
565