1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "gvn.h"
18 
19 #include "base/arena_bit_vector.h"
20 #include "base/bit_vector-inl.h"
21 #include "base/scoped_arena_allocator.h"
22 #include "base/scoped_arena_containers.h"
23 #include "base/utils.h"
24 #include "side_effects_analysis.h"
25 
26 namespace art {
27 
28 /**
29  * A ValueSet holds instructions that can replace other instructions. It is updated
30  * through the `Add` method, and the `Kill` method. The `Kill` method removes
31  * instructions that are affected by the given side effect.
32  *
33  * The `Lookup` method returns an equivalent instruction to the given instruction
34  * if there is one in the set. In GVN, we would say those instructions have the
35  * same "number".
36  */
37 class ValueSet : public ArenaObject<kArenaAllocGvn> {
38  public:
39   // Constructs an empty ValueSet which owns all its buckets.
ValueSet(ScopedArenaAllocator * allocator)40   explicit ValueSet(ScopedArenaAllocator* allocator)
41       : allocator_(allocator),
42         num_buckets_(kMinimumNumberOfBuckets),
43         buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
44         buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
45         num_entries_(0u) {
46     DCHECK(IsPowerOfTwo(num_buckets_));
47     std::fill_n(buckets_, num_buckets_, nullptr);
48     buckets_owned_.SetInitialBits(num_buckets_);
49   }
50 
51   // Copy constructor. Depending on the load factor, it will either make a deep
52   // copy (all buckets owned) or a shallow one (buckets pointing to the parent).
ValueSet(ScopedArenaAllocator * allocator,const ValueSet & other)53   ValueSet(ScopedArenaAllocator* allocator, const ValueSet& other)
54       : allocator_(allocator),
55         num_buckets_(other.IdealBucketCount()),
56         buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
57         buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
58         num_entries_(0u) {
59     DCHECK(IsPowerOfTwo(num_buckets_));
60     PopulateFromInternal(other);
61   }
62 
63   // Erases all values in this set and populates it with values from `other`.
PopulateFrom(const ValueSet & other)64   void PopulateFrom(const ValueSet& other) {
65     if (this == &other) {
66       return;
67     }
68     PopulateFromInternal(other);
69   }
70 
71   // Returns true if `this` has enough buckets so that if `other` is copied into
72   // it, the load factor will not cross the upper threshold.
73   // If `exact_match` is set, true is returned only if `this` has the ideal
74   // number of buckets. Larger number of buckets is allowed otherwise.
CanHoldCopyOf(const ValueSet & other,bool exact_match)75   bool CanHoldCopyOf(const ValueSet& other, bool exact_match) {
76     if (exact_match) {
77       return other.IdealBucketCount() == num_buckets_;
78     } else {
79       return other.IdealBucketCount() <= num_buckets_;
80     }
81   }
82 
83   // Adds an instruction in the set.
Add(HInstruction * instruction)84   void Add(HInstruction* instruction) {
85     DCHECK(Lookup(instruction) == nullptr);
86     size_t hash_code = HashCode(instruction);
87     size_t index = BucketIndex(hash_code);
88 
89     if (!buckets_owned_.IsBitSet(index)) {
90       CloneBucket(index);
91     }
92     buckets_[index] = new (allocator_) Node(instruction, hash_code, buckets_[index]);
93     ++num_entries_;
94   }
95 
96   // If in the set, returns an equivalent instruction to the given instruction.
97   // Returns null otherwise.
Lookup(HInstruction * instruction) const98   HInstruction* Lookup(HInstruction* instruction) const {
99     size_t hash_code = HashCode(instruction);
100     size_t index = BucketIndex(hash_code);
101 
102     for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
103       if (node->GetHashCode() == hash_code) {
104         HInstruction* existing = node->GetInstruction();
105         if (existing->Equals(instruction)) {
106           return existing;
107         }
108       }
109     }
110     return nullptr;
111   }
112 
113   // Returns whether instruction is in the set.
Contains(HInstruction * instruction) const114   bool Contains(HInstruction* instruction) const {
115     size_t hash_code = HashCode(instruction);
116     size_t index = BucketIndex(hash_code);
117 
118     for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
119       if (node->GetInstruction() == instruction) {
120         return true;
121       }
122     }
123     return false;
124   }
125 
126   // Removes all instructions in the set affected by the given side effects.
Kill(SideEffects side_effects)127   void Kill(SideEffects side_effects) {
128     DeleteAllImpureWhich([side_effects](Node* node) {
129       return node->GetSideEffects().MayDependOn(side_effects);
130     });
131   }
132 
Clear()133   void Clear() {
134     num_entries_ = 0;
135     for (size_t i = 0; i < num_buckets_; ++i) {
136       buckets_[i] = nullptr;
137     }
138     buckets_owned_.SetInitialBits(num_buckets_);
139   }
140 
141   // Updates this set by intersecting with instructions in a predecessor's set.
IntersectWith(ValueSet * predecessor)142   void IntersectWith(ValueSet* predecessor) {
143     if (IsEmpty()) {
144       return;
145     } else if (predecessor->IsEmpty()) {
146       Clear();
147     } else {
148       // Pure instructions do not need to be tested because only impure
149       // instructions can be killed.
150       DeleteAllImpureWhich([predecessor](Node* node) {
151         return !predecessor->Contains(node->GetInstruction());
152       });
153     }
154   }
155 
IsEmpty() const156   bool IsEmpty() const { return num_entries_ == 0; }
GetNumberOfEntries() const157   size_t GetNumberOfEntries() const { return num_entries_; }
158 
159  private:
160   // Copies all entries from `other` to `this`.
PopulateFromInternal(const ValueSet & other)161   void PopulateFromInternal(const ValueSet& other) {
162     DCHECK_NE(this, &other);
163     DCHECK_GE(num_buckets_, other.IdealBucketCount());
164 
165     if (num_buckets_ == other.num_buckets_) {
166       // Hash table remains the same size. We copy the bucket pointers and leave
167       // all buckets_owned_ bits false.
168       buckets_owned_.ClearAllBits();
169       memcpy(buckets_, other.buckets_, num_buckets_ * sizeof(Node*));
170     } else {
171       // Hash table size changes. We copy and rehash all entries, and set all
172       // buckets_owned_ bits to true.
173       std::fill_n(buckets_, num_buckets_, nullptr);
174       for (size_t i = 0; i < other.num_buckets_; ++i) {
175         for (Node* node = other.buckets_[i]; node != nullptr; node = node->GetNext()) {
176           size_t new_index = BucketIndex(node->GetHashCode());
177           buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]);
178         }
179       }
180       buckets_owned_.SetInitialBits(num_buckets_);
181     }
182 
183     num_entries_ = other.num_entries_;
184   }
185 
186   class Node : public ArenaObject<kArenaAllocGvn> {
187    public:
Node(HInstruction * instruction,size_t hash_code,Node * next)188     Node(HInstruction* instruction, size_t hash_code, Node* next)
189         : instruction_(instruction), hash_code_(hash_code), next_(next) {}
190 
GetHashCode() const191     size_t GetHashCode() const { return hash_code_; }
GetInstruction() const192     HInstruction* GetInstruction() const { return instruction_; }
GetNext() const193     Node* GetNext() const { return next_; }
SetNext(Node * node)194     void SetNext(Node* node) { next_ = node; }
195 
Dup(ScopedArenaAllocator * allocator,Node * new_next=nullptr)196     Node* Dup(ScopedArenaAllocator* allocator, Node* new_next = nullptr) {
197       return new (allocator) Node(instruction_, hash_code_, new_next);
198     }
199 
GetSideEffects() const200     SideEffects GetSideEffects() const {
201       // Deoptimize is a weird instruction since it's predicated and
202       // never-return. Its side-effects are to prevent the splitting of dex
203       // instructions across it (which could cause inconsistencies once we begin
204       // interpreting again). In the context of GVN the 'perform-deopt' branch is not
205       // relevant and we only need to care about the no-op case, in which case there are
206       // no side-effects. By doing this we are able to eliminate redundant (i.e.
207       // dominated deopts with GVNd conditions) deoptimizations.
208       if (instruction_->IsDeoptimize()) {
209         return SideEffects::None();
210       } else {
211         return instruction_->GetSideEffects();
212       }
213     }
214 
215    private:
216     HInstruction* const instruction_;
217     const size_t hash_code_;
218     Node* next_;
219 
220     DISALLOW_COPY_AND_ASSIGN(Node);
221   };
222 
223   // Creates our own copy of a bucket that is currently pointing to a parent.
224   // This algorithm can be called while iterating over the bucket because it
225   // preserves the order of entries in the bucket and will return the clone of
226   // the given 'iterator'.
CloneBucket(size_t index,Node * iterator=nullptr)227   Node* CloneBucket(size_t index, Node* iterator = nullptr) {
228     DCHECK(!buckets_owned_.IsBitSet(index));
229     Node* clone_current = nullptr;
230     Node* clone_previous = nullptr;
231     Node* clone_iterator = nullptr;
232     for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
233       clone_current = node->Dup(allocator_, nullptr);
234       if (node == iterator) {
235         clone_iterator = clone_current;
236       }
237       if (clone_previous == nullptr) {
238         buckets_[index] = clone_current;
239       } else {
240         clone_previous->SetNext(clone_current);
241       }
242       clone_previous = clone_current;
243     }
244     buckets_owned_.SetBit(index);
245     return clone_iterator;
246   }
247 
248   // Iterates over buckets with impure instructions (even indices) and deletes
249   // the ones on which 'cond' returns true.
250   template<typename Functor>
DeleteAllImpureWhich(Functor cond)251   void DeleteAllImpureWhich(Functor cond) {
252     for (size_t i = 0; i < num_buckets_; i += 2) {
253       Node* node = buckets_[i];
254       Node* previous = nullptr;
255 
256       if (node == nullptr) {
257         continue;
258       }
259 
260       if (!buckets_owned_.IsBitSet(i)) {
261         // Bucket is not owned but maybe we won't need to change it at all.
262         // Iterate as long as the entries don't satisfy 'cond'.
263         while (node != nullptr) {
264           if (cond(node)) {
265             // We do need to delete an entry but we do not own the bucket.
266             // Clone the bucket, make sure 'previous' and 'node' point to
267             // the cloned entries and break.
268             previous = CloneBucket(i, previous);
269             node = (previous == nullptr) ? buckets_[i] : previous->GetNext();
270             break;
271           }
272           previous = node;
273           node = node->GetNext();
274         }
275       }
276 
277       // By this point we either own the bucket and can start deleting entries,
278       // or we do not own it but no entries matched 'cond'.
279       DCHECK(buckets_owned_.IsBitSet(i) || node == nullptr);
280 
281       // We iterate over the remainder of entries and delete those that match
282       // the given condition.
283       while (node != nullptr) {
284         Node* next = node->GetNext();
285         if (cond(node)) {
286           if (previous == nullptr) {
287             buckets_[i] = next;
288           } else {
289             previous->SetNext(next);
290           }
291         } else {
292           previous = node;
293         }
294         node = next;
295       }
296     }
297   }
298 
299   // Computes a bucket count such that the load factor is reasonable.
300   // This is estimated as (num_entries_ * 1.5) and rounded up to nearest pow2.
IdealBucketCount() const301   size_t IdealBucketCount() const {
302     size_t bucket_count = RoundUpToPowerOfTwo(num_entries_ + (num_entries_ >> 1));
303     if (bucket_count > kMinimumNumberOfBuckets) {
304       return bucket_count;
305     } else {
306       return kMinimumNumberOfBuckets;
307     }
308   }
309 
310   // Generates a hash code for an instruction.
HashCode(HInstruction * instruction) const311   size_t HashCode(HInstruction* instruction) const {
312     size_t hash_code = instruction->ComputeHashCode();
313     // Pure instructions are put into odd buckets to speed up deletion. Note that in the
314     // case of irreducible loops, we don't put pure instructions in odd buckets, as we
315     // need to delete them when entering the loop.
316     // ClinitCheck is treated as a pure instruction since it's only executed
317     // once.
318     bool pure = !instruction->GetSideEffects().HasDependencies() ||
319                 instruction->IsClinitCheck();
320     if (!pure || instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) {
321       return (hash_code << 1) | 0;
322     } else {
323       return (hash_code << 1) | 1;
324     }
325   }
326 
327   // Converts a hash code to a bucket index.
BucketIndex(size_t hash_code) const328   size_t BucketIndex(size_t hash_code) const {
329     return hash_code & (num_buckets_ - 1);
330   }
331 
332   ScopedArenaAllocator* const allocator_;
333 
334   // The internal bucket implementation of the set.
335   size_t const num_buckets_;
336   Node** const buckets_;
337 
338   // Flags specifying which buckets were copied into the set from its parent.
339   // If a flag is not set, the corresponding bucket points to entries in the
340   // parent and must be cloned prior to making changes.
341   ArenaBitVector buckets_owned_;
342 
343   // The number of entries in the set.
344   size_t num_entries_;
345 
346   static constexpr size_t kMinimumNumberOfBuckets = 8;
347 
348   DISALLOW_COPY_AND_ASSIGN(ValueSet);
349 };
350 
351 /**
352  * Optimization phase that removes redundant instruction.
353  */
354 class GlobalValueNumberer : public ValueObject {
355  public:
GlobalValueNumberer(HGraph * graph,const SideEffectsAnalysis & side_effects)356   GlobalValueNumberer(HGraph* graph,
357                       const SideEffectsAnalysis& side_effects)
358       : graph_(graph),
359         allocator_(graph->GetArenaStack()),
360         side_effects_(side_effects),
361         sets_(graph->GetBlocks().size(), nullptr, allocator_.Adapter(kArenaAllocGvn)),
362         visited_blocks_(
363             &allocator_, graph->GetBlocks().size(), /* expandable= */ false, kArenaAllocGvn) {
364     visited_blocks_.ClearAllBits();
365   }
366 
367   bool Run();
368 
369  private:
370   // Per-block GVN. Will also update the ValueSet of the dominated and
371   // successor blocks.
372   void VisitBasicBlock(HBasicBlock* block);
373 
374   HGraph* graph_;
375   ScopedArenaAllocator allocator_;
376   const SideEffectsAnalysis& side_effects_;
377 
FindSetFor(HBasicBlock * block) const378   ValueSet* FindSetFor(HBasicBlock* block) const {
379     ValueSet* result = sets_[block->GetBlockId()];
380     DCHECK(result != nullptr) << "Could not find set for block B" << block->GetBlockId();
381     return result;
382   }
383 
AbandonSetFor(HBasicBlock * block)384   void AbandonSetFor(HBasicBlock* block) {
385     DCHECK(sets_[block->GetBlockId()] != nullptr)
386         << "Block B" << block->GetBlockId() << " expected to have a set";
387     sets_[block->GetBlockId()] = nullptr;
388   }
389 
390   // Returns false if the GlobalValueNumberer has already visited all blocks
391   // which may reference `block`.
392   bool WillBeReferencedAgain(HBasicBlock* block) const;
393 
394   // Iterates over visited blocks and finds one which has a ValueSet such that:
395   // (a) it will not be referenced in the future, and
396   // (b) it can hold a copy of `reference_set` with a reasonable load factor.
397   HBasicBlock* FindVisitedBlockWithRecyclableSet(HBasicBlock* block,
398                                                  const ValueSet& reference_set) const;
399 
400   // ValueSet for blocks. Initially null, but for an individual block they
401   // are allocated and populated by the dominator, and updated by all blocks
402   // in the path from the dominator to the block.
403   ScopedArenaVector<ValueSet*> sets_;
404 
405   // BitVector which serves as a fast-access map from block id to
406   // visited/unvisited Boolean.
407   ArenaBitVector visited_blocks_;
408 
409   DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
410 };
411 
Run()412 bool GlobalValueNumberer::Run() {
413   DCHECK(side_effects_.HasRun());
414   sets_[graph_->GetEntryBlock()->GetBlockId()] = new (&allocator_) ValueSet(&allocator_);
415 
416   // Use the reverse post order to ensure the non back-edge predecessors of a block are
417   // visited before the block itself.
418   for (HBasicBlock* block : graph_->GetReversePostOrder()) {
419     VisitBasicBlock(block);
420   }
421   return true;
422 }
423 
VisitBasicBlock(HBasicBlock * block)424 void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
425   ValueSet* set = nullptr;
426 
427   const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
428   if (predecessors.size() == 0 || predecessors[0]->IsEntryBlock()) {
429     // The entry block should only accumulate constant instructions, and
430     // the builder puts constants only in the entry block.
431     // Therefore, there is no need to propagate the value set to the next block.
432     set = new (&allocator_) ValueSet(&allocator_);
433   } else {
434     HBasicBlock* dominator = block->GetDominator();
435     ValueSet* dominator_set = FindSetFor(dominator);
436 
437     if (dominator->GetSuccessors().size() == 1) {
438       // `block` is a direct successor of its dominator. No need to clone the
439       // dominator's set, `block` can take over its ownership including its buckets.
440       DCHECK_EQ(dominator->GetSingleSuccessor(), block);
441       AbandonSetFor(dominator);
442       set = dominator_set;
443     } else {
444       // Try to find a basic block which will never be referenced again and whose
445       // ValueSet can therefore be recycled. We will need to copy `dominator_set`
446       // into the recycled set, so we pass `dominator_set` as a reference for size.
447       HBasicBlock* recyclable = FindVisitedBlockWithRecyclableSet(block, *dominator_set);
448       if (recyclable == nullptr) {
449         // No block with a suitable ValueSet found. Allocate a new one and
450         // copy `dominator_set` into it.
451         set = new (&allocator_) ValueSet(&allocator_, *dominator_set);
452       } else {
453         // Block with a recyclable ValueSet found. Clone `dominator_set` into it.
454         set = FindSetFor(recyclable);
455         AbandonSetFor(recyclable);
456         set->PopulateFrom(*dominator_set);
457       }
458     }
459 
460     if (!set->IsEmpty()) {
461       if (block->IsLoopHeader()) {
462         if (block->GetLoopInformation()->ContainsIrreducibleLoop()) {
463           // To satisfy our linear scan algorithm, no instruction should flow in an irreducible
464           // loop header. We clear the set at entry of irreducible loops and any loop containing
465           // an irreducible loop, as in both cases, GVN can extend the liveness of an instruction
466           // across the irreducible loop.
467           // Note that, if we're not compiling OSR, we could still do GVN and introduce
468           // phis at irreducible loop headers. We decided it was not worth the complexity.
469           set->Clear();
470         } else {
471           DCHECK(!block->GetLoopInformation()->IsIrreducible());
472           DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
473           set->Kill(side_effects_.GetLoopEffects(block));
474         }
475       } else if (predecessors.size() > 1) {
476         for (HBasicBlock* predecessor : predecessors) {
477           set->IntersectWith(FindSetFor(predecessor));
478           if (set->IsEmpty()) {
479             break;
480           }
481         }
482       }
483     }
484   }
485 
486   sets_[block->GetBlockId()] = set;
487 
488   HInstruction* current = block->GetFirstInstruction();
489   while (current != nullptr) {
490     // Save the next instruction in case `current` is removed from the graph.
491     HInstruction* next = current->GetNext();
492     // Do not kill the set with the side effects of the instruction just now: if
493     // the instruction is GVN'ed, we don't need to kill.
494     //
495     // BoundType is a special case example of an instruction which shouldn't be moved but can be
496     // GVN'ed.
497     //
498     // Deoptimize is a special case since even though we don't want to move it we can still remove
499     // it for GVN.
500     if (current->CanBeMoved() || current->IsBoundType() || current->IsDeoptimize()) {
501       if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) {
502         // For commutative ops, (x op y) will be treated the same as (y op x)
503         // after fixed ordering.
504         current->AsBinaryOperation()->OrderInputs();
505       }
506       HInstruction* existing = set->Lookup(current);
507       if (existing != nullptr) {
508         // This replacement doesn't make more OrderInputs() necessary since
509         // current is either used by an instruction that it dominates,
510         // which hasn't been visited yet due to the order we visit instructions.
511         // Or current is used by a phi, and we don't do OrderInputs() on a phi anyway.
512         current->ReplaceWith(existing);
513         current->GetBlock()->RemoveInstruction(current);
514       } else {
515         set->Kill(current->GetSideEffects());
516         set->Add(current);
517       }
518     } else {
519       set->Kill(current->GetSideEffects());
520     }
521     current = next;
522   }
523 
524   visited_blocks_.SetBit(block->GetBlockId());
525 }
526 
WillBeReferencedAgain(HBasicBlock * block) const527 bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const {
528   DCHECK(visited_blocks_.IsBitSet(block->GetBlockId()));
529 
530   for (const HBasicBlock* dominated_block : block->GetDominatedBlocks()) {
531     if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) {
532       return true;
533     }
534   }
535 
536   for (const HBasicBlock* successor : block->GetSuccessors()) {
537     if (!visited_blocks_.IsBitSet(successor->GetBlockId())) {
538       return true;
539     }
540   }
541 
542   return false;
543 }
544 
FindVisitedBlockWithRecyclableSet(HBasicBlock * block,const ValueSet & reference_set) const545 HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet(
546     HBasicBlock* block, const ValueSet& reference_set) const {
547   HBasicBlock* secondary_match = nullptr;
548 
549   for (size_t block_id : visited_blocks_.Indexes()) {
550     ValueSet* current_set = sets_[block_id];
551     if (current_set == nullptr) {
552       // Set was already recycled.
553       continue;
554     }
555 
556     HBasicBlock* current_block = block->GetGraph()->GetBlocks()[block_id];
557 
558     // We test if `current_set` has enough buckets to store a copy of
559     // `reference_set` with a reasonable load factor. If we find a set whose
560     // number of buckets matches perfectly, we return right away. If we find one
561     // that is larger, we return it if no perfectly-matching set is found.
562     // Note that we defer testing WillBeReferencedAgain until all other criteria
563     // have been satisfied because it might be expensive.
564     if (current_set->CanHoldCopyOf(reference_set, /* exact_match= */ true)) {
565       if (!WillBeReferencedAgain(current_block)) {
566         return current_block;
567       }
568     } else if (secondary_match == nullptr &&
569                current_set->CanHoldCopyOf(reference_set, /* exact_match= */ false)) {
570       if (!WillBeReferencedAgain(current_block)) {
571         secondary_match = current_block;
572       }
573     }
574   }
575 
576   return secondary_match;
577 }
578 
Run()579 bool GVNOptimization::Run() {
580   GlobalValueNumberer gvn(graph_, side_effects_);
581   return gvn.Run();
582 }
583 
584 }  // namespace art
585