1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_LIBARTBASE_BASE_HASH_SET_H_
18 #define ART_LIBARTBASE_BASE_HASH_SET_H_
19 
20 #include <stdint.h>
21 
22 #include <functional>
23 #include <iterator>
24 #include <memory>
25 #include <string>
26 #include <type_traits>
27 #include <utility>
28 
29 #include <android-base/logging.h>
30 
31 #include "base/data_hash.h"
32 #include "bit_utils.h"
33 #include "macros.h"
34 
35 namespace art {
36 
// Forward iterator over the occupied slots of a HashSet. Advancing skips free
// (empty) slots; dereferencing is only valid while the iterator points at an
// occupied slot. An index equal to the bucket count represents end().
template <class Elem, class HashSetType>
class HashSetIterator {
 public:
  using iterator_category = std::forward_iterator_tag;
  using value_type = Elem;
  using difference_type = std::ptrdiff_t;
  using pointer = Elem*;
  using reference = Elem&;

  HashSetIterator(const HashSetIterator&) = default;
  HashSetIterator(HashSetIterator&&) = default;
  HashSetIterator(HashSetType* hash_set, size_t index) : index_(index), hash_set_(hash_set) {}

  // Implicit conversion from iterator to const_iterator. SFINAE restricts this to the
  // direction that adds const; the reverse conversion is not available.
  template <class OtherElem,
            class OtherHashSetType,
            typename = typename std::enable_if<
                std::is_same<Elem, const OtherElem>::value &&
                std::is_same<HashSetType, const OtherHashSetType>::value>::type>
  HashSetIterator(const HashSetIterator<OtherElem, OtherHashSetType>& other)
      : index_(other.index_), hash_set_(other.hash_set_) {}

  HashSetIterator& operator=(const HashSetIterator&) = default;
  HashSetIterator& operator=(HashSetIterator&&) = default;

  // Pre-increment: advance to the next occupied slot (or to the end index).
  HashSetIterator operator++() {
    index_ = hash_set_->NextNonEmptySlot(index_);
    return *this;
  }

  // Post-increment: returns the value before advancing.
  HashSetIterator operator++(int) {
    HashSetIterator before_advance = *this;
    ++*this;
    return before_advance;
  }

  // Two iterators compare equal when they refer to the same set and slot.
  bool operator==(const HashSetIterator& other) const {
    return hash_set_ == other.hash_set_ && index_ == other.index_;
  }

  bool operator!=(const HashSetIterator& other) const {
    return !(*this == other);
  }

  Elem& operator*() const {
    DCHECK(!hash_set_->IsFreeSlot(index_));  // Dereferencing end()/free slot is a bug.
    return hash_set_->ElementForIndex(index_);
  }

  Elem* operator->() const {
    return &**this;
  }

 private:
  size_t index_;
  HashSetType* hash_set_;

  template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
  friend bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs,
                         const HashSetIterator<Elem2, HashSetType2>& rhs);
  template <class T, class EmptyFn, class HashFn, class Pred, class Alloc> friend class HashSet;
  template <class OtherElem, class OtherHashSetType> friend class HashSetIterator;
};
100 
101 template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
102 bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs,
103                 const HashSetIterator<Elem2, HashSetType2>& rhs) {
104   static_assert(
105       std::is_convertible<HashSetIterator<Elem1, HashSetType1>,
106                           HashSetIterator<Elem2, HashSetType2>>::value ||
107       std::is_convertible<HashSetIterator<Elem2, HashSetType2>,
108                           HashSetIterator<Elem1, HashSetType1>>::value, "Bad iterator types.");
109   DCHECK_EQ(lhs.hash_set_, rhs.hash_set_);
110   return lhs.index_ == rhs.index_;
111 }
112 
113 template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
114 bool operator!=(const HashSetIterator<Elem1, HashSetType1>& lhs,
115                 const HashSetIterator<Elem2, HashSetType2>& rhs) {
116   return !(lhs == rhs);
117 }
118 
119 // Returns true if an item is empty.
// Default empty-slot policy: a slot holding a value-initialized T is considered free.
// This means callers must never store T() itself as an element when using this policy.
template <class T>
class DefaultEmptyFn {
 public:
  bool IsEmpty(const T& item) const {
    return item == T();
  }
  void MakeEmpty(T& item) const {
    item = T();
  }
};

// Pointer specialization: nullptr marks a free slot, so null pointers cannot be stored.
template <class T>
class DefaultEmptyFn<T*> {
 public:
  bool IsEmpty(T* const& item) const {
    return item == nullptr;
  }
  void MakeEmpty(T*& item) const {
    item = nullptr;
  }
};
141 
// Default hash selector: std::string keys hash via DataHash (hashes the character
// data); all other types fall back to std::hash<T>.
template <class T>
using DefaultHashFn = typename std::conditional<std::is_same<T, std::string>::value,
                                                DataHash,
                                                std::hash<T>>::type;
146 
// Transparent equality for std::string keys.
// Accepts any right-hand type comparable to std::string (e.g. std::string_view or
// const char*), so lookups do not need to materialize a temporary std::string.
struct DefaultStringEquals {
  template <typename T>
  bool operator()(const std::string& lhs, const T& rhs) const {
    return lhs == rhs;
  }
};
155 
// Default equality selector: std::string keys use the transparent DefaultStringEquals
// (heterogeneous lookup); all other types use std::equal_to<T>.
template <class T>
using DefaultPred = typename std::conditional<std::is_same<T, std::string>::value,
                                              DefaultStringEquals,
                                              std::equal_to<T>>::type;
160 
161 // Low memory version of a hash set, uses less memory than std::unordered_multiset since elements
162 // aren't boxed. Uses linear probing to resolve collisions.
163 // EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item).
164 // TODO: We could get rid of this requirement by using a bitmap, though maybe this would be slower
165 // and more complicated.
166 template <class T,
167           class EmptyFn = DefaultEmptyFn<T>,
168           class HashFn = DefaultHashFn<T>,
169           class Pred = DefaultPred<T>,
170           class Alloc = std::allocator<T>>
171 class HashSet {
172  public:
173   using value_type = T;
174   using allocator_type = Alloc;
175   using reference = T&;
176   using const_reference = const T&;
177   using pointer = T*;
178   using const_pointer = const T*;
179   using iterator = HashSetIterator<T, HashSet>;
180   using const_iterator = HashSetIterator<const T, const HashSet>;
181   using size_type = size_t;
182   using difference_type = ptrdiff_t;
183 
184   static constexpr double kDefaultMinLoadFactor = 0.4;
185   static constexpr double kDefaultMaxLoadFactor = 0.7;
186   static constexpr size_t kMinBuckets = 1000;
187 
188   // If we don't own the data, this will create a new array which owns the data.
clear()189   void clear() {
190     DeallocateStorage();
191     num_elements_ = 0;
192     elements_until_expand_ = 0;
193   }
194 
HashSet()195   HashSet() : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor) {}
196 
HashSet(double min_load_factor,double max_load_factor)197   HashSet(double min_load_factor, double max_load_factor) noexcept
198       : num_elements_(0u),
199         num_buckets_(0u),
200         elements_until_expand_(0u),
201         owns_data_(false),
202         data_(nullptr),
203         min_load_factor_(min_load_factor),
204         max_load_factor_(max_load_factor) {
205     DCHECK_GT(min_load_factor, 0.0);
206     DCHECK_LT(max_load_factor, 1.0);
207   }
208 
HashSet(const allocator_type & alloc)209   explicit HashSet(const allocator_type& alloc) noexcept
210       : allocfn_(alloc),
211         hashfn_(),
212         emptyfn_(),
213         pred_(),
214         num_elements_(0u),
215         num_buckets_(0u),
216         elements_until_expand_(0u),
217         owns_data_(false),
218         data_(nullptr),
219         min_load_factor_(kDefaultMinLoadFactor),
220         max_load_factor_(kDefaultMaxLoadFactor) {
221   }
222 
HashSet(const HashSet & other)223   HashSet(const HashSet& other) noexcept
224       : allocfn_(other.allocfn_),
225         hashfn_(other.hashfn_),
226         emptyfn_(other.emptyfn_),
227         pred_(other.pred_),
228         num_elements_(other.num_elements_),
229         num_buckets_(0),
230         elements_until_expand_(other.elements_until_expand_),
231         owns_data_(false),
232         data_(nullptr),
233         min_load_factor_(other.min_load_factor_),
234         max_load_factor_(other.max_load_factor_) {
235     AllocateStorage(other.NumBuckets());
236     for (size_t i = 0; i < num_buckets_; ++i) {
237       ElementForIndex(i) = other.data_[i];
238     }
239   }
240 
241   // noexcept required so that the move constructor is used instead of copy constructor.
242   // b/27860101
HashSet(HashSet && other)243   HashSet(HashSet&& other) noexcept
244       : allocfn_(std::move(other.allocfn_)),
245         hashfn_(std::move(other.hashfn_)),
246         emptyfn_(std::move(other.emptyfn_)),
247         pred_(std::move(other.pred_)),
248         num_elements_(other.num_elements_),
249         num_buckets_(other.num_buckets_),
250         elements_until_expand_(other.elements_until_expand_),
251         owns_data_(other.owns_data_),
252         data_(other.data_),
253         min_load_factor_(other.min_load_factor_),
254         max_load_factor_(other.max_load_factor_) {
255     other.num_elements_ = 0u;
256     other.num_buckets_ = 0u;
257     other.elements_until_expand_ = 0u;
258     other.owns_data_ = false;
259     other.data_ = nullptr;
260   }
261 
262   // Construct from existing data.
263   // Read from a block of memory, if make_copy_of_data is false, then data_ points to within the
264   // passed in ptr_.
HashSet(const uint8_t * ptr,bool make_copy_of_data,size_t * read_count)265   HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) noexcept {
266     uint64_t temp;
267     size_t offset = 0;
268     offset = ReadFromBytes(ptr, offset, &temp);
269     num_elements_ = static_cast<uint64_t>(temp);
270     offset = ReadFromBytes(ptr, offset, &temp);
271     num_buckets_ = static_cast<uint64_t>(temp);
272     CHECK_LE(num_elements_, num_buckets_);
273     offset = ReadFromBytes(ptr, offset, &temp);
274     elements_until_expand_ = static_cast<uint64_t>(temp);
275     offset = ReadFromBytes(ptr, offset, &min_load_factor_);
276     offset = ReadFromBytes(ptr, offset, &max_load_factor_);
277     if (!make_copy_of_data) {
278       owns_data_ = false;
279       data_ = const_cast<T*>(reinterpret_cast<const T*>(ptr + offset));
280       offset += sizeof(*data_) * num_buckets_;
281     } else {
282       AllocateStorage(num_buckets_);
283       // Write elements, not that this may not be safe for cross compilation if the elements are
284       // pointer sized.
285       for (size_t i = 0; i < num_buckets_; ++i) {
286         offset = ReadFromBytes(ptr, offset, &data_[i]);
287       }
288     }
289     // Caller responsible for aligning.
290     *read_count = offset;
291   }
292 
293   // Returns how large the table is after being written. If target is null, then no writing happens
294   // but the size is still returned. Target must be 8 byte aligned.
WriteToMemory(uint8_t * ptr)295   size_t WriteToMemory(uint8_t* ptr) const {
296     size_t offset = 0;
297     offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_));
298     offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_));
299     offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(elements_until_expand_));
300     offset = WriteToBytes(ptr, offset, min_load_factor_);
301     offset = WriteToBytes(ptr, offset, max_load_factor_);
302     // Write elements, not that this may not be safe for cross compilation if the elements are
303     // pointer sized.
304     for (size_t i = 0; i < num_buckets_; ++i) {
305       offset = WriteToBytes(ptr, offset, data_[i]);
306     }
307     // Caller responsible for aligning.
308     return offset;
309   }
310 
~HashSet()311   ~HashSet() {
312     DeallocateStorage();
313   }
314 
315   HashSet& operator=(HashSet&& other) noexcept {
316     HashSet(std::move(other)).swap(*this);  // NOLINT [runtime/explicit] [5]
317     return *this;
318   }
319 
320   HashSet& operator=(const HashSet& other) noexcept {
321     HashSet(other).swap(*this);  // NOLINT(runtime/explicit) - a case of lint gone mad.
322     return *this;
323   }
324 
325   // Lower case for c++11 for each.
begin()326   iterator begin() {
327     iterator ret(this, 0);
328     if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
329       ++ret;  // Skip all the empty slots.
330     }
331     return ret;
332   }
333 
334   // Lower case for c++11 for each. const version.
begin()335   const_iterator begin() const {
336     const_iterator ret(this, 0);
337     if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
338       ++ret;  // Skip all the empty slots.
339     }
340     return ret;
341   }
342 
343   // Lower case for c++11 for each.
end()344   iterator end() {
345     return iterator(this, NumBuckets());
346   }
347 
348   // Lower case for c++11 for each. const version.
end()349   const_iterator end() const {
350     return const_iterator(this, NumBuckets());
351   }
352 
size()353   size_t size() const {
354     return num_elements_;
355   }
356 
empty()357   bool empty() const {
358     return size() == 0;
359   }
360 
361   // Erase algorithm:
362   // Make an empty slot where the iterator is pointing.
363   // Scan forwards until we hit another empty slot.
364   // If an element in between doesn't rehash to the range from the current empty slot to the
365   // iterator. It must be before the empty slot, in that case we can move it to the empty slot
366   // and set the empty slot to be the location we just moved from.
367   // Relies on maintaining the invariant that there's no empty slots from the 'ideal' index of an
368   // element to its actual location/index.
369   // Note that since erase shuffles back elements, it may result in the same element being visited
370   // twice during HashSet iteration. This happens when an element already visited during iteration
371   // gets shuffled to the end of the bucket array.
erase(iterator it)372   iterator erase(iterator it) {
373     // empty_index is the index that will become empty.
374     size_t empty_index = it.index_;
375     DCHECK(!IsFreeSlot(empty_index));
376     size_t next_index = empty_index;
377     bool filled = false;  // True if we filled the empty index.
378     while (true) {
379       next_index = NextIndex(next_index);
380       T& next_element = ElementForIndex(next_index);
381       // If the next element is empty, we are done. Make sure to clear the current empty index.
382       if (emptyfn_.IsEmpty(next_element)) {
383         emptyfn_.MakeEmpty(ElementForIndex(empty_index));
384         break;
385       }
386       // Otherwise try to see if the next element can fill the current empty index.
387       const size_t next_hash = hashfn_(next_element);
388       // Calculate the ideal index, if it is within empty_index + 1 to next_index then there is
389       // nothing we can do.
390       size_t next_ideal_index = IndexForHash(next_hash);
391       // Loop around if needed for our check.
392       size_t unwrapped_next_index = next_index;
393       if (unwrapped_next_index < empty_index) {
394         unwrapped_next_index += NumBuckets();
395       }
396       // Loop around if needed for our check.
397       size_t unwrapped_next_ideal_index = next_ideal_index;
398       if (unwrapped_next_ideal_index < empty_index) {
399         unwrapped_next_ideal_index += NumBuckets();
400       }
401       if (unwrapped_next_ideal_index <= empty_index ||
402           unwrapped_next_ideal_index > unwrapped_next_index) {
403         // If the target index isn't within our current range it must have been probed from before
404         // the empty index.
405         ElementForIndex(empty_index) = std::move(next_element);
406         filled = true;  // TODO: Optimize
407         empty_index = next_index;
408       }
409     }
410     --num_elements_;
411     // If we didn't fill the slot then we need go to the next non free slot.
412     if (!filled) {
413       ++it;
414     }
415     return it;
416   }
417 
418   // Find an element, returns end() if not found.
419   // Allows custom key (K) types, example of when this is useful:
420   // Set of Class* indexed by name, want to find a class with a name but can't allocate
421   // a temporary Class object in the heap for performance solution.
422   template <typename K>
find(const K & key)423   iterator find(const K& key) {
424     return FindWithHash(key, hashfn_(key));
425   }
426 
427   template <typename K>
find(const K & key)428   const_iterator find(const K& key) const {
429     return FindWithHash(key, hashfn_(key));
430   }
431 
432   template <typename K>
FindWithHash(const K & key,size_t hash)433   iterator FindWithHash(const K& key, size_t hash) {
434     return iterator(this, FindIndex(key, hash));
435   }
436 
437   template <typename K>
FindWithHash(const K & key,size_t hash)438   const_iterator FindWithHash(const K& key, size_t hash) const {
439     return const_iterator(this, FindIndex(key, hash));
440   }
441 
442   // Insert an element with hint, allows duplicates.
443   // Note: The hint is not very useful for a HashSet<> unless there are many hash conflicts
444   // and in that case the use of HashSet<> itself should be reconsidered.
insert(const_iterator hint ATTRIBUTE_UNUSED,const T & element)445   std::pair<iterator, bool> insert(const_iterator hint ATTRIBUTE_UNUSED, const T& element) {
446     return insert(element);
447   }
insert(const_iterator hint ATTRIBUTE_UNUSED,T && element)448   std::pair<iterator, bool> insert(const_iterator hint ATTRIBUTE_UNUSED, T&& element) {
449     return insert(std::move(element));
450   }
451 
452   // Insert an element, allows duplicates.
insert(const T & element)453   std::pair<iterator, bool> insert(const T& element) {
454     return InsertWithHash(element, hashfn_(element));
455   }
insert(T && element)456   std::pair<iterator, bool> insert(T&& element) {
457     return InsertWithHash(std::move(element), hashfn_(element));
458   }
459 
460   template <typename U, typename = typename std::enable_if<std::is_convertible<U, T>::value>::type>
InsertWithHash(U && element,size_t hash)461   std::pair<iterator, bool> InsertWithHash(U&& element, size_t hash) {
462     DCHECK_EQ(hash, hashfn_(element));
463     if (num_elements_ >= elements_until_expand_) {
464       Expand();
465       DCHECK_LT(num_elements_, elements_until_expand_);
466     }
467     bool find_failed = false;
468     auto find_fail_fn = [&](size_t index) {
469       find_failed = true;
470       return index;
471     };
472     size_t index = FindIndexImpl(element, hash, find_fail_fn);
473     if (find_failed) {
474       data_[index] = std::forward<U>(element);
475       ++num_elements_;
476     }
477     return std::make_pair(iterator(this, index), find_failed);
478   }
479 
swap(HashSet & other)480   void swap(HashSet& other) {
481     // Use argument-dependent lookup with fall-back to std::swap() for function objects.
482     using std::swap;
483     swap(allocfn_, other.allocfn_);
484     swap(hashfn_, other.hashfn_);
485     swap(emptyfn_, other.emptyfn_);
486     swap(pred_, other.pred_);
487     std::swap(data_, other.data_);
488     std::swap(num_buckets_, other.num_buckets_);
489     std::swap(num_elements_, other.num_elements_);
490     std::swap(elements_until_expand_, other.elements_until_expand_);
491     std::swap(min_load_factor_, other.min_load_factor_);
492     std::swap(max_load_factor_, other.max_load_factor_);
493     std::swap(owns_data_, other.owns_data_);
494   }
495 
get_allocator()496   allocator_type get_allocator() const {
497     return allocfn_;
498   }
499 
ShrinkToMaximumLoad()500   void ShrinkToMaximumLoad() {
501     Resize(size() / max_load_factor_);
502   }
503 
504   // Reserve enough room to insert until Size() == num_elements without requiring to grow the hash
505   // set. No-op if the hash set is already large enough to do this.
reserve(size_t num_elements)506   void reserve(size_t num_elements) {
507     size_t num_buckets = num_elements / max_load_factor_;
508     // Deal with rounding errors. Add one for rounding.
509     while (static_cast<size_t>(num_buckets * max_load_factor_) <= num_elements + 1u) {
510       ++num_buckets;
511     }
512     if (num_buckets > NumBuckets()) {
513       Resize(num_buckets);
514     }
515   }
516 
517   // To distance that inserted elements were probed. Used for measuring how good hash functions
518   // are.
TotalProbeDistance()519   size_t TotalProbeDistance() const {
520     size_t total = 0;
521     for (size_t i = 0; i < NumBuckets(); ++i) {
522       const T& element = ElementForIndex(i);
523       if (!emptyfn_.IsEmpty(element)) {
524         size_t ideal_location = IndexForHash(hashfn_(element));
525         if (ideal_location > i) {
526           total += i + NumBuckets() - ideal_location;
527         } else {
528           total += i - ideal_location;
529         }
530       }
531     }
532     return total;
533   }
534 
535   // Calculate the current load factor and return it.
CalculateLoadFactor()536   double CalculateLoadFactor() const {
537     return static_cast<double>(size()) / static_cast<double>(NumBuckets());
538   }
539 
540   // Make sure that everything reinserts in the right spot. Returns the number of errors.
Verify()541   size_t Verify() NO_THREAD_SAFETY_ANALYSIS {
542     size_t errors = 0;
543     for (size_t i = 0; i < num_buckets_; ++i) {
544       T& element = data_[i];
545       if (!emptyfn_.IsEmpty(element)) {
546         T temp;
547         emptyfn_.MakeEmpty(temp);
548         std::swap(temp, element);
549         size_t first_slot = FirstAvailableSlot(IndexForHash(hashfn_(temp)));
550         if (i != first_slot) {
551           LOG(ERROR) << "Element " << i << " should be in slot " << first_slot;
552           ++errors;
553         }
554         std::swap(temp, element);
555       }
556     }
557     return errors;
558   }
559 
GetMinLoadFactor()560   double GetMinLoadFactor() const {
561     return min_load_factor_;
562   }
563 
GetMaxLoadFactor()564   double GetMaxLoadFactor() const {
565     return max_load_factor_;
566   }
567 
568   // Change the load factor of the hash set. If the current load factor is greater than the max
569   // specified, then we resize the hash table storage.
SetLoadFactor(double min_load_factor,double max_load_factor)570   void SetLoadFactor(double min_load_factor, double max_load_factor) {
571     DCHECK_LT(min_load_factor, max_load_factor);
572     DCHECK_GT(min_load_factor, 0.0);
573     DCHECK_LT(max_load_factor, 1.0);
574     min_load_factor_ = min_load_factor;
575     max_load_factor_ = max_load_factor;
576     elements_until_expand_ = NumBuckets() * max_load_factor_;
577     // If the current load factor isn't in the range, then resize to the mean of the minimum and
578     // maximum load factor.
579     const double load_factor = CalculateLoadFactor();
580     if (load_factor > max_load_factor_) {
581       Resize(size() / ((min_load_factor_ + max_load_factor_) * 0.5));
582     }
583   }
584 
585   // The hash set expands when Size() reaches ElementsUntilExpand().
ElementsUntilExpand()586   size_t ElementsUntilExpand() const {
587     return elements_until_expand_;
588   }
589 
NumBuckets()590   size_t NumBuckets() const {
591     return num_buckets_;
592   }
593 
594  private:
ElementForIndex(size_t index)595   T& ElementForIndex(size_t index) {
596     DCHECK_LT(index, NumBuckets());
597     DCHECK(data_ != nullptr);
598     return data_[index];
599   }
600 
ElementForIndex(size_t index)601   const T& ElementForIndex(size_t index) const {
602     DCHECK_LT(index, NumBuckets());
603     DCHECK(data_ != nullptr);
604     return data_[index];
605   }
606 
IndexForHash(size_t hash)607   size_t IndexForHash(size_t hash) const {
608     // Protect against undefined behavior (division by zero).
609     if (UNLIKELY(num_buckets_ == 0)) {
610       return 0;
611     }
612     return hash % num_buckets_;
613   }
614 
NextIndex(size_t index)615   size_t NextIndex(size_t index) const {
616     if (UNLIKELY(++index >= num_buckets_)) {
617       DCHECK_EQ(index, NumBuckets());
618       return 0;
619     }
620     return index;
621   }
622 
623   // Find the hash table slot for an element, or return NumBuckets() if not found.
624   // This value for not found is important so that iterator(this, FindIndex(...)) == end().
625   template <typename K>
FindIndex(const K & element,size_t hash)626   size_t FindIndex(const K& element, size_t hash) const {
627     // Guard against failing to get an element for a non-existing index.
628     if (UNLIKELY(NumBuckets() == 0)) {
629       return 0;
630     }
631     auto fail_fn = [&](size_t index ATTRIBUTE_UNUSED) { return NumBuckets(); };
632     return FindIndexImpl(element, hash, fail_fn);
633   }
634 
635   // Find the hash table slot for an element, or return an empty slot index if not found.
636   template <typename K, typename FailFn>
FindIndexImpl(const K & element,size_t hash,FailFn fail_fn)637   size_t FindIndexImpl(const K& element, size_t hash, FailFn fail_fn) const {
638     DCHECK_NE(NumBuckets(), 0u);
639     DCHECK_EQ(hashfn_(element), hash);
640     size_t index = IndexForHash(hash);
641     while (true) {
642       const T& slot = ElementForIndex(index);
643       if (emptyfn_.IsEmpty(slot)) {
644         return fail_fn(index);
645       }
646       if (pred_(slot, element)) {
647         return index;
648       }
649       index = NextIndex(index);
650     }
651   }
652 
IsFreeSlot(size_t index)653   bool IsFreeSlot(size_t index) const {
654     return emptyfn_.IsEmpty(ElementForIndex(index));
655   }
656 
657   // Allocate a number of buckets.
AllocateStorage(size_t num_buckets)658   void AllocateStorage(size_t num_buckets) {
659     num_buckets_ = num_buckets;
660     data_ = allocfn_.allocate(num_buckets_);
661     owns_data_ = true;
662     for (size_t i = 0; i < num_buckets_; ++i) {
663       allocfn_.construct(allocfn_.address(data_[i]));
664       emptyfn_.MakeEmpty(data_[i]);
665     }
666   }
667 
DeallocateStorage()668   void DeallocateStorage() {
669     if (owns_data_) {
670       for (size_t i = 0; i < NumBuckets(); ++i) {
671         allocfn_.destroy(allocfn_.address(data_[i]));
672       }
673       if (data_ != nullptr) {
674         allocfn_.deallocate(data_, NumBuckets());
675       }
676       owns_data_ = false;
677     }
678     data_ = nullptr;
679     num_buckets_ = 0;
680   }
681 
682   // Expand the set based on the load factors.
Expand()683   void Expand() {
684     size_t min_index = static_cast<size_t>(size() / min_load_factor_);
685     // Resize based on the minimum load factor.
686     Resize(min_index);
687   }
688 
689   // Expand / shrink the table to the new specified size.
Resize(size_t new_size)690   void Resize(size_t new_size) {
691     if (new_size < kMinBuckets) {
692       new_size = kMinBuckets;
693     }
694     DCHECK_GE(new_size, size());
695     T* const old_data = data_;
696     size_t old_num_buckets = num_buckets_;
697     // Reinsert all of the old elements.
698     const bool owned_data = owns_data_;
699     AllocateStorage(new_size);
700     for (size_t i = 0; i < old_num_buckets; ++i) {
701       T& element = old_data[i];
702       if (!emptyfn_.IsEmpty(element)) {
703         data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element);
704       }
705       if (owned_data) {
706         allocfn_.destroy(allocfn_.address(element));
707       }
708     }
709     if (owned_data) {
710       allocfn_.deallocate(old_data, old_num_buckets);
711     }
712 
713     // When we hit elements_until_expand_, we are at the max load factor and must expand again.
714     elements_until_expand_ = NumBuckets() * max_load_factor_;
715   }
716 
FirstAvailableSlot(size_t index)717   ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
718     DCHECK_LT(index, NumBuckets());  // Don't try to get a slot out of range.
719     size_t non_empty_count = 0;
720     while (!emptyfn_.IsEmpty(data_[index])) {
721       index = NextIndex(index);
722       non_empty_count++;
723       DCHECK_LE(non_empty_count, NumBuckets());  // Don't loop forever.
724     }
725     return index;
726   }
727 
NextNonEmptySlot(size_t index)728   size_t NextNonEmptySlot(size_t index) const {
729     const size_t num_buckets = NumBuckets();
730     DCHECK_LT(index, num_buckets);
731     do {
732       ++index;
733     } while (index < num_buckets && IsFreeSlot(index));
734     return index;
735   }
736 
737   // Return new offset.
738   template <typename Elem>
WriteToBytes(uint8_t * ptr,size_t offset,Elem n)739   static size_t WriteToBytes(uint8_t* ptr, size_t offset, Elem n) {
740     DCHECK_ALIGNED(ptr + offset, sizeof(n));
741     if (ptr != nullptr) {
742       *reinterpret_cast<Elem*>(ptr + offset) = n;
743     }
744     return offset + sizeof(n);
745   }
746 
747   template <typename Elem>
ReadFromBytes(const uint8_t * ptr,size_t offset,Elem * out)748   static size_t ReadFromBytes(const uint8_t* ptr, size_t offset, Elem* out) {
749     DCHECK(ptr != nullptr);
750     DCHECK_ALIGNED(ptr + offset, sizeof(*out));
751     *out = *reinterpret_cast<const Elem*>(ptr + offset);
752     return offset + sizeof(*out);
753   }
754 
755   Alloc allocfn_;  // Allocator function.
756   HashFn hashfn_;  // Hashing function.
757   EmptyFn emptyfn_;  // IsEmpty/SetEmpty function.
758   Pred pred_;  // Equals function.
759   size_t num_elements_;  // Number of inserted elements.
760   size_t num_buckets_;  // Number of hash table buckets.
761   size_t elements_until_expand_;  // Maximum number of elements until we expand the table.
762   bool owns_data_;  // If we own data_ and are responsible for freeing it.
763   T* data_;  // Backing storage.
764   double min_load_factor_;
765   double max_load_factor_;
766 
767   template <class Elem, class HashSetType>
768   friend class HashSetIterator;
769 
770   ART_FRIEND_TEST(InternTableTest, CrossHash);
771 };
772 
773 template <class T, class EmptyFn, class HashFn, class Pred, class Alloc>
swap(HashSet<T,EmptyFn,HashFn,Pred,Alloc> & lhs,HashSet<T,EmptyFn,HashFn,Pred,Alloc> & rhs)774 void swap(HashSet<T, EmptyFn, HashFn, Pred, Alloc>& lhs,
775           HashSet<T, EmptyFn, HashFn, Pred, Alloc>& rhs) {
776   lhs.swap(rhs);
777 }
778 
779 }  // namespace art
780 
781 #endif  // ART_LIBARTBASE_BASE_HASH_SET_H_
782