1 /*
2  * Copyright (C) 2014, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LATINIME_TRIE_MAP_H
18 #define LATINIME_TRIE_MAP_H
19 
20 #include <climits>
21 #include <cstdint>
22 #include <cstdio>
23 #include <vector>
24 
25 #include "defines.h"
26 #include "dictionary/utils/buffer_with_extendable_buffer.h"
27 #include "utils/byte_array_view.h"
28 
29 namespace latinime {
30 
31 /**
32  * Trie map derived from Phil Bagwell's Hash Array Mapped Trie.
33  * key is int and value is uint64_t.
34  * This supports multiple level map. Terminal entries can have a bitmap for the next level map.
35  * This doesn't support root map resizing.
36  */
37 class TrieMap {
38  public:
39     struct Result {
40         const uint64_t mValue;
41         const bool mIsValid;
42         const int mNextLevelBitmapEntryIndex;
43 
ResultResult44         Result(const uint64_t value, const bool isValid, const int nextLevelBitmapEntryIndex)
45                 : mValue(value), mIsValid(isValid),
46                   mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {}
47     };
48 
49     /**
50      * Struct to record iteration state in a table.
51      */
52     struct TableIterationState {
53         int mTableSize;
54         int mTableIndex;
55         int mCurrentIndex;
56 
TableIterationStateTableIterationState57         TableIterationState(const int tableSize, const int tableIndex)
58                 : mTableSize(tableSize), mTableIndex(tableIndex), mCurrentIndex(0) {}
59     };
60 
61     class TrieMapRange;
62     class TrieMapIterator {
63      public:
64         class IterationResult {
65          public:
IterationResult(const TrieMap * const trieMap,const int key,const uint64_t value,const int nextLeveBitmapEntryIndex)66             IterationResult(const TrieMap *const trieMap, const int key, const uint64_t value,
67                     const int nextLeveBitmapEntryIndex)
68                     : mTrieMap(trieMap), mKey(key), mValue(value),
69                       mNextLevelBitmapEntryIndex(nextLeveBitmapEntryIndex) {}
70 
getEntriesInNextLevel()71             const TrieMapRange getEntriesInNextLevel() const {
72                 return TrieMapRange(mTrieMap, mNextLevelBitmapEntryIndex);
73             }
74 
hasNextLevelMap()75             bool hasNextLevelMap() const {
76                 return mNextLevelBitmapEntryIndex != INVALID_INDEX;
77             }
78 
key()79             AK_FORCE_INLINE int key() const {
80                 return mKey;
81             }
82 
value()83             AK_FORCE_INLINE uint64_t value() const {
84                 return mValue;
85             }
86 
getNextLevelBitmapEntryIndex()87             AK_FORCE_INLINE int getNextLevelBitmapEntryIndex() const {
88                 return mNextLevelBitmapEntryIndex;
89             }
90 
91          private:
92             const TrieMap *const mTrieMap;
93             const int mKey;
94             const uint64_t mValue;
95             const int mNextLevelBitmapEntryIndex;
96         };
97 
TrieMapIterator(const TrieMap * const trieMap,const int bitmapEntryIndex)98         TrieMapIterator(const TrieMap *const trieMap, const int bitmapEntryIndex)
99                 : mTrieMap(trieMap), mStateStack(), mBaseBitmapEntryIndex(bitmapEntryIndex),
100                   mKey(0), mValue(0), mIsValid(false), mNextLevelBitmapEntryIndex(INVALID_INDEX) {
101             if (!trieMap || mBaseBitmapEntryIndex == INVALID_INDEX) {
102                 return;
103             }
104             const Entry bitmapEntry = mTrieMap->readEntry(mBaseBitmapEntryIndex);
105             mStateStack.emplace_back(
106                     mTrieMap->popCount(bitmapEntry.getBitmap()), bitmapEntry.getTableIndex());
107             this->operator++();
108         }
109 
110         const IterationResult operator*() const {
111             return IterationResult(mTrieMap, mKey, mValue, mNextLevelBitmapEntryIndex);
112         }
113 
114         bool operator!=(const TrieMapIterator &other) const {
115             // Caveat: This works only for for loops.
116             return mIsValid || other.mIsValid;
117         }
118 
119         const TrieMapIterator &operator++() {
120             const Result result = mTrieMap->iterateNext(&mStateStack, &mKey);
121             mValue = result.mValue;
122             mIsValid = result.mIsValid;
123             mNextLevelBitmapEntryIndex = result.mNextLevelBitmapEntryIndex;
124             return *this;
125         }
126 
127      private:
128         DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapIterator);
129         DISALLOW_ASSIGNMENT_OPERATOR(TrieMapIterator);
130 
131         const TrieMap *const mTrieMap;
132         std::vector<TrieMap::TableIterationState> mStateStack;
133         const int mBaseBitmapEntryIndex;
134         int mKey;
135         uint64_t mValue;
136         bool mIsValid;
137         int mNextLevelBitmapEntryIndex;
138     };
139 
140     /**
141      * Class to support iterating entries in TrieMap by range base for loops.
142      */
143     class TrieMapRange {
144      public:
TrieMapRange(const TrieMap * const trieMap,const int bitmapEntryIndex)145         TrieMapRange(const TrieMap *const trieMap, const int bitmapEntryIndex)
146                 : mTrieMap(trieMap), mBaseBitmapEntryIndex(bitmapEntryIndex) {};
147 
begin()148         TrieMapIterator begin() const {
149             return TrieMapIterator(mTrieMap, mBaseBitmapEntryIndex);
150         }
151 
end()152         const TrieMapIterator end() const {
153             return TrieMapIterator(nullptr, INVALID_INDEX);
154         }
155 
156      private:
157         DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapRange);
158         DISALLOW_ASSIGNMENT_OPERATOR(TrieMapRange);
159 
160         const TrieMap *const mTrieMap;
161         const int mBaseBitmapEntryIndex;
162     };
163 
164     static const int INVALID_INDEX;
165     static const uint64_t MAX_VALUE;
166 
167     TrieMap();
168     // Construct TrieMap using existing data in the memory region written by save().
169     TrieMap(const ReadWriteByteArrayView buffer);
170     void dump(const int from = 0, const int to = 0) const;
171 
isNearSizeLimit()172     bool isNearSizeLimit() const {
173         return mBuffer.isNearSizeLimit();
174     }
175 
getRootBitmapEntryIndex()176     int getRootBitmapEntryIndex() const {
177         return ROOT_BITMAP_ENTRY_INDEX;
178     }
179 
180     // Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
getNextLevelBitmapEntryIndex(const int key)181     int getNextLevelBitmapEntryIndex(const int key) {
182         return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);
183     }
184 
185     int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex);
186 
getRoot(const int key)187     const Result getRoot(const int key) const {
188         return get(key, ROOT_BITMAP_ENTRY_INDEX);
189     }
190 
191     const Result get(const int key, const int bitmapEntryIndex) const;
192 
putRoot(const int key,const uint64_t value)193     bool putRoot(const int key, const uint64_t value) {
194         return put(key, value, ROOT_BITMAP_ENTRY_INDEX);
195     }
196 
197     bool put(const int key, const uint64_t value, const int bitmapEntryIndex);
198 
getEntriesInRootLevel()199     const TrieMapRange getEntriesInRootLevel() const {
200         return getEntriesInSpecifiedLevel(ROOT_BITMAP_ENTRY_INDEX);
201     }
202 
getEntriesInSpecifiedLevel(const int bitmapEntryIndex)203     const TrieMapRange getEntriesInSpecifiedLevel(const int bitmapEntryIndex) const {
204         return TrieMapRange(this, bitmapEntryIndex);
205     }
206 
207     bool save(FILE *const file) const;
208 
209     bool remove(const int key, const int bitmapEntryIndex);
210 
211  private:
212     DISALLOW_COPY_AND_ASSIGN(TrieMap);
213 
214     /**
215      * Struct represents an entry.
216      *
217      * Entry is one of these entry types. All entries are fixed size and have 2 fields FIELD_0 and
218      * FIELD_1.
219      * 1. bitmap entry. bitmap entry contains bitmap and the link to hash table.
220      *   FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE)
221      * 2. terminal entry. terminal entry contains hashed key and value or terminal link. terminal
222      * entry have terminal link when the value is not fit to FIELD_1 or there is a next level map
223      * for the key.
224      *   FIELD_0(hashed key) (FIELD_1(VALUE_FLAG VALUE) | FIELD_1(TERMINAL_LINK_FLAG TERMINAL_LINK))
225      * 3. value entry. value entry represents a value. Upper order bytes are stored in FIELD_0 and
226      * lower order bytes are stored in FIELD_1.
227      *   FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes))
228      */
229     struct Entry {
230         const uint32_t mData0;
231         const uint32_t mData1;
232 
EntryEntry233         Entry(const uint32_t data0, const uint32_t data1) : mData0(data0), mData1(data1) {}
234 
isBitmapEntryEntry235         AK_FORCE_INLINE bool isBitmapEntry() const {
236             return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) == 0;
237         }
238 
hasTerminalLinkEntry239         AK_FORCE_INLINE bool hasTerminalLink() const {
240             return (mData1 & TERMINAL_LINK_FLAG) != 0;
241         }
242 
243         // For terminal entry.
getKeyEntry244         AK_FORCE_INLINE uint32_t getKey() const {
245             return mData0;
246         }
247 
248         // For terminal entry.
getValueEntry249         AK_FORCE_INLINE uint32_t getValue() const {
250             return mData1 & VALUE_MASK;
251         }
252 
253         // For terminal entry.
isValidTerminalEntryEntry254         AK_FORCE_INLINE bool isValidTerminalEntry() const {
255             return hasTerminalLink() || ((mData1 & VALUE_MASK) != INVALID_VALUE_IN_KEY_VALUE_ENTRY);
256         }
257 
258         // For terminal entry.
getValueEntryIndexEntry259         AK_FORCE_INLINE uint32_t getValueEntryIndex() const {
260             return mData1 & TERMINAL_LINK_MASK;
261         }
262 
263         // For bitmap entry.
getBitmapEntry264         AK_FORCE_INLINE uint32_t getBitmap() const {
265             return mData0;
266         }
267 
268         // For bitmap entry.
getTableIndexEntry269         AK_FORCE_INLINE int getTableIndex() const {
270             return static_cast<int>(mData1);
271         }
272 
273         // For value entry.
getValueOfValueEntryEntry274         AK_FORCE_INLINE uint64_t getValueOfValueEntry() const {
275             return ((static_cast<uint64_t>(mData0) << (FIELD1_SIZE * CHAR_BIT)) ^ mData1);
276         }
277     };
278 
279     BufferWithExtendableBuffer mBuffer;
280 
281     static const int FIELD0_SIZE;
282     static const int FIELD1_SIZE;
283     static const int ENTRY_SIZE;
284     static const uint32_t VALUE_FLAG;
285     static const uint32_t VALUE_MASK;
286     static const uint32_t INVALID_VALUE_IN_KEY_VALUE_ENTRY;
287     static const uint32_t TERMINAL_LINK_FLAG;
288     static const uint32_t TERMINAL_LINK_MASK;
289     static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL;
290     static const uint32_t LABEL_MASK;
291     static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL;
292     static const int ROOT_BITMAP_ENTRY_INDEX;
293     static const int ROOT_BITMAP_ENTRY_POS;
294     static const Entry EMPTY_BITMAP_ENTRY;
295     static const int TERMINAL_LINKED_ENTRY_COUNT;
296     static const int MAX_BUFFER_SIZE;
297 
298     uint32_t getBitShuffledKey(const uint32_t key) const;
299     bool writeValue(const uint64_t value, const int terminalEntryIndex);
300     bool updateValue(const Entry &terminalEntry, const uint64_t value,
301             const int terminalEntryIndex);
302     bool freeTable(const int tableIndex, const int entryCount);
303     int allocateTable(const int entryCount);
304     int getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey,
305             const Entry &bitmapEntry, const int level) const;
306     const Result getInternal(const uint32_t key, const uint32_t hashedKey,
307             const int bitmapEntryIndex, const int level) const;
308     bool putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey,
309             const int bitmapEntryIndex, const Entry &bitmapEntry, const int level);
310     bool addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value,
311             const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex,
312             const int level);
313     bool addNewEntryByExpandingTable(const uint32_t key, const uint64_t value,
314             const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex,
315             const int label);
316     const Result iterateNext(std::vector<TableIterationState> *const iterationState,
317             int *const outKey) const;
318 
readEntry(const int entryIndex)319     AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const {
320         return Entry(readField0(entryIndex), readField1(entryIndex));
321     }
322 
323     // Returns whether an entry for the index is existing by testing if the index-th bit in the
324     // bitmap is set or not.
exists(const uint32_t bitmap,const int index)325     AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const {
326         return (bitmap & (1 << index)) != 0;
327     }
328 
329     // Set index-th bit in the bitmap.
setExist(const uint32_t bitmap,const int index)330     AK_FORCE_INLINE uint32_t setExist(const uint32_t bitmap, const int index) const {
331         return bitmap | (1 << index);
332     }
333 
334     // Count set bits before index in the bitmap.
popCount(const uint32_t bitmap,const int index)335     AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const {
336         return popCount(bitmap & ((1 << index) - 1));
337     }
338 
339     // Count set bits in the bitmap.
popCount(const uint32_t bitmap)340     AK_FORCE_INLINE int popCount(const uint32_t bitmap) const {
341         return __builtin_popcount(bitmap);
342         // int v = bitmap - ((bitmap >> 1) & 0x55555555);
343         // v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
344         // return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
345     }
346 
getLabel(const uint32_t hashedKey,const int level)347     AK_FORCE_INLINE int getLabel(const uint32_t hashedKey, const int level) const {
348         return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_MASK;
349     }
350 
readField0(const int entryIndex)351     AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const {
352         return mBuffer.readUint(FIELD0_SIZE, ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
353     }
354 
readField1(const int entryIndex)355     AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const {
356         return mBuffer.readUint(FIELD1_SIZE,
357                 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
358     }
359 
readEmptyTableLink(const int entryCount)360     AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const {
361         return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE);
362     }
363 
writeEmptyTableLink(const int tableIndex,const int entryCount)364     AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex, const int entryCount) {
365         return mBuffer.writeUint(tableIndex, FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE);
366     }
367 
writeField0(const uint32_t data,const int entryIndex)368     AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex) {
369         return mBuffer.writeUint(data, FIELD0_SIZE,
370                 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
371     }
372 
writeField1(const uint32_t data,const int entryIndex)373     AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex) {
374         return mBuffer.writeUint(data, FIELD1_SIZE,
375                 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
376     }
377 
writeEntry(const Entry & entry,const int entryIndex)378     AK_FORCE_INLINE bool writeEntry(const Entry &entry, const int entryIndex) {
379         return writeField0(entry.mData0, entryIndex) && writeField1(entry.mData1, entryIndex);
380     }
381 
writeTerminalEntry(const uint32_t key,const uint64_t value,const int entryIndex)382     AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key, const uint64_t value,
383             const int entryIndex) {
384         return writeField0(key, entryIndex) && writeValue(value, entryIndex);
385     }
386 
copyEntry(const int originalEntryIndex,const int newEntryIndex)387     AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex, const int newEntryIndex) {
388         return writeEntry(readEntry(originalEntryIndex), newEntryIndex);
389     }
390 
getTailEntryIndex()391     AK_FORCE_INLINE int getTailEntryIndex() const {
392         return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE;
393     }
394 
395     bool removeInner(const Entry &bitmapEntry);
396 };
397 
398 } // namespace latinime
399 #endif /* LATINIME_TRIE_MAP_H */
400