1 /* 2 * Copyright (C) 2014, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_TRIE_MAP_H 18 #define LATINIME_TRIE_MAP_H 19 20 #include <climits> 21 #include <cstdint> 22 #include <cstdio> 23 #include <vector> 24 25 #include "defines.h" 26 #include "dictionary/utils/buffer_with_extendable_buffer.h" 27 #include "utils/byte_array_view.h" 28 29 namespace latinime { 30 31 /** 32 * Trie map derived from Phil Bagwell's Hash Array Mapped Trie. 33 * key is int and value is uint64_t. 34 * This supports multiple level map. Terminal entries can have a bitmap for the next level map. 35 * This doesn't support root map resizing. 36 */ 37 class TrieMap { 38 public: 39 struct Result { 40 const uint64_t mValue; 41 const bool mIsValid; 42 const int mNextLevelBitmapEntryIndex; 43 ResultResult44 Result(const uint64_t value, const bool isValid, const int nextLevelBitmapEntryIndex) 45 : mValue(value), mIsValid(isValid), 46 mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {} 47 }; 48 49 /** 50 * Struct to record iteration state in a table. 51 */ 52 struct TableIterationState { 53 int mTableSize; 54 int mTableIndex; 55 int mCurrentIndex; 56 TableIterationStateTableIterationState57 TableIterationState(const int tableSize, const int tableIndex) 58 : mTableSize(tableSize), mTableIndex(tableIndex), mCurrentIndex(0) {} 59 }; 60 61 class TrieMapRange; 62 class TrieMapIterator { 63 public: 64 class IterationResult { 65 public: IterationResult(const TrieMap * const trieMap,const int key,const uint64_t value,const int nextLeveBitmapEntryIndex)66 IterationResult(const TrieMap *const trieMap, const int key, const uint64_t value, 67 const int nextLeveBitmapEntryIndex) 68 : mTrieMap(trieMap), mKey(key), mValue(value), 69 mNextLevelBitmapEntryIndex(nextLeveBitmapEntryIndex) {} 70 getEntriesInNextLevel()71 const TrieMapRange getEntriesInNextLevel() const { 72 return TrieMapRange(mTrieMap, mNextLevelBitmapEntryIndex); 73 } 74 hasNextLevelMap()75 bool hasNextLevelMap() const { 76 return mNextLevelBitmapEntryIndex != INVALID_INDEX; 77 } 78 key()79 AK_FORCE_INLINE int key() const { 80 return mKey; 81 } 82 value()83 AK_FORCE_INLINE uint64_t value() const { 84 return mValue; 85 } 86 getNextLevelBitmapEntryIndex()87 AK_FORCE_INLINE int getNextLevelBitmapEntryIndex() const { 88 return mNextLevelBitmapEntryIndex; 89 } 90 91 private: 92 const TrieMap *const mTrieMap; 93 const int mKey; 94 const uint64_t mValue; 95 const int mNextLevelBitmapEntryIndex; 96 }; 97 TrieMapIterator(const TrieMap * const trieMap,const int bitmapEntryIndex)98 TrieMapIterator(const TrieMap *const trieMap, const int bitmapEntryIndex) 99 : mTrieMap(trieMap), mStateStack(), mBaseBitmapEntryIndex(bitmapEntryIndex), 100 mKey(0), mValue(0), mIsValid(false), mNextLevelBitmapEntryIndex(INVALID_INDEX) { 101 if (!trieMap || mBaseBitmapEntryIndex == INVALID_INDEX) { 102 return; 103 } 104 const Entry bitmapEntry = mTrieMap->readEntry(mBaseBitmapEntryIndex); 105 mStateStack.emplace_back( 106 mTrieMap->popCount(bitmapEntry.getBitmap()), bitmapEntry.getTableIndex()); 107 this->operator++(); 108 } 109 110 const IterationResult operator*() const { 111 return IterationResult(mTrieMap, mKey, mValue, mNextLevelBitmapEntryIndex); 112 } 113 114 bool operator!=(const TrieMapIterator &other) const { 115 // Caveat: This works only for for loops. 116 return mIsValid || other.mIsValid; 117 } 118 119 const TrieMapIterator &operator++() { 120 const Result result = mTrieMap->iterateNext(&mStateStack, &mKey); 121 mValue = result.mValue; 122 mIsValid = result.mIsValid; 123 mNextLevelBitmapEntryIndex = result.mNextLevelBitmapEntryIndex; 124 return *this; 125 } 126 127 private: 128 DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapIterator); 129 DISALLOW_ASSIGNMENT_OPERATOR(TrieMapIterator); 130 131 const TrieMap *const mTrieMap; 132 std::vector<TrieMap::TableIterationState> mStateStack; 133 const int mBaseBitmapEntryIndex; 134 int mKey; 135 uint64_t mValue; 136 bool mIsValid; 137 int mNextLevelBitmapEntryIndex; 138 }; 139 140 /** 141 * Class to support iterating entries in TrieMap by range base for loops. 142 */ 143 class TrieMapRange { 144 public: TrieMapRange(const TrieMap * const trieMap,const int bitmapEntryIndex)145 TrieMapRange(const TrieMap *const trieMap, const int bitmapEntryIndex) 146 : mTrieMap(trieMap), mBaseBitmapEntryIndex(bitmapEntryIndex) {}; 147 begin()148 TrieMapIterator begin() const { 149 return TrieMapIterator(mTrieMap, mBaseBitmapEntryIndex); 150 } 151 end()152 const TrieMapIterator end() const { 153 return TrieMapIterator(nullptr, INVALID_INDEX); 154 } 155 156 private: 157 DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapRange); 158 DISALLOW_ASSIGNMENT_OPERATOR(TrieMapRange); 159 160 const TrieMap *const mTrieMap; 161 const int mBaseBitmapEntryIndex; 162 }; 163 164 static const int INVALID_INDEX; 165 static const uint64_t MAX_VALUE; 166 167 TrieMap(); 168 // Construct TrieMap using existing data in the memory region written by save(). 169 TrieMap(const ReadWriteByteArrayView buffer); 170 void dump(const int from = 0, const int to = 0) const; 171 isNearSizeLimit()172 bool isNearSizeLimit() const { 173 return mBuffer.isNearSizeLimit(); 174 } 175 getRootBitmapEntryIndex()176 int getRootBitmapEntryIndex() const { 177 return ROOT_BITMAP_ENTRY_INDEX; 178 } 179 180 // Returns bitmapEntryIndex. Create the next level map if it doesn't exist. getNextLevelBitmapEntryIndex(const int key)181 int getNextLevelBitmapEntryIndex(const int key) { 182 return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX); 183 } 184 185 int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex); 186 getRoot(const int key)187 const Result getRoot(const int key) const { 188 return get(key, ROOT_BITMAP_ENTRY_INDEX); 189 } 190 191 const Result get(const int key, const int bitmapEntryIndex) const; 192 putRoot(const int key,const uint64_t value)193 bool putRoot(const int key, const uint64_t value) { 194 return put(key, value, ROOT_BITMAP_ENTRY_INDEX); 195 } 196 197 bool put(const int key, const uint64_t value, const int bitmapEntryIndex); 198 getEntriesInRootLevel()199 const TrieMapRange getEntriesInRootLevel() const { 200 return getEntriesInSpecifiedLevel(ROOT_BITMAP_ENTRY_INDEX); 201 } 202 getEntriesInSpecifiedLevel(const int bitmapEntryIndex)203 const TrieMapRange getEntriesInSpecifiedLevel(const int bitmapEntryIndex) const { 204 return TrieMapRange(this, bitmapEntryIndex); 205 } 206 207 bool save(FILE *const file) const; 208 209 bool remove(const int key, const int bitmapEntryIndex); 210 211 private: 212 DISALLOW_COPY_AND_ASSIGN(TrieMap); 213 214 /** 215 * Struct represents an entry. 216 * 217 * Entry is one of these entry types. All entries are fixed size and have 2 fields FIELD_0 and 218 * FIELD_1. 219 * 1. bitmap entry. bitmap entry contains bitmap and the link to hash table. 220 * FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE) 221 * 2. terminal entry. terminal entry contains hashed key and value or terminal link. terminal 222 * entry have terminal link when the value is not fit to FIELD_1 or there is a next level map 223 * for the key. 224 * FIELD_0(hashed key) (FIELD_1(VALUE_FLAG VALUE) | FIELD_1(TERMINAL_LINK_FLAG TERMINAL_LINK)) 225 * 3. value entry. value entry represents a value. Upper order bytes are stored in FIELD_0 and 226 * lower order bytes are stored in FIELD_1. 227 * FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes)) 228 */ 229 struct Entry { 230 const uint32_t mData0; 231 const uint32_t mData1; 232 EntryEntry233 Entry(const uint32_t data0, const uint32_t data1) : mData0(data0), mData1(data1) {} 234 isBitmapEntryEntry235 AK_FORCE_INLINE bool isBitmapEntry() const { 236 return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) == 0; 237 } 238 hasTerminalLinkEntry239 AK_FORCE_INLINE bool hasTerminalLink() const { 240 return (mData1 & TERMINAL_LINK_FLAG) != 0; 241 } 242 243 // For terminal entry. getKeyEntry244 AK_FORCE_INLINE uint32_t getKey() const { 245 return mData0; 246 } 247 248 // For terminal entry. getValueEntry249 AK_FORCE_INLINE uint32_t getValue() const { 250 return mData1 & VALUE_MASK; 251 } 252 253 // For terminal entry. isValidTerminalEntryEntry254 AK_FORCE_INLINE bool isValidTerminalEntry() const { 255 return hasTerminalLink() || ((mData1 & VALUE_MASK) != INVALID_VALUE_IN_KEY_VALUE_ENTRY); 256 } 257 258 // For terminal entry. getValueEntryIndexEntry259 AK_FORCE_INLINE uint32_t getValueEntryIndex() const { 260 return mData1 & TERMINAL_LINK_MASK; 261 } 262 263 // For bitmap entry. getBitmapEntry264 AK_FORCE_INLINE uint32_t getBitmap() const { 265 return mData0; 266 } 267 268 // For bitmap entry. getTableIndexEntry269 AK_FORCE_INLINE int getTableIndex() const { 270 return static_cast<int>(mData1); 271 } 272 273 // For value entry. getValueOfValueEntryEntry274 AK_FORCE_INLINE uint64_t getValueOfValueEntry() const { 275 return ((static_cast<uint64_t>(mData0) << (FIELD1_SIZE * CHAR_BIT)) ^ mData1); 276 } 277 }; 278 279 BufferWithExtendableBuffer mBuffer; 280 281 static const int FIELD0_SIZE; 282 static const int FIELD1_SIZE; 283 static const int ENTRY_SIZE; 284 static const uint32_t VALUE_FLAG; 285 static const uint32_t VALUE_MASK; 286 static const uint32_t INVALID_VALUE_IN_KEY_VALUE_ENTRY; 287 static const uint32_t TERMINAL_LINK_FLAG; 288 static const uint32_t TERMINAL_LINK_MASK; 289 static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL; 290 static const uint32_t LABEL_MASK; 291 static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; 292 static const int ROOT_BITMAP_ENTRY_INDEX; 293 static const int ROOT_BITMAP_ENTRY_POS; 294 static const Entry EMPTY_BITMAP_ENTRY; 295 static const int TERMINAL_LINKED_ENTRY_COUNT; 296 static const int MAX_BUFFER_SIZE; 297 298 uint32_t getBitShuffledKey(const uint32_t key) const; 299 bool writeValue(const uint64_t value, const int terminalEntryIndex); 300 bool updateValue(const Entry &terminalEntry, const uint64_t value, 301 const int terminalEntryIndex); 302 bool freeTable(const int tableIndex, const int entryCount); 303 int allocateTable(const int entryCount); 304 int getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey, 305 const Entry &bitmapEntry, const int level) const; 306 const Result getInternal(const uint32_t key, const uint32_t hashedKey, 307 const int bitmapEntryIndex, const int level) const; 308 bool putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey, 309 const int bitmapEntryIndex, const Entry &bitmapEntry, const int level); 310 bool addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value, 311 const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex, 312 const int level); 313 bool addNewEntryByExpandingTable(const uint32_t key, const uint64_t value, 314 const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex, 315 const int label); 316 const Result iterateNext(std::vector<TableIterationState> *const iterationState, 317 int *const outKey) const; 318 readEntry(const int entryIndex)319 AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const { 320 return Entry(readField0(entryIndex), readField1(entryIndex)); 321 } 322 323 // Returns whether an entry for the index is existing by testing if the index-th bit in the 324 // bitmap is set or not. exists(const uint32_t bitmap,const int index)325 AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const { 326 return (bitmap & (1 << index)) != 0; 327 } 328 329 // Set index-th bit in the bitmap. setExist(const uint32_t bitmap,const int index)330 AK_FORCE_INLINE uint32_t setExist(const uint32_t bitmap, const int index) const { 331 return bitmap | (1 << index); 332 } 333 334 // Count set bits before index in the bitmap. popCount(const uint32_t bitmap,const int index)335 AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const { 336 return popCount(bitmap & ((1 << index) - 1)); 337 } 338 339 // Count set bits in the bitmap. popCount(const uint32_t bitmap)340 AK_FORCE_INLINE int popCount(const uint32_t bitmap) const { 341 return __builtin_popcount(bitmap); 342 // int v = bitmap - ((bitmap >> 1) & 0x55555555); 343 // v = (v & 0x33333333) + ((v >> 2) & 0x33333333); 344 // return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; 345 } 346 getLabel(const uint32_t hashedKey,const int level)347 AK_FORCE_INLINE int getLabel(const uint32_t hashedKey, const int level) const { 348 return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_MASK; 349 } 350 readField0(const int entryIndex)351 AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const { 352 return mBuffer.readUint(FIELD0_SIZE, ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE); 353 } 354 readField1(const int entryIndex)355 AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const { 356 return mBuffer.readUint(FIELD1_SIZE, 357 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); 358 } 359 readEmptyTableLink(const int entryCount)360 AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const { 361 return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE); 362 } 363 writeEmptyTableLink(const int tableIndex,const int entryCount)364 AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex, const int entryCount) { 365 return mBuffer.writeUint(tableIndex, FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE); 366 } 367 writeField0(const uint32_t data,const int entryIndex)368 AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex) { 369 return mBuffer.writeUint(data, FIELD0_SIZE, 370 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE); 371 } 372 writeField1(const uint32_t data,const int entryIndex)373 AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex) { 374 return mBuffer.writeUint(data, FIELD1_SIZE, 375 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); 376 } 377 writeEntry(const Entry & entry,const int entryIndex)378 AK_FORCE_INLINE bool writeEntry(const Entry &entry, const int entryIndex) { 379 return writeField0(entry.mData0, entryIndex) && writeField1(entry.mData1, entryIndex); 380 } 381 writeTerminalEntry(const uint32_t key,const uint64_t value,const int entryIndex)382 AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key, const uint64_t value, 383 const int entryIndex) { 384 return writeField0(key, entryIndex) && writeValue(value, entryIndex); 385 } 386 copyEntry(const int originalEntryIndex,const int newEntryIndex)387 AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex, const int newEntryIndex) { 388 return writeEntry(readEntry(originalEntryIndex), newEntryIndex); 389 } 390 getTailEntryIndex()391 AK_FORCE_INLINE int getTailEntryIndex() const { 392 return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE; 393 } 394 395 bool removeInner(const Entry &bitmapEntry); 396 }; 397 398 } // namespace latinime 399 #endif /* LATINIME_TRIE_MAP_H */ 400