1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_MULTI_BIGRAM_MAP_H 18 #define LATINIME_MULTI_BIGRAM_MAP_H 19 20 #include <cstddef> 21 #include <unordered_map> 22 23 #include "defines.h" 24 #include "dictionary/interface/dictionary_structure_with_buffer_policy.h" 25 #include "dictionary/interface/ngram_listener.h" 26 #include "dictionary/utils/binary_dictionary_bigrams_iterator.h" 27 #include "dictionary/utils/bloom_filter.h" 28 #include "utils/int_array_view.h" 29 30 namespace latinime { 31 32 // Class for caching bigram maps for multiple previous word contexts. This is useful since the 33 // algorithm needs to look up the set of bigrams for every word pair that occurs in every 34 // multi-word suggestion. 35 class MultiBigramMap { 36 public: MultiBigramMap()37 MultiBigramMap() : mBigramMaps() {} ~MultiBigramMap()38 ~MultiBigramMap() {} 39 40 // Look up the bigram probability for the given word pair from the cached bigram maps. 41 // Also caches the bigrams if there is space remaining and they have not been cached already. 42 int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy, 43 const WordIdArrayView prevWordIds, const int nextWordId, const int unigramProbability); 44 clear()45 void clear() { 46 mBigramMaps.clear(); 47 } 48 49 private: 50 DISALLOW_COPY_AND_ASSIGN(MultiBigramMap); 51 52 class BigramMap : public NgramListener { 53 public: BigramMap()54 BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {} 55 // Copy constructor needed for std::unordered_map. BigramMap(const BigramMap & bigramMap)56 BigramMap(const BigramMap &bigramMap) 57 : mBigramMap(bigramMap.mBigramMap), mBloomFilter(bigramMap.mBloomFilter) {} ~BigramMap()58 virtual ~BigramMap() {} 59 60 void init(const DictionaryStructureWithBufferPolicy *const structurePolicy, 61 const WordIdArrayView prevWordIds); 62 int getBigramProbability( 63 const DictionaryStructureWithBufferPolicy *const structurePolicy, 64 const int nextWordId, const int unigramProbability) const; 65 virtual void onVisitEntry(const int ngramProbability, const int targetWordId); 66 67 private: 68 static const int DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP; 69 std::unordered_map<int, int> mBigramMap; 70 BloomFilter mBloomFilter; 71 }; 72 73 void addBigramsForWord(const DictionaryStructureWithBufferPolicy *const structurePolicy, 74 const WordIdArrayView prevWordIds); 75 76 int readBigramProbabilityFromBinaryDictionary( 77 const DictionaryStructureWithBufferPolicy *const structurePolicy, 78 const WordIdArrayView prevWordIds, const int nextWordId, const int unigramProbability); 79 80 static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP; 81 std::unordered_map<int, BigramMap> mBigramMaps; 82 }; 83 } // namespace latinime 84 #endif // LATINIME_MULTI_BIGRAM_MAP_H 85