1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LATINIME_MULTI_BIGRAM_MAP_H
18 #define LATINIME_MULTI_BIGRAM_MAP_H
19 
20 #include <cstddef>
21 #include <unordered_map>
22 
23 #include "defines.h"
24 #include "dictionary/interface/dictionary_structure_with_buffer_policy.h"
25 #include "dictionary/interface/ngram_listener.h"
26 #include "dictionary/utils/binary_dictionary_bigrams_iterator.h"
27 #include "dictionary/utils/bloom_filter.h"
28 #include "utils/int_array_view.h"
29 
30 namespace latinime {
31 
32 // Class for caching bigram maps for multiple previous word contexts. This is useful since the
33 // algorithm needs to look up the set of bigrams for every word pair that occurs in every
34 // multi-word suggestion.
35 class MultiBigramMap {
36  public:
MultiBigramMap()37     MultiBigramMap() : mBigramMaps() {}
~MultiBigramMap()38     ~MultiBigramMap() {}
39 
40     // Look up the bigram probability for the given word pair from the cached bigram maps.
41     // Also caches the bigrams if there is space remaining and they have not been cached already.
42     int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
43             const WordIdArrayView prevWordIds, const int nextWordId, const int unigramProbability);
44 
clear()45     void clear() {
46         mBigramMaps.clear();
47     }
48 
49  private:
50     DISALLOW_COPY_AND_ASSIGN(MultiBigramMap);
51 
52     class BigramMap : public NgramListener {
53      public:
BigramMap()54         BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {}
55         // Copy constructor needed for std::unordered_map.
BigramMap(const BigramMap & bigramMap)56         BigramMap(const BigramMap &bigramMap)
57                 : mBigramMap(bigramMap.mBigramMap), mBloomFilter(bigramMap.mBloomFilter) {}
~BigramMap()58         virtual ~BigramMap() {}
59 
60         void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
61                 const WordIdArrayView prevWordIds);
62         int getBigramProbability(
63                 const DictionaryStructureWithBufferPolicy *const structurePolicy,
64                 const int nextWordId, const int unigramProbability) const;
65         virtual void onVisitEntry(const int ngramProbability, const int targetWordId);
66 
67      private:
68         static const int DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP;
69         std::unordered_map<int, int> mBigramMap;
70         BloomFilter mBloomFilter;
71     };
72 
73     void addBigramsForWord(const DictionaryStructureWithBufferPolicy *const structurePolicy,
74             const WordIdArrayView prevWordIds);
75 
76     int readBigramProbabilityFromBinaryDictionary(
77             const DictionaryStructureWithBufferPolicy *const structurePolicy,
78             const WordIdArrayView prevWordIds, const int nextWordId, const int unigramProbability);
79 
80     static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
81     std::unordered_map<int, BigramMap> mBigramMaps;
82 };
83 } // namespace latinime
84 #endif // LATINIME_MULTI_BIGRAM_MAP_H
85