1 /*
2  * Copyright (C) 2013, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LATINIME_PATRICIA_TRIE_POLICY_H
18 #define LATINIME_PATRICIA_TRIE_POLICY_H
19 
#include <cstdint>
#include <utility>
#include <vector>
22 
23 #include "defines.h"
24 #include "dictionary/header/header_policy.h"
25 #include "dictionary/interface/dictionary_structure_with_buffer_policy.h"
26 #include "dictionary/structure/v2/bigram/bigram_list_policy.h"
27 #include "dictionary/structure/v2/shortcut/shortcut_list_policy.h"
28 #include "dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
29 #include "dictionary/structure/v2/ver2_pt_node_array_reader.h"
30 #include "dictionary/utils/format_utils.h"
31 #include "dictionary/utils/mmapped_buffer.h"
32 #include "utils/byte_array_view.h"
33 #include "utils/int_array_view.h"
34 
35 namespace latinime {
36 
37 class DicNode;
38 class DicNodeVector;
39 
40 // Word id = Position of a PtNode that represents the word.
41 // Max supported n-gram is bigram.
42 class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
43  public:
PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)44     PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
45             : mMmappedBuffer(std::move(mmappedBuffer)),
46               mHeaderPolicy(mMmappedBuffer->getReadOnlyByteArrayView().data(),
47                       FormatUtils::detectFormatVersion(mMmappedBuffer->getReadOnlyByteArrayView())),
48               mBuffer(mMmappedBuffer->getReadOnlyByteArrayView().skip(mHeaderPolicy.getSize())),
49               mBigramListPolicy(mBuffer), mShortcutListPolicy(mBuffer),
50               mPtNodeReader(mBuffer, &mBigramListPolicy, &mShortcutListPolicy,
51                       mHeaderPolicy.getCodePointTable()),
52               mPtNodeArrayReader(mBuffer), mTerminalPtNodePositionsForIteratingWords(),
53               mIsCorrupted(false) {}
54 
getRootPosition()55     AK_FORCE_INLINE int getRootPosition() const {
56         return 0;
57     }
58 
59     void createAndGetAllChildDicNodes(const DicNode *const dicNode,
60             DicNodeVector *const childDicNodes) const;
61 
62     int getCodePointsAndReturnCodePointCount(const int wordId, const int maxCodePointCount,
63             int *const outCodePoints) const;
64 
65     int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const;
66 
67     const WordAttributes getWordAttributesInContext(const WordIdArrayView prevWordIds,
68             const int wordId, MultiBigramMap *const multiBigramMap) const;
69 
70     int getProbability(const int unigramProbability, const int bigramProbability) const;
71 
72     int getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const;
73 
74     void iterateNgramEntries(const WordIdArrayView prevWordIds,
75             NgramListener *const listener) const;
76 
77     BinaryDictionaryShortcutIterator getShortcutIterator(const int wordId) const;
78 
getHeaderStructurePolicy()79     const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
80         return &mHeaderPolicy;
81     }
82 
addUnigramEntry(const CodePointArrayView wordCodePoints,const UnigramProperty * const unigramProperty)83     bool addUnigramEntry(const CodePointArrayView wordCodePoints,
84             const UnigramProperty *const unigramProperty) {
85         // This method should not be called for non-updatable dictionary.
86         AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
87         return false;
88     }
89 
removeUnigramEntry(const CodePointArrayView wordCodePoints)90     bool removeUnigramEntry(const CodePointArrayView wordCodePoints) {
91         // This method should not be called for non-updatable dictionary.
92         AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
93         return false;
94     }
95 
addNgramEntry(const NgramProperty * const ngramProperty)96     bool addNgramEntry(const NgramProperty *const ngramProperty) {
97         // This method should not be called for non-updatable dictionary.
98         AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
99         return false;
100     }
101 
removeNgramEntry(const NgramContext * const ngramContext,const CodePointArrayView wordCodePoints)102     bool removeNgramEntry(const NgramContext *const ngramContext,
103             const CodePointArrayView wordCodePoints) {
104         // This method should not be called for non-updatable dictionary.
105         AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
106         return false;
107     }
108 
updateEntriesForWordWithNgramContext(const NgramContext * const ngramContext,const CodePointArrayView wordCodePoints,const bool isValidWord,const HistoricalInfo historicalInfo)109     bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
110             const CodePointArrayView wordCodePoints, const bool isValidWord,
111             const HistoricalInfo historicalInfo) {
112         // This method should not be called for non-updatable dictionary.
113         AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable "
114                 "dictionary.");
115         return false;
116     }
117 
flush(const char * const filePath)118     bool flush(const char *const filePath) {
119         // This method should not be called for non-updatable dictionary.
120         AKLOGI("Warning: flush() is called for non-updatable dictionary.");
121         return false;
122     }
123 
flushWithGC(const char * const filePath)124     bool flushWithGC(const char *const filePath) {
125         // This method should not be called for non-updatable dictionary.
126         AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
127         return false;
128     }
129 
needsToRunGC(const bool mindsBlockByGC)130     bool needsToRunGC(const bool mindsBlockByGC) const {
131         // This method should not be called for non-updatable dictionary.
132         AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
133         return false;
134     }
135 
getProperty(const char * const query,const int queryLength,char * const outResult,const int maxResultLength)136     void getProperty(const char *const query, const int queryLength, char *const outResult,
137             const int maxResultLength) {
138         // getProperty is not supported for this class.
139         if (maxResultLength > 0) {
140             outResult[0] = '\0';
141         }
142     }
143 
144     const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const;
145 
146     int getNextWordAndNextToken(const int token, int *const outCodePoints,
147             int *const outCodePointCount);
148 
isCorrupted()149     bool isCorrupted() const {
150         return mIsCorrupted;
151     }
152 
153  private:
154     DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
155 
156     const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
157     const HeaderPolicy mHeaderPolicy;
158     const ReadOnlyByteArrayView mBuffer;
159     const BigramListPolicy mBigramListPolicy;
160     const ShortcutListPolicy mShortcutListPolicy;
161     const Ver2ParticiaTrieNodeReader mPtNodeReader;
162     const Ver2PtNodeArrayReader mPtNodeArrayReader;
163     std::vector<int> mTerminalPtNodePositionsForIteratingWords;
164     mutable bool mIsCorrupted;
165 
166     int getCodePointsAndProbabilityAndReturnCodePointCount(const int wordId,
167             const int maxCodePointCount, int *const outCodePoints,
168             int *const outUnigramProbability) const;
169     int getShortcutPositionOfPtNode(const int ptNodePos) const;
170     int getBigramsPositionOfPtNode(const int ptNodePos) const;
171     int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
172             DicNodeVector *const childDicNodes) const;
173     int getWordIdFromTerminalPtNodePos(const int ptNodePos) const;
174     int getTerminalPtNodePosFromWordId(const int wordId) const;
175     const WordAttributes getWordAttributes(const int probability,
176             const PtNodeParams &ptNodeParams) const;
177     bool isValidPos(const int pos) const;
178 };
179 } // namespace latinime
180 #endif // LATINIME_PATRICIA_TRIE_POLICY_H
181