1 /*
2  * Copyright (C) 2013, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
19  * Do not edit this file other than updating policy's interface.
20  *
21  * This file was generated from
22  *   dictionary/structure/v4/ver4_patricia_trie_policy.h
23  */
24 
25 #ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H
26 #define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H
27 
28 #include <vector>
29 
30 #include "defines.h"
31 #include "dictionary/header/header_policy.h"
32 #include "dictionary/interface/dictionary_structure_with_buffer_policy.h"
33 #include "dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
34 #include "dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
35 #include "dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h"
36 #include "dictionary/structure/backward/v402/ver4_dict_buffers.h"
37 #include "dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
38 #include "dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h"
39 #include "dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h"
40 #include "dictionary/structure/backward/v402/ver4_pt_node_array_reader.h"
41 #include "dictionary/utils/binary_dictionary_bigrams_iterator.h"
42 #include "dictionary/utils/binary_dictionary_shortcut_iterator.h"
43 #include "dictionary/utils/buffer_with_extendable_buffer.h"
44 #include "dictionary/utils/entry_counters.h"
45 #include "utils/int_array_view.h"
46 
47 namespace latinime {
48 namespace backward {
49 namespace v402 {
50 
51 } // namespace v402
52 } // namespace backward
53 class DicNode;
54 namespace backward {
55 namespace v402 {
56 } // namespace v402
57 } // namespace backward
58 class DicNodeVector;
59 namespace backward {
60 namespace v402 {
61 
62 // Word id = Position of a PtNode that represents the word.
63 // Max supported n-gram is bigram.
64 class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
65  public:
Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)66     Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)
67             : mBuffers(std::move(buffers)), mHeaderPolicy(mBuffers->getHeaderPolicy()),
68               mDictBuffer(mBuffers->getWritableTrieBuffer()),
69               mBigramPolicy(mBuffers->getMutableBigramDictContent(),
70                       mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy),
71               mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
72                       mBuffers->getTerminalPositionLookupTable()),
73               mNodeReader(mDictBuffer, mBuffers->getProbabilityDictContent(), mHeaderPolicy),
74               mPtNodeArrayReader(mDictBuffer),
75               mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
76                       &mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
77               mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
78               mWritingHelper(mBuffers.get()),
79               mEntryCounters(mHeaderPolicy->getNgramCounts().getCountArray()),
80               mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {};
81 
getRootPosition()82     virtual int getRootPosition() const {
83         return 0;
84     }
85 
86     void createAndGetAllChildDicNodes(const DicNode *const dicNode,
87             DicNodeVector *const childDicNodes) const;
88 
89     int getCodePointsAndReturnCodePointCount(const int wordId, const int maxCodePointCount,
90             int *const outCodePoints) const;
91 
92     int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const;
93 
94     const WordAttributes getWordAttributesInContext(const WordIdArrayView prevWordIds,
95             const int wordId, MultiBigramMap *const multiBigramMap) const;
96 
97     int getProbability(const int unigramProbability, const int bigramProbability) const;
98 
99     int getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const;
100 
101     void iterateNgramEntries(const WordIdArrayView prevWordIds,
102             NgramListener *const listener) const;
103 
104     BinaryDictionaryShortcutIterator getShortcutIterator(const int wordId) const;
105 
getHeaderStructurePolicy()106     const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
107         return mHeaderPolicy;
108     }
109 
110     bool addUnigramEntry(const CodePointArrayView wordCodePoints,
111             const UnigramProperty *const unigramProperty);
112 
113     bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
114 
115     bool addNgramEntry(const NgramProperty *const ngramProperty);
116 
117     bool removeNgramEntry(const NgramContext *const ngramContext,
118             const CodePointArrayView wordCodePoints);
119 
120     bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
121             const CodePointArrayView wordCodePoints, const bool isValidWord,
122             const HistoricalInfo historicalInfo);
123 
124     bool flush(const char *const filePath);
125 
126     bool flushWithGC(const char *const filePath);
127 
128     bool needsToRunGC(const bool mindsBlockByGC) const;
129 
130     void getProperty(const char *const query, const int queryLength, char *const outResult,
131             const int maxResultLength);
132 
133     const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const;
134 
135     int getNextWordAndNextToken(const int token, int *const outCodePoints,
136             int *const outCodePointCount);
137 
isCorrupted()138     bool isCorrupted() const {
139         return mIsCorrupted;
140     }
141 
142  private:
143     DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
144 
145     static const char *const UNIGRAM_COUNT_QUERY;
146     static const char *const BIGRAM_COUNT_QUERY;
147     static const char *const MAX_UNIGRAM_COUNT_QUERY;
148     static const char *const MAX_BIGRAM_COUNT_QUERY;
149     // When the dictionary size is near the maximum size, we have to refuse dynamic operations to
150     // prevent the dictionary from overflowing.
151     static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
152     static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
153     static const int DUMMY_PROBABILITY_FOR_VALID_WORDS;
154 
155     const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
156     const HeaderPolicy *const mHeaderPolicy;
157     BufferWithExtendableBuffer *const mDictBuffer;
158     Ver4BigramListPolicy mBigramPolicy;
159     Ver4ShortcutListPolicy mShortcutPolicy;
160     Ver4PatriciaTrieNodeReader mNodeReader;
161     Ver4PtNodeArrayReader mPtNodeArrayReader;
162     Ver4PatriciaTrieNodeWriter mNodeWriter;
163     DynamicPtUpdatingHelper mUpdatingHelper;
164     Ver4PatriciaTrieWritingHelper mWritingHelper;
165     MutableEntryCounters mEntryCounters;
166     std::vector<int> mTerminalPtNodePositionsForIteratingWords;
167     mutable bool mIsCorrupted;
168 
169     int getBigramsPositionOfPtNode(const int ptNodePos) const;
170     int getShortcutPositionOfPtNode(const int ptNodePos) const;
171     int getWordIdFromTerminalPtNodePos(const int ptNodePos) const;
172     int getTerminalPtNodePosFromWordId(const int wordId) const;
173     const WordAttributes getWordAttributes(const int probability,
174             const PtNodeParams &ptNodeParams) const;
175     int getBigramConditionalProbability(const int prevWordUnigramProbability,
176             const bool isInBeginningOfSentenceContext, const int bigramProbability) const;
177 };
178 } // namespace v402
179 } // namespace backward
180 } // namespace latinime
181 #endif // LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H
182