1 /* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /* 18 * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!! 19 * Do not edit this file other than updating policy's interface. 20 * 21 * This file was generated from 22 * dictionary/structure/v4/ver4_patricia_trie_policy.h 23 */ 24 25 #ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H 26 #define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H 27 28 #include <vector> 29 30 #include "defines.h" 31 #include "dictionary/header/header_policy.h" 32 #include "dictionary/interface/dictionary_structure_with_buffer_policy.h" 33 #include "dictionary/structure/pt_common/dynamic_pt_updating_helper.h" 34 #include "dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h" 35 #include "dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h" 36 #include "dictionary/structure/backward/v402/ver4_dict_buffers.h" 37 #include "dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h" 38 #include "dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h" 39 #include "dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h" 40 #include "dictionary/structure/backward/v402/ver4_pt_node_array_reader.h" 41 #include "dictionary/utils/binary_dictionary_bigrams_iterator.h" 42 #include "dictionary/utils/binary_dictionary_shortcut_iterator.h" 43 #include "dictionary/utils/buffer_with_extendable_buffer.h" 44 #include "dictionary/utils/entry_counters.h" 45 #include "utils/int_array_view.h" 46 47 namespace latinime { 48 namespace backward { 49 namespace v402 { 50 51 } // namespace v402 52 } // namespace backward 53 class DicNode; 54 namespace backward { 55 namespace v402 { 56 } // namespace v402 57 } // namespace backward 58 class DicNodeVector; 59 namespace backward { 60 namespace v402 { 61 62 // Word id = Position of a PtNode that represents the word. 63 // Max supported n-gram is bigram. 64 class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { 65 public: Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)66 Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers) 67 : mBuffers(std::move(buffers)), mHeaderPolicy(mBuffers->getHeaderPolicy()), 68 mDictBuffer(mBuffers->getWritableTrieBuffer()), 69 mBigramPolicy(mBuffers->getMutableBigramDictContent(), 70 mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy), 71 mShortcutPolicy(mBuffers->getMutableShortcutDictContent(), 72 mBuffers->getTerminalPositionLookupTable()), 73 mNodeReader(mDictBuffer, mBuffers->getProbabilityDictContent(), mHeaderPolicy), 74 mPtNodeArrayReader(mDictBuffer), 75 mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader, 76 &mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy), 77 mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter), 78 mWritingHelper(mBuffers.get()), 79 mEntryCounters(mHeaderPolicy->getNgramCounts().getCountArray()), 80 mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}; 81 getRootPosition()82 virtual int getRootPosition() const { 83 return 0; 84 } 85 86 void createAndGetAllChildDicNodes(const DicNode *const dicNode, 87 DicNodeVector *const childDicNodes) const; 88 89 int getCodePointsAndReturnCodePointCount(const int wordId, const int maxCodePointCount, 90 int *const outCodePoints) const; 91 92 int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const; 93 94 const WordAttributes getWordAttributesInContext(const WordIdArrayView prevWordIds, 95 const int wordId, MultiBigramMap *const multiBigramMap) const; 96 97 int getProbability(const int unigramProbability, const int bigramProbability) const; 98 99 int getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const; 100 101 void iterateNgramEntries(const WordIdArrayView prevWordIds, 102 NgramListener *const listener) const; 103 104 BinaryDictionaryShortcutIterator getShortcutIterator(const int wordId) const; 105 getHeaderStructurePolicy()106 const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { 107 return mHeaderPolicy; 108 } 109 110 bool addUnigramEntry(const CodePointArrayView wordCodePoints, 111 const UnigramProperty *const unigramProperty); 112 113 bool removeUnigramEntry(const CodePointArrayView wordCodePoints); 114 115 bool addNgramEntry(const NgramProperty *const ngramProperty); 116 117 bool removeNgramEntry(const NgramContext *const ngramContext, 118 const CodePointArrayView wordCodePoints); 119 120 bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext, 121 const CodePointArrayView wordCodePoints, const bool isValidWord, 122 const HistoricalInfo historicalInfo); 123 124 bool flush(const char *const filePath); 125 126 bool flushWithGC(const char *const filePath); 127 128 bool needsToRunGC(const bool mindsBlockByGC) const; 129 130 void getProperty(const char *const query, const int queryLength, char *const outResult, 131 const int maxResultLength); 132 133 const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const; 134 135 int getNextWordAndNextToken(const int token, int *const outCodePoints, 136 int *const outCodePointCount); 137 isCorrupted()138 bool isCorrupted() const { 139 return mIsCorrupted; 140 } 141 142 private: 143 DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy); 144 145 static const char *const UNIGRAM_COUNT_QUERY; 146 static const char *const BIGRAM_COUNT_QUERY; 147 static const char *const MAX_UNIGRAM_COUNT_QUERY; 148 static const char *const MAX_BIGRAM_COUNT_QUERY; 149 // When the dictionary size is near the maximum size, we have to refuse dynamic operations to 150 // prevent the dictionary from overflowing. 151 static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; 152 static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; 153 static const int DUMMY_PROBABILITY_FOR_VALID_WORDS; 154 155 const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; 156 const HeaderPolicy *const mHeaderPolicy; 157 BufferWithExtendableBuffer *const mDictBuffer; 158 Ver4BigramListPolicy mBigramPolicy; 159 Ver4ShortcutListPolicy mShortcutPolicy; 160 Ver4PatriciaTrieNodeReader mNodeReader; 161 Ver4PtNodeArrayReader mPtNodeArrayReader; 162 Ver4PatriciaTrieNodeWriter mNodeWriter; 163 DynamicPtUpdatingHelper mUpdatingHelper; 164 Ver4PatriciaTrieWritingHelper mWritingHelper; 165 MutableEntryCounters mEntryCounters; 166 std::vector<int> mTerminalPtNodePositionsForIteratingWords; 167 mutable bool mIsCorrupted; 168 169 int getBigramsPositionOfPtNode(const int ptNodePos) const; 170 int getShortcutPositionOfPtNode(const int ptNodePos) const; 171 int getWordIdFromTerminalPtNodePos(const int ptNodePos) const; 172 int getTerminalPtNodePosFromWordId(const int wordId) const; 173 const WordAttributes getWordAttributes(const int probability, 174 const PtNodeParams &ptNodeParams) const; 175 int getBigramConditionalProbability(const int prevWordUnigramProbability, 176 const bool isInBeginningOfSentenceContext, const int bigramProbability) const; 177 }; 178 } // namespace v402 179 } // namespace backward 180 } // namespace latinime 181 #endif // LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H 182