1 /* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_PATRICIA_TRIE_POLICY_H 18 #define LATINIME_PATRICIA_TRIE_POLICY_H 19 20 #include <cstdint> 21 #include <vector> 22 23 #include "defines.h" 24 #include "dictionary/header/header_policy.h" 25 #include "dictionary/interface/dictionary_structure_with_buffer_policy.h" 26 #include "dictionary/structure/v2/bigram/bigram_list_policy.h" 27 #include "dictionary/structure/v2/shortcut/shortcut_list_policy.h" 28 #include "dictionary/structure/v2/ver2_patricia_trie_node_reader.h" 29 #include "dictionary/structure/v2/ver2_pt_node_array_reader.h" 30 #include "dictionary/utils/format_utils.h" 31 #include "dictionary/utils/mmapped_buffer.h" 32 #include "utils/byte_array_view.h" 33 #include "utils/int_array_view.h" 34 35 namespace latinime { 36 37 class DicNode; 38 class DicNodeVector; 39 40 // Word id = Position of a PtNode that represents the word. 41 // Max supported n-gram is bigram. 42 class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { 43 public: PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)44 PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer) 45 : mMmappedBuffer(std::move(mmappedBuffer)), 46 mHeaderPolicy(mMmappedBuffer->getReadOnlyByteArrayView().data(), 47 FormatUtils::detectFormatVersion(mMmappedBuffer->getReadOnlyByteArrayView())), 48 mBuffer(mMmappedBuffer->getReadOnlyByteArrayView().skip(mHeaderPolicy.getSize())), 49 mBigramListPolicy(mBuffer), mShortcutListPolicy(mBuffer), 50 mPtNodeReader(mBuffer, &mBigramListPolicy, &mShortcutListPolicy, 51 mHeaderPolicy.getCodePointTable()), 52 mPtNodeArrayReader(mBuffer), mTerminalPtNodePositionsForIteratingWords(), 53 mIsCorrupted(false) {} 54 getRootPosition()55 AK_FORCE_INLINE int getRootPosition() const { 56 return 0; 57 } 58 59 void createAndGetAllChildDicNodes(const DicNode *const dicNode, 60 DicNodeVector *const childDicNodes) const; 61 62 int getCodePointsAndReturnCodePointCount(const int wordId, const int maxCodePointCount, 63 int *const outCodePoints) const; 64 65 int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const; 66 67 const WordAttributes getWordAttributesInContext(const WordIdArrayView prevWordIds, 68 const int wordId, MultiBigramMap *const multiBigramMap) const; 69 70 int getProbability(const int unigramProbability, const int bigramProbability) const; 71 72 int getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const; 73 74 void iterateNgramEntries(const WordIdArrayView prevWordIds, 75 NgramListener *const listener) const; 76 77 BinaryDictionaryShortcutIterator getShortcutIterator(const int wordId) const; 78 getHeaderStructurePolicy()79 const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { 80 return &mHeaderPolicy; 81 } 82 addUnigramEntry(const CodePointArrayView wordCodePoints,const UnigramProperty * const unigramProperty)83 bool addUnigramEntry(const CodePointArrayView wordCodePoints, 84 const UnigramProperty *const unigramProperty) { 85 // This method should not be called for non-updatable dictionary. 86 AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary."); 87 return false; 88 } 89 removeUnigramEntry(const CodePointArrayView wordCodePoints)90 bool removeUnigramEntry(const CodePointArrayView wordCodePoints) { 91 // This method should not be called for non-updatable dictionary. 92 AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary."); 93 return false; 94 } 95 addNgramEntry(const NgramProperty * const ngramProperty)96 bool addNgramEntry(const NgramProperty *const ngramProperty) { 97 // This method should not be called for non-updatable dictionary. 98 AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); 99 return false; 100 } 101 removeNgramEntry(const NgramContext * const ngramContext,const CodePointArrayView wordCodePoints)102 bool removeNgramEntry(const NgramContext *const ngramContext, 103 const CodePointArrayView wordCodePoints) { 104 // This method should not be called for non-updatable dictionary. 105 AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary."); 106 return false; 107 } 108 updateEntriesForWordWithNgramContext(const NgramContext * const ngramContext,const CodePointArrayView wordCodePoints,const bool isValidWord,const HistoricalInfo historicalInfo)109 bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext, 110 const CodePointArrayView wordCodePoints, const bool isValidWord, 111 const HistoricalInfo historicalInfo) { 112 // This method should not be called for non-updatable dictionary. 113 AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable " 114 "dictionary."); 115 return false; 116 } 117 flush(const char * const filePath)118 bool flush(const char *const filePath) { 119 // This method should not be called for non-updatable dictionary. 120 AKLOGI("Warning: flush() is called for non-updatable dictionary."); 121 return false; 122 } 123 flushWithGC(const char * const filePath)124 bool flushWithGC(const char *const filePath) { 125 // This method should not be called for non-updatable dictionary. 126 AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); 127 return false; 128 } 129 needsToRunGC(const bool mindsBlockByGC)130 bool needsToRunGC(const bool mindsBlockByGC) const { 131 // This method should not be called for non-updatable dictionary. 132 AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); 133 return false; 134 } 135 getProperty(const char * const query,const int queryLength,char * const outResult,const int maxResultLength)136 void getProperty(const char *const query, const int queryLength, char *const outResult, 137 const int maxResultLength) { 138 // getProperty is not supported for this class. 139 if (maxResultLength > 0) { 140 outResult[0] = '\0'; 141 } 142 } 143 144 const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const; 145 146 int getNextWordAndNextToken(const int token, int *const outCodePoints, 147 int *const outCodePointCount); 148 isCorrupted()149 bool isCorrupted() const { 150 return mIsCorrupted; 151 } 152 153 private: 154 DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); 155 156 const MmappedBuffer::MmappedBufferPtr mMmappedBuffer; 157 const HeaderPolicy mHeaderPolicy; 158 const ReadOnlyByteArrayView mBuffer; 159 const BigramListPolicy mBigramListPolicy; 160 const ShortcutListPolicy mShortcutListPolicy; 161 const Ver2ParticiaTrieNodeReader mPtNodeReader; 162 const Ver2PtNodeArrayReader mPtNodeArrayReader; 163 std::vector<int> mTerminalPtNodePositionsForIteratingWords; 164 mutable bool mIsCorrupted; 165 166 int getCodePointsAndProbabilityAndReturnCodePointCount(const int wordId, 167 const int maxCodePointCount, int *const outCodePoints, 168 int *const outUnigramProbability) const; 169 int getShortcutPositionOfPtNode(const int ptNodePos) const; 170 int getBigramsPositionOfPtNode(const int ptNodePos) const; 171 int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos, 172 DicNodeVector *const childDicNodes) const; 173 int getWordIdFromTerminalPtNodePos(const int ptNodePos) const; 174 int getTerminalPtNodePosFromWordId(const int wordId) const; 175 const WordAttributes getWordAttributes(const int probability, 176 const PtNodeParams &ptNodeParams) const; 177 bool isValidPos(const int pos) const; 178 }; 179 } // namespace latinime 180 #endif // LATINIME_PATRICIA_TRIE_POLICY_H 181