1 /* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_PT_NODE_PARAMS_H 18 #define LATINIME_PT_NODE_PARAMS_H 19 20 #include <cstring> 21 22 #include "defines.h" 23 #include "dictionary/structure/pt_common/dynamic_pt_reading_utils.h" 24 #include "dictionary/structure/pt_common/patricia_trie_reading_utils.h" 25 #include "dictionary/structure/v4/ver4_dict_constants.h" 26 #include "utils/char_utils.h" 27 #include "utils/int_array_view.h" 28 29 namespace latinime { 30 31 // This class has information of a PtNode. This class is immutable. 32 class PtNodeParams { 33 public: 34 // Invalid PtNode. PtNodeParams()35 PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mHasMovedFlag(false), 36 mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mCodePoints(), 37 mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), 38 mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), 39 mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS), 40 mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), 41 mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {} 42 PtNodeParams(const PtNodeParams & ptNodeParams)43 PtNodeParams(const PtNodeParams& ptNodeParams) 44 : mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags), 45 mHasMovedFlag(ptNodeParams.mHasMovedFlag), mParentPos(ptNodeParams.mParentPos), 46 mCodePointCount(ptNodeParams.mCodePointCount), mCodePoints(), 47 mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos), 48 mTerminalId(ptNodeParams.mTerminalId), 49 mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos), 50 mProbability(ptNodeParams.mProbability), 51 mChildrenPosFieldPos(ptNodeParams.mChildrenPosFieldPos), 52 mChildrenPos(ptNodeParams.mChildrenPos), 53 mBigramLinkedNodePos(ptNodeParams.mBigramLinkedNodePos), 54 mShortcutPos(ptNodeParams.mShortcutPos), mBigramPos(ptNodeParams.mBigramPos), 55 mSiblingPos(ptNodeParams.mSiblingPos) { 56 memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount); 57 } 58 59 // PtNode read from version 2 dictionary. PtNodeParams(const int headPos,const PatriciaTrieReadingUtils::NodeFlags flags,const int codePointCount,const int * const codePoints,const int probability,const int childrenPos,const int shortcutPos,const int bigramPos,const int siblingPos)60 PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags, 61 const int codePointCount, const int *const codePoints, const int probability, 62 const int childrenPos, const int shortcutPos, const int bigramPos, 63 const int siblingPos) 64 : mHeadPos(headPos), mFlags(flags), mHasMovedFlag(false), mParentPos(NOT_A_DICT_POS), 65 mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS), 66 mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), 67 mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability), 68 mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(childrenPos), 69 mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(shortcutPos), 70 mBigramPos(bigramPos), mSiblingPos(siblingPos) { 71 memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); 72 } 73 74 // PtNode with a terminal id. PtNodeParams(const int headPos,const PatriciaTrieReadingUtils::NodeFlags flags,const int parentPos,const int codePointCount,const int * const codePoints,const int terminalIdFieldPos,const int terminalId,const int probability,const int childrenPosFieldPos,const int childrenPos,const int siblingPos)75 PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags, 76 const int parentPos, const int codePointCount, const int *const codePoints, 77 const int terminalIdFieldPos, const int terminalId, const int probability, 78 const int childrenPosFieldPos, const int childrenPos, const int siblingPos) 79 : mHeadPos(headPos), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos), 80 mCodePointCount(codePointCount), mCodePoints(), 81 mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId), 82 mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability), 83 mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos), 84 mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(terminalId), 85 mBigramPos(terminalId), mSiblingPos(siblingPos) { 86 memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); 87 } 88 89 // Construct new params by updating existing PtNode params. PtNodeParams(const PtNodeParams * const ptNodeParams,const PatriciaTrieReadingUtils::NodeFlags flags,const int parentPos,const CodePointArrayView codePoints,const int probability)90 PtNodeParams(const PtNodeParams *const ptNodeParams, 91 const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos, 92 const CodePointArrayView codePoints, const int probability) 93 : mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mHasMovedFlag(true), 94 mParentPos(parentPos), mCodePointCount(codePoints.size()), mCodePoints(), 95 mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()), 96 mTerminalId(ptNodeParams->getTerminalId()), 97 mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()), 98 mProbability(probability), 99 mChildrenPosFieldPos(ptNodeParams->getChildrenPosFieldPos()), 100 mChildrenPos(ptNodeParams->getChildrenPos()), 101 mBigramLinkedNodePos(ptNodeParams->getBigramLinkedNodePos()), 102 mShortcutPos(ptNodeParams->getShortcutPos()), 103 mBigramPos(ptNodeParams->getBigramsPos()), 104 mSiblingPos(ptNodeParams->getSiblingNodePos()) { 105 memcpy(mCodePoints, codePoints.data(), sizeof(int) * mCodePointCount); 106 } 107 PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags,const int parentPos,const CodePointArrayView codePoints,const int probability)108 PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos, 109 const CodePointArrayView codePoints, const int probability) 110 : mHeadPos(NOT_A_DICT_POS), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos), 111 mCodePointCount(codePoints.size()), mCodePoints(), 112 mTerminalIdFieldPos(NOT_A_DICT_POS), 113 mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), 114 mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability), 115 mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS), 116 mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), 117 mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) { 118 memcpy(mCodePoints, codePoints.data(), sizeof(int) * mCodePointCount); 119 } 120 isValid()121 AK_FORCE_INLINE bool isValid() const { 122 return mCodePointCount > 0; 123 } 124 125 // Head position of the PtNode getHeadPos()126 AK_FORCE_INLINE int getHeadPos() const { 127 return mHeadPos; 128 } 129 130 // Flags isDeleted()131 AK_FORCE_INLINE bool isDeleted() const { 132 return mHasMovedFlag && DynamicPtReadingUtils::isDeleted(mFlags); 133 } 134 willBecomeNonTerminal()135 AK_FORCE_INLINE bool willBecomeNonTerminal() const { 136 return mHasMovedFlag && DynamicPtReadingUtils::willBecomeNonTerminal(mFlags); 137 } 138 hasChildren()139 AK_FORCE_INLINE bool hasChildren() const { 140 return mChildrenPos != NOT_A_DICT_POS; 141 } 142 isTerminal()143 AK_FORCE_INLINE bool isTerminal() const { 144 return PatriciaTrieReadingUtils::isTerminal(mFlags); 145 } 146 isPossiblyOffensive()147 AK_FORCE_INLINE bool isPossiblyOffensive() const { 148 return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags); 149 } 150 isNotAWord()151 AK_FORCE_INLINE bool isNotAWord() const { 152 return PatriciaTrieReadingUtils::isNotAWord(mFlags); 153 } 154 hasBigrams()155 AK_FORCE_INLINE bool hasBigrams() const { 156 return PatriciaTrieReadingUtils::hasBigrams(mFlags); 157 } 158 hasShortcutTargets()159 AK_FORCE_INLINE bool hasShortcutTargets() const { 160 return PatriciaTrieReadingUtils::hasShortcutTargets(mFlags); 161 } 162 representsNonWordInfo()163 AK_FORCE_INLINE bool representsNonWordInfo() const { 164 return getCodePointCount() > 0 && !CharUtils::isInUnicodeSpace(getCodePoints()[0]) 165 && isNotAWord(); 166 } 167 representsBeginningOfSentence()168 AK_FORCE_INLINE int representsBeginningOfSentence() const { 169 return getCodePointCount() > 0 && getCodePoints()[0] == CODE_POINT_BEGINNING_OF_SENTENCE 170 && isNotAWord(); 171 } 172 173 // Parent node position getParentPos()174 AK_FORCE_INLINE int getParentPos() const { 175 return mParentPos; 176 } 177 getCodePointArrayView()178 AK_FORCE_INLINE const CodePointArrayView getCodePointArrayView() const { 179 return CodePointArrayView(mCodePoints, mCodePointCount); 180 } 181 182 // TODO: Remove 183 // Number of code points getCodePointCount()184 AK_FORCE_INLINE uint8_t getCodePointCount() const { 185 return mCodePointCount; 186 } 187 188 // TODO: Remove getCodePoints()189 AK_FORCE_INLINE const int *getCodePoints() const { 190 return mCodePoints; 191 } 192 193 // Probability getTerminalIdFieldPos()194 AK_FORCE_INLINE int getTerminalIdFieldPos() const { 195 return mTerminalIdFieldPos; 196 } 197 getTerminalId()198 AK_FORCE_INLINE int getTerminalId() const { 199 return mTerminalId; 200 } 201 202 // Probability getProbabilityFieldPos()203 AK_FORCE_INLINE int getProbabilityFieldPos() const { 204 return mProbabilityFieldPos; 205 } 206 getProbability()207 AK_FORCE_INLINE int getProbability() const { 208 return mProbability; 209 } 210 211 // Children PtNode array position getChildrenPosFieldPos()212 AK_FORCE_INLINE int getChildrenPosFieldPos() const { 213 return mChildrenPosFieldPos; 214 } 215 getChildrenPos()216 AK_FORCE_INLINE int getChildrenPos() const { 217 return mChildrenPos; 218 } 219 220 // Bigram linked node position. getBigramLinkedNodePos()221 AK_FORCE_INLINE int getBigramLinkedNodePos() const { 222 return mBigramLinkedNodePos; 223 } 224 225 // Shortcutlist position getShortcutPos()226 AK_FORCE_INLINE int getShortcutPos() const { 227 return mShortcutPos; 228 } 229 230 // Bigrams position getBigramsPos()231 AK_FORCE_INLINE int getBigramsPos() const { 232 return mBigramPos; 233 } 234 235 // Sibling node position getSiblingNodePos()236 AK_FORCE_INLINE int getSiblingNodePos() const { 237 return mSiblingPos; 238 } 239 240 private: 241 // This class have a public copy constructor to be used as a return value. 242 DISALLOW_ASSIGNMENT_OPERATOR(PtNodeParams); 243 244 const int mHeadPos; 245 const PatriciaTrieReadingUtils::NodeFlags mFlags; 246 const bool mHasMovedFlag; 247 const int mParentPos; 248 const uint8_t mCodePointCount; 249 int mCodePoints[MAX_WORD_LENGTH]; 250 const int mTerminalIdFieldPos; 251 const int mTerminalId; 252 const int mProbabilityFieldPos; 253 const int mProbability; 254 const int mChildrenPosFieldPos; 255 const int mChildrenPos; 256 const int mBigramLinkedNodePos; 257 const int mShortcutPos; 258 const int mBigramPos; 259 const int mSiblingPos; 260 }; 261 } // namespace latinime 262 #endif /* LATINIME_PT_NODE_PARAMS_H */ 263