1 /*
2  * Copyright (C) 2013, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LATINIME_PT_NODE_PARAMS_H
18 #define LATINIME_PT_NODE_PARAMS_H
19 
20 #include <cstring>
21 
22 #include "defines.h"
23 #include "dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
24 #include "dictionary/structure/pt_common/patricia_trie_reading_utils.h"
25 #include "dictionary/structure/v4/ver4_dict_constants.h"
26 #include "utils/char_utils.h"
27 #include "utils/int_array_view.h"
28 
29 namespace latinime {
30 
31 // This class has information of a PtNode. This class is immutable.
32 class PtNodeParams {
33  public:
34     // Invalid PtNode.
PtNodeParams()35     PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mHasMovedFlag(false),
36             mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mCodePoints(),
37             mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
38             mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
39             mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
40             mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
41             mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {}
42 
PtNodeParams(const PtNodeParams & ptNodeParams)43     PtNodeParams(const PtNodeParams& ptNodeParams)
44             : mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags),
45               mHasMovedFlag(ptNodeParams.mHasMovedFlag), mParentPos(ptNodeParams.mParentPos),
46               mCodePointCount(ptNodeParams.mCodePointCount), mCodePoints(),
47               mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos),
48               mTerminalId(ptNodeParams.mTerminalId),
49               mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos),
50               mProbability(ptNodeParams.mProbability),
51               mChildrenPosFieldPos(ptNodeParams.mChildrenPosFieldPos),
52               mChildrenPos(ptNodeParams.mChildrenPos),
53               mBigramLinkedNodePos(ptNodeParams.mBigramLinkedNodePos),
54               mShortcutPos(ptNodeParams.mShortcutPos), mBigramPos(ptNodeParams.mBigramPos),
55               mSiblingPos(ptNodeParams.mSiblingPos) {
56         memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
57     }
58 
59     // PtNode read from version 2 dictionary.
PtNodeParams(const int headPos,const PatriciaTrieReadingUtils::NodeFlags flags,const int codePointCount,const int * const codePoints,const int probability,const int childrenPos,const int shortcutPos,const int bigramPos,const int siblingPos)60     PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
61             const int codePointCount, const int *const codePoints, const int probability,
62             const int childrenPos, const int shortcutPos, const int bigramPos,
63             const int siblingPos)
64             : mHeadPos(headPos), mFlags(flags), mHasMovedFlag(false), mParentPos(NOT_A_DICT_POS),
65               mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
66               mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
67               mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
68               mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(childrenPos),
69               mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(shortcutPos),
70               mBigramPos(bigramPos), mSiblingPos(siblingPos) {
71         memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
72     }
73 
74     // PtNode with a terminal id.
PtNodeParams(const int headPos,const PatriciaTrieReadingUtils::NodeFlags flags,const int parentPos,const int codePointCount,const int * const codePoints,const int terminalIdFieldPos,const int terminalId,const int probability,const int childrenPosFieldPos,const int childrenPos,const int siblingPos)75     PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
76             const int parentPos, const int codePointCount, const int *const codePoints,
77             const int terminalIdFieldPos, const int terminalId, const int probability,
78             const int childrenPosFieldPos, const int childrenPos, const int siblingPos)
79             : mHeadPos(headPos), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos),
80               mCodePointCount(codePointCount), mCodePoints(),
81               mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId),
82               mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
83               mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
84               mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(terminalId),
85               mBigramPos(terminalId), mSiblingPos(siblingPos) {
86         memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
87     }
88 
89     // Construct new params by updating existing PtNode params.
PtNodeParams(const PtNodeParams * const ptNodeParams,const PatriciaTrieReadingUtils::NodeFlags flags,const int parentPos,const CodePointArrayView codePoints,const int probability)90     PtNodeParams(const PtNodeParams *const ptNodeParams,
91             const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
92             const CodePointArrayView codePoints, const int probability)
93             : mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mHasMovedFlag(true),
94               mParentPos(parentPos), mCodePointCount(codePoints.size()), mCodePoints(),
95               mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()),
96               mTerminalId(ptNodeParams->getTerminalId()),
97               mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()),
98               mProbability(probability),
99               mChildrenPosFieldPos(ptNodeParams->getChildrenPosFieldPos()),
100               mChildrenPos(ptNodeParams->getChildrenPos()),
101               mBigramLinkedNodePos(ptNodeParams->getBigramLinkedNodePos()),
102               mShortcutPos(ptNodeParams->getShortcutPos()),
103               mBigramPos(ptNodeParams->getBigramsPos()),
104               mSiblingPos(ptNodeParams->getSiblingNodePos()) {
105         memcpy(mCodePoints, codePoints.data(), sizeof(int) * mCodePointCount);
106     }
107 
PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags,const int parentPos,const CodePointArrayView codePoints,const int probability)108     PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
109             const CodePointArrayView codePoints, const int probability)
110             : mHeadPos(NOT_A_DICT_POS), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos),
111               mCodePointCount(codePoints.size()), mCodePoints(),
112               mTerminalIdFieldPos(NOT_A_DICT_POS),
113               mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
114               mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
115               mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
116               mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
117               mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {
118         memcpy(mCodePoints, codePoints.data(), sizeof(int) * mCodePointCount);
119     }
120 
isValid()121     AK_FORCE_INLINE bool isValid() const {
122         return mCodePointCount > 0;
123     }
124 
125     // Head position of the PtNode
getHeadPos()126     AK_FORCE_INLINE int getHeadPos() const {
127         return mHeadPos;
128     }
129 
130     // Flags
isDeleted()131     AK_FORCE_INLINE bool isDeleted() const {
132         return mHasMovedFlag && DynamicPtReadingUtils::isDeleted(mFlags);
133     }
134 
willBecomeNonTerminal()135     AK_FORCE_INLINE bool willBecomeNonTerminal() const {
136         return mHasMovedFlag && DynamicPtReadingUtils::willBecomeNonTerminal(mFlags);
137     }
138 
hasChildren()139     AK_FORCE_INLINE bool hasChildren() const {
140         return mChildrenPos != NOT_A_DICT_POS;
141     }
142 
isTerminal()143     AK_FORCE_INLINE bool isTerminal() const {
144         return PatriciaTrieReadingUtils::isTerminal(mFlags);
145     }
146 
isPossiblyOffensive()147     AK_FORCE_INLINE bool isPossiblyOffensive() const {
148         return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags);
149     }
150 
isNotAWord()151     AK_FORCE_INLINE bool isNotAWord() const {
152         return PatriciaTrieReadingUtils::isNotAWord(mFlags);
153     }
154 
hasBigrams()155     AK_FORCE_INLINE bool hasBigrams() const {
156         return PatriciaTrieReadingUtils::hasBigrams(mFlags);
157     }
158 
hasShortcutTargets()159     AK_FORCE_INLINE bool hasShortcutTargets() const {
160         return PatriciaTrieReadingUtils::hasShortcutTargets(mFlags);
161     }
162 
representsNonWordInfo()163     AK_FORCE_INLINE bool representsNonWordInfo() const {
164         return getCodePointCount() > 0 && !CharUtils::isInUnicodeSpace(getCodePoints()[0])
165                 && isNotAWord();
166     }
167 
representsBeginningOfSentence()168     AK_FORCE_INLINE int representsBeginningOfSentence() const {
169         return getCodePointCount() > 0 && getCodePoints()[0] == CODE_POINT_BEGINNING_OF_SENTENCE
170                 && isNotAWord();
171     }
172 
173     // Parent node position
getParentPos()174     AK_FORCE_INLINE int getParentPos() const {
175         return mParentPos;
176     }
177 
getCodePointArrayView()178     AK_FORCE_INLINE const CodePointArrayView getCodePointArrayView() const {
179         return CodePointArrayView(mCodePoints, mCodePointCount);
180     }
181 
182     // TODO: Remove
183     // Number of code points
getCodePointCount()184     AK_FORCE_INLINE uint8_t getCodePointCount() const {
185         return mCodePointCount;
186     }
187 
188     // TODO: Remove
getCodePoints()189     AK_FORCE_INLINE const int *getCodePoints() const {
190         return mCodePoints;
191     }
192 
193     // Probability
getTerminalIdFieldPos()194     AK_FORCE_INLINE int getTerminalIdFieldPos() const {
195         return mTerminalIdFieldPos;
196     }
197 
getTerminalId()198     AK_FORCE_INLINE int getTerminalId() const {
199         return mTerminalId;
200     }
201 
202     // Probability
getProbabilityFieldPos()203     AK_FORCE_INLINE int getProbabilityFieldPos() const {
204         return mProbabilityFieldPos;
205     }
206 
getProbability()207     AK_FORCE_INLINE int getProbability() const {
208         return mProbability;
209     }
210 
211     // Children PtNode array position
getChildrenPosFieldPos()212     AK_FORCE_INLINE int getChildrenPosFieldPos() const {
213         return mChildrenPosFieldPos;
214     }
215 
getChildrenPos()216     AK_FORCE_INLINE int getChildrenPos() const {
217         return mChildrenPos;
218     }
219 
220     // Bigram linked node position.
getBigramLinkedNodePos()221     AK_FORCE_INLINE int getBigramLinkedNodePos() const {
222         return mBigramLinkedNodePos;
223     }
224 
225     // Shortcutlist position
getShortcutPos()226     AK_FORCE_INLINE int getShortcutPos() const {
227         return mShortcutPos;
228     }
229 
230     // Bigrams position
getBigramsPos()231     AK_FORCE_INLINE int getBigramsPos() const {
232         return mBigramPos;
233     }
234 
235     // Sibling node position
getSiblingNodePos()236     AK_FORCE_INLINE int getSiblingNodePos() const {
237         return mSiblingPos;
238     }
239 
240  private:
241     // This class have a public copy constructor to be used as a return value.
242     DISALLOW_ASSIGNMENT_OPERATOR(PtNodeParams);
243 
244     const int mHeadPos;
245     const PatriciaTrieReadingUtils::NodeFlags mFlags;
246     const bool mHasMovedFlag;
247     const int mParentPos;
248     const uint8_t mCodePointCount;
249     int mCodePoints[MAX_WORD_LENGTH];
250     const int mTerminalIdFieldPos;
251     const int mTerminalId;
252     const int mProbabilityFieldPos;
253     const int mProbability;
254     const int mChildrenPosFieldPos;
255     const int mChildrenPos;
256     const int mBigramLinkedNodePos;
257     const int mShortcutPos;
258     const int mBigramPos;
259     const int mSiblingPos;
260 };
261 } // namespace latinime
262 #endif /* LATINIME_PT_NODE_PARAMS_H */
263