1 /*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
18
19 #include "dictionary/structure/pt_common/pt_node_array_reader.h"
20 #include "utils/char_utils.h"
21
22 namespace latinime {
23
24 // To avoid infinite loop caused by invalid or malicious forward links.
25 const int DynamicPtReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
26 const int DynamicPtReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
27 const size_t DynamicPtReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
28
onVisitingPtNode(const PtNodeParams * const ptNodeParams)29 bool DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions::onVisitingPtNode(
30 const PtNodeParams *const ptNodeParams) {
31 if (ptNodeParams->isTerminal() && !ptNodeParams->isDeleted()) {
32 mTerminalPositions->push_back(ptNodeParams->getHeadPos());
33 }
34 return true;
35 }
36
37 // Visits all PtNodes in post-order depth first manner.
38 // For example, visits c -> b -> y -> x -> a for the following dictionary:
39 // a _ b _ c
40 // \ x _ y
traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener * const listener)41 bool DynamicPtReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
42 TraversingEventListener *const listener) {
43 bool alreadyVisitedChildren = false;
44 // Descend from the root to the root PtNode array.
45 if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
46 return false;
47 }
48 while (!isEnd()) {
49 const PtNodeParams ptNodeParams(getPtNodeParams());
50 if (!ptNodeParams.isValid()) {
51 break;
52 }
53 if (!alreadyVisitedChildren) {
54 if (ptNodeParams.hasChildren()) {
55 // Move to the first child.
56 if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
57 return false;
58 }
59 pushReadingStateToStack();
60 readChildNode(ptNodeParams);
61 } else {
62 alreadyVisitedChildren = true;
63 }
64 } else {
65 if (!listener->onVisitingPtNode(&ptNodeParams)) {
66 return false;
67 }
68 readNextSiblingNode(ptNodeParams);
69 if (isEnd()) {
70 // All PtNodes in current linked PtNode arrays have been visited.
71 // Return to the parent.
72 if (!listener->onReadingPtNodeArrayTail()) {
73 return false;
74 }
75 if (mReadingStateStack.size() <= 0) {
76 break;
77 }
78 if (!listener->onAscend()) {
79 return false;
80 }
81 popReadingStateFromStack();
82 alreadyVisitedChildren = true;
83 } else {
84 // Process sibling PtNode.
85 alreadyVisitedChildren = false;
86 }
87 }
88 }
89 // Ascend from the root PtNode array to the root.
90 if (!listener->onAscend()) {
91 return false;
92 }
93 return !isError();
94 }
95
96 // Visits all PtNodes in PtNode array level pre-order depth first manner, which is the same order
97 // that PtNodes are written in the dictionary buffer.
98 // For example, visits a -> b -> x -> c -> y for the following dictionary:
99 // a _ b _ c
100 // \ x _ y
traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(TraversingEventListener * const listener)101 bool DynamicPtReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
102 TraversingEventListener *const listener) {
103 bool alreadyVisitedAllPtNodesInArray = false;
104 bool alreadyVisitedChildren = false;
105 // Descend from the root to the root PtNode array.
106 if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
107 return false;
108 }
109 if (isEnd()) {
110 // Empty dictionary. Needs to notify the listener of the tail of empty PtNode array.
111 if (!listener->onReadingPtNodeArrayTail()) {
112 return false;
113 }
114 }
115 pushReadingStateToStack();
116 while (!isEnd()) {
117 const PtNodeParams ptNodeParams(getPtNodeParams());
118 if (!ptNodeParams.isValid()) {
119 break;
120 }
121 if (alreadyVisitedAllPtNodesInArray) {
122 if (alreadyVisitedChildren) {
123 // Move to next sibling PtNode's children.
124 readNextSiblingNode(ptNodeParams);
125 if (isEnd()) {
126 // Return to the parent PTNode.
127 if (!listener->onAscend()) {
128 return false;
129 }
130 if (mReadingStateStack.size() <= 0) {
131 break;
132 }
133 popReadingStateFromStack();
134 alreadyVisitedChildren = true;
135 alreadyVisitedAllPtNodesInArray = true;
136 } else {
137 alreadyVisitedChildren = false;
138 }
139 } else {
140 if (ptNodeParams.hasChildren()) {
141 // Move to the first child.
142 if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
143 return false;
144 }
145 pushReadingStateToStack();
146 readChildNode(ptNodeParams);
147 // Push state to return the head of PtNode array.
148 pushReadingStateToStack();
149 alreadyVisitedAllPtNodesInArray = false;
150 alreadyVisitedChildren = false;
151 } else {
152 alreadyVisitedChildren = true;
153 }
154 }
155 } else {
156 if (!listener->onVisitingPtNode(&ptNodeParams)) {
157 return false;
158 }
159 readNextSiblingNode(ptNodeParams);
160 if (isEnd()) {
161 if (!listener->onReadingPtNodeArrayTail()) {
162 return false;
163 }
164 // Return to the head of current PtNode array.
165 popReadingStateFromStack();
166 alreadyVisitedAllPtNodesInArray = true;
167 }
168 }
169 }
170 popReadingStateFromStack();
171 // Ascend from the root PtNode array to the root.
172 if (!listener->onAscend()) {
173 return false;
174 }
175 return !isError();
176 }
177
getCodePointsAndReturnCodePointCount(const int maxCodePointCount,int * const outCodePoints)178 int DynamicPtReadingHelper::getCodePointsAndReturnCodePointCount(const int maxCodePointCount,
179 int *const outCodePoints) {
180 // This method traverses parent nodes from the terminal by following parent pointers; thus,
181 // node code points are stored in the buffer in the reverse order.
182 int reverseCodePoints[maxCodePointCount];
183 const PtNodeParams terminalPtNodeParams(getPtNodeParams());
184 // First, read the terminal node and get its probability.
185 if (!isValidTerminalNode(terminalPtNodeParams)) {
186 // Node at the ptNodePos is not a valid terminal node.
187 return 0;
188 }
189 // Then, following parent node link to the dictionary root and fetch node code points.
190 int totalCodePointCount = 0;
191 while (!isEnd()) {
192 const PtNodeParams ptNodeParams(getPtNodeParams());
193 totalCodePointCount = getTotalCodePointCount(ptNodeParams);
194 if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) {
195 // The ptNodePos is not a valid terminal node position in the dictionary.
196 return 0;
197 }
198 // Store node code points to buffer in the reverse order.
199 fetchMergedNodeCodePointsInReverseOrder(ptNodeParams, getPrevTotalCodePointCount(),
200 reverseCodePoints);
201 // Follow parent node toward the root node.
202 readParentNode(ptNodeParams);
203 }
204 if (isError()) {
205 // The node position or the dictionary is invalid.
206 return 0;
207 }
208 // Reverse the stored code points to output them.
209 for (int i = 0; i < totalCodePointCount; ++i) {
210 outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1];
211 }
212 return totalCodePointCount;
213 }
214
getTerminalPtNodePositionOfWord(const int * const inWord,const size_t length,const bool forceLowerCaseSearch)215 int DynamicPtReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord,
216 const size_t length, const bool forceLowerCaseSearch) {
217 int searchCodePoints[length];
218 for (size_t i = 0; i < length; ++i) {
219 searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
220 }
221 while (!isEnd()) {
222 const PtNodeParams ptNodeParams(getPtNodeParams());
223 const int matchedCodePointCount = getPrevTotalCodePointCount();
224 if (getTotalCodePointCount(ptNodeParams) > length
225 || !isMatchedCodePoint(ptNodeParams, 0 /* index */,
226 searchCodePoints[matchedCodePointCount])) {
227 // Current node has too many code points or its first code point is different from
228 // target code point. Skip this node and read the next sibling node.
229 readNextSiblingNode(ptNodeParams);
230 continue;
231 }
232 // Check following merged node code points.
233 const int nodeCodePointCount = ptNodeParams.getCodePointCount();
234 for (int j = 1; j < nodeCodePointCount; ++j) {
235 if (!isMatchedCodePoint(ptNodeParams, j, searchCodePoints[matchedCodePointCount + j])) {
236 // Different code point is found. The given word is not included in the dictionary.
237 return NOT_A_DICT_POS;
238 }
239 }
240 // All characters are matched.
241 if (length == getTotalCodePointCount(ptNodeParams)) {
242 if (!ptNodeParams.isTerminal()) {
243 return NOT_A_DICT_POS;
244 }
245 // Terminal position is found.
246 return ptNodeParams.getHeadPos();
247 }
248 if (!ptNodeParams.hasChildren()) {
249 return NOT_A_DICT_POS;
250 }
251 // Advance to the children nodes.
252 readChildNode(ptNodeParams);
253 }
254 // If we already traversed the tree further than the word is long, there means
255 // there was no match (or we would have found it).
256 return NOT_A_DICT_POS;
257 }
258
259 // Read node array size and process empty node arrays. Nodes and arrays are counted up in this
260 // method to avoid an infinite loop.
nextPtNodeArray()261 void DynamicPtReadingHelper::nextPtNodeArray() {
262 int ptNodeCountInArray = 0;
263 int firstPtNodePos = NOT_A_DICT_POS;
264 if (!mPtNodeArrayReader->readPtNodeArrayInfoAndReturnIfValid(
265 mReadingState.mPos, &ptNodeCountInArray, &firstPtNodePos)) {
266 mIsError = true;
267 mReadingState.mPos = NOT_A_DICT_POS;
268 return;
269 }
270 mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos;
271 mReadingState.mRemainingPtNodeCountInThisArray = ptNodeCountInArray;
272 mReadingState.mPos = firstPtNodePos;
273 // Count up nodes and node arrays to avoid infinite loop.
274 mReadingState.mTotalPtNodeIndexInThisArrayChain +=
275 mReadingState.mRemainingPtNodeCountInThisArray;
276 mReadingState.mPtNodeArrayIndexInThisArrayChain++;
277 if (mReadingState.mRemainingPtNodeCountInThisArray < 0
278 || mReadingState.mTotalPtNodeIndexInThisArrayChain
279 > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
280 || mReadingState.mPtNodeArrayIndexInThisArrayChain
281 > MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
282 // Invalid dictionary.
283 AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
284 "nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
285 mReadingState.mRemainingPtNodeCountInThisArray,
286 mReadingState.mTotalPtNodeIndexInThisArrayChain,
287 MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
288 mReadingState.mPtNodeArrayIndexInThisArrayChain,
289 MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
290 ASSERT(false);
291 mIsError = true;
292 mReadingState.mPos = NOT_A_DICT_POS;
293 return;
294 }
295 if (mReadingState.mRemainingPtNodeCountInThisArray == 0) {
296 // Empty node array. Try following forward link.
297 followForwardLink();
298 }
299 }
300
301 // Follow the forward link and read the next node array if exists.
followForwardLink()302 void DynamicPtReadingHelper::followForwardLink() {
303 int nextPtNodeArrayPos = NOT_A_DICT_POS;
304 if (!mPtNodeArrayReader->readForwardLinkAndReturnIfValid(
305 mReadingState.mPos, &nextPtNodeArrayPos)) {
306 mIsError = true;
307 mReadingState.mPos = NOT_A_DICT_POS;
308 return;
309 }
310 mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos;
311 if (nextPtNodeArrayPos != NOT_A_DICT_POS) {
312 // Follow the forward link.
313 mReadingState.mPos = nextPtNodeArrayPos;
314 nextPtNodeArray();
315 } else {
316 // All node arrays have been read.
317 mReadingState.mPos = NOT_A_DICT_POS;
318 }
319 }
320
321 } // namespace latinime
322