1 /*
2  * Copyright (C) 2013, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LATINIME_PATRICIA_TRIE_READING_UTILS_H
18 #define LATINIME_PATRICIA_TRIE_READING_UTILS_H
19 
20 #include <cstdint>
21 
22 #include "defines.h"
23 
24 namespace latinime {
25 
26 class DictionaryShortcutsStructurePolicy;
27 class DictionaryBigramsStructurePolicy;
28 
29 class PatriciaTrieReadingUtils {
30  public:
31     typedef uint8_t NodeFlags;
32 
33     static int getPtNodeArraySizeAndAdvancePosition(const uint8_t *const buffer, int *const pos);
34 
35     static NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, int *const pos);
36 
37     static int getCodePointAndAdvancePosition(const uint8_t *const buffer,
38             const int *const codePointTable, int *const pos);
39 
40     // Returns the number of read characters.
41     static int getCharsAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags,
42             const int maxLength, const int *const codePointTable, int *const outBuffer,
43             int *const pos);
44 
45     // Returns the number of skipped characters.
46     static int skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
47             const int maxLength, const int *const codePointTable, int *const pos);
48 
49     static int readProbabilityAndAdvancePosition(const uint8_t *const buffer, int *const pos);
50 
51     static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer,
52             const NodeFlags flags, int *const pos);
53 
54     /**
55      * Node Flags
56      */
isPossiblyOffensive(const NodeFlags flags)57     static AK_FORCE_INLINE bool isPossiblyOffensive(const NodeFlags flags) {
58         return (flags & FLAG_IS_POSSIBLY_OFFENSIVE) != 0;
59     }
60 
isNotAWord(const NodeFlags flags)61     static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) {
62         return (flags & FLAG_IS_NOT_A_WORD) != 0;
63     }
64 
isTerminal(const NodeFlags flags)65     static AK_FORCE_INLINE bool isTerminal(const NodeFlags flags) {
66         return (flags & FLAG_IS_TERMINAL) != 0;
67     }
68 
hasShortcutTargets(const NodeFlags flags)69     static AK_FORCE_INLINE bool hasShortcutTargets(const NodeFlags flags) {
70         return (flags & FLAG_HAS_SHORTCUT_TARGETS) != 0;
71     }
72 
hasBigrams(const NodeFlags flags)73     static AK_FORCE_INLINE bool hasBigrams(const NodeFlags flags) {
74         return (flags & FLAG_HAS_BIGRAMS) != 0;
75     }
76 
hasMultipleChars(const NodeFlags flags)77     static AK_FORCE_INLINE bool hasMultipleChars(const NodeFlags flags) {
78         return (flags & FLAG_HAS_MULTIPLE_CHARS) != 0;
79     }
80 
hasChildrenInFlags(const NodeFlags flags)81     static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) {
82         return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags);
83     }
84 
createAndGetFlags(const bool isPossiblyOffensive,const bool isNotAWord,const bool isTerminal,const bool hasShortcutTargets,const bool hasBigrams,const bool hasMultipleChars,const int childrenPositionFieldSize)85     static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isPossiblyOffensive,
86             const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets,
87             const bool hasBigrams, const bool hasMultipleChars,
88             const int childrenPositionFieldSize) {
89         NodeFlags nodeFlags = 0;
90         nodeFlags = isPossiblyOffensive ? (nodeFlags | FLAG_IS_POSSIBLY_OFFENSIVE) : nodeFlags;
91         nodeFlags = isNotAWord ? (nodeFlags | FLAG_IS_NOT_A_WORD) : nodeFlags;
92         nodeFlags = isTerminal ? (nodeFlags | FLAG_IS_TERMINAL) : nodeFlags;
93         nodeFlags = hasShortcutTargets ? (nodeFlags | FLAG_HAS_SHORTCUT_TARGETS) : nodeFlags;
94         nodeFlags = hasBigrams ? (nodeFlags | FLAG_HAS_BIGRAMS) : nodeFlags;
95         nodeFlags = hasMultipleChars ? (nodeFlags | FLAG_HAS_MULTIPLE_CHARS) : nodeFlags;
96         if (childrenPositionFieldSize == 1) {
97             nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
98         } else if (childrenPositionFieldSize == 2) {
99             nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
100         } else if (childrenPositionFieldSize == 3) {
101             nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;
102         } else {
103             nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
104         }
105         return nodeFlags;
106     }
107 
108     static void readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
109             const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
110             const DictionaryBigramsStructurePolicy *const bigramPolicy,
111             const int *const codePointTable, NodeFlags *const outFlags,
112             int *const outCodePointCount, int *const outCodePoint, int *const outProbability,
113             int *const outChildrenPos, int *const outShortcutPos, int *const outBigramPos,
114             int *const outSiblingPos);
115 
116  private:
117     DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
118 
119     static const NodeFlags MASK_CHILDREN_POSITION_TYPE;
120     static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
121     static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
122     static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
123     static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;
124 
125     static const NodeFlags FLAG_HAS_MULTIPLE_CHARS;
126     static const NodeFlags FLAG_IS_TERMINAL;
127     static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS;
128     static const NodeFlags FLAG_HAS_BIGRAMS;
129     static const NodeFlags FLAG_IS_NOT_A_WORD;
130     static const NodeFlags FLAG_IS_POSSIBLY_OFFENSIVE;
131 };
132 } // namespace latinime
#endif /* LATINIME_PATRICIA_TRIE_READING_UTILS_H */
134