1 /* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_PATRICIA_TRIE_READING_UTILS_H 18 #define LATINIME_PATRICIA_TRIE_READING_UTILS_H 19 20 #include <cstdint> 21 22 #include "defines.h" 23 24 namespace latinime { 25 26 class DictionaryShortcutsStructurePolicy; 27 class DictionaryBigramsStructurePolicy; 28 29 class PatriciaTrieReadingUtils { 30 public: 31 typedef uint8_t NodeFlags; 32 33 static int getPtNodeArraySizeAndAdvancePosition(const uint8_t *const buffer, int *const pos); 34 35 static NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, int *const pos); 36 37 static int getCodePointAndAdvancePosition(const uint8_t *const buffer, 38 const int *const codePointTable, int *const pos); 39 40 // Returns the number of read characters. 41 static int getCharsAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags, 42 const int maxLength, const int *const codePointTable, int *const outBuffer, 43 int *const pos); 44 45 // Returns the number of skipped characters. 46 static int skipCharacters(const uint8_t *const buffer, const NodeFlags flags, 47 const int maxLength, const int *const codePointTable, int *const pos); 48 49 static int readProbabilityAndAdvancePosition(const uint8_t *const buffer, int *const pos); 50 51 static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, 52 const NodeFlags flags, int *const pos); 53 54 /** 55 * Node Flags 56 */ isPossiblyOffensive(const NodeFlags flags)57 static AK_FORCE_INLINE bool isPossiblyOffensive(const NodeFlags flags) { 58 return (flags & FLAG_IS_POSSIBLY_OFFENSIVE) != 0; 59 } 60 isNotAWord(const NodeFlags flags)61 static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) { 62 return (flags & FLAG_IS_NOT_A_WORD) != 0; 63 } 64 isTerminal(const NodeFlags flags)65 static AK_FORCE_INLINE bool isTerminal(const NodeFlags flags) { 66 return (flags & FLAG_IS_TERMINAL) != 0; 67 } 68 hasShortcutTargets(const NodeFlags flags)69 static AK_FORCE_INLINE bool hasShortcutTargets(const NodeFlags flags) { 70 return (flags & FLAG_HAS_SHORTCUT_TARGETS) != 0; 71 } 72 hasBigrams(const NodeFlags flags)73 static AK_FORCE_INLINE bool hasBigrams(const NodeFlags flags) { 74 return (flags & FLAG_HAS_BIGRAMS) != 0; 75 } 76 hasMultipleChars(const NodeFlags flags)77 static AK_FORCE_INLINE bool hasMultipleChars(const NodeFlags flags) { 78 return (flags & FLAG_HAS_MULTIPLE_CHARS) != 0; 79 } 80 hasChildrenInFlags(const NodeFlags flags)81 static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) { 82 return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags); 83 } 84 createAndGetFlags(const bool isPossiblyOffensive,const bool isNotAWord,const bool isTerminal,const bool hasShortcutTargets,const bool hasBigrams,const bool hasMultipleChars,const int childrenPositionFieldSize)85 static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isPossiblyOffensive, 86 const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets, 87 const bool hasBigrams, const bool hasMultipleChars, 88 const int childrenPositionFieldSize) { 89 NodeFlags nodeFlags = 0; 90 nodeFlags = isPossiblyOffensive ? (nodeFlags | FLAG_IS_POSSIBLY_OFFENSIVE) : nodeFlags; 91 nodeFlags = isNotAWord ? (nodeFlags | FLAG_IS_NOT_A_WORD) : nodeFlags; 92 nodeFlags = isTerminal ? (nodeFlags | FLAG_IS_TERMINAL) : nodeFlags; 93 nodeFlags = hasShortcutTargets ? (nodeFlags | FLAG_HAS_SHORTCUT_TARGETS) : nodeFlags; 94 nodeFlags = hasBigrams ? (nodeFlags | FLAG_HAS_BIGRAMS) : nodeFlags; 95 nodeFlags = hasMultipleChars ? (nodeFlags | FLAG_HAS_MULTIPLE_CHARS) : nodeFlags; 96 if (childrenPositionFieldSize == 1) { 97 nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_ONEBYTE; 98 } else if (childrenPositionFieldSize == 2) { 99 nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_TWOBYTES; 100 } else if (childrenPositionFieldSize == 3) { 101 nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_THREEBYTES; 102 } else { 103 nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_NOPOSITION; 104 } 105 return nodeFlags; 106 } 107 108 static void readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos, 109 const DictionaryShortcutsStructurePolicy *const shortcutPolicy, 110 const DictionaryBigramsStructurePolicy *const bigramPolicy, 111 const int *const codePointTable, NodeFlags *const outFlags, 112 int *const outCodePointCount, int *const outCodePoint, int *const outProbability, 113 int *const outChildrenPos, int *const outShortcutPos, int *const outBigramPos, 114 int *const outSiblingPos); 115 116 private: 117 DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils); 118 119 static const NodeFlags MASK_CHILDREN_POSITION_TYPE; 120 static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_NOPOSITION; 121 static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_ONEBYTE; 122 static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_TWOBYTES; 123 static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_THREEBYTES; 124 125 static const NodeFlags FLAG_HAS_MULTIPLE_CHARS; 126 static const NodeFlags FLAG_IS_TERMINAL; 127 static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS; 128 static const NodeFlags FLAG_HAS_BIGRAMS; 129 static const NodeFlags FLAG_IS_NOT_A_WORD; 130 static const NodeFlags FLAG_IS_POSSIBLY_OFFENSIVE; 131 }; 132 } // namespace latinime 133 #endif /* LATINIME_PATRICIA_TRIE_NODE_READING_UTILS_H */ 134