1 /* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "dictionary/structure/v4/ver4_dict_constants.h" 18 19 namespace latinime { 20 21 const char *const Ver4DictConstants::BODY_FILE_EXTENSION = ".body"; 22 const char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header"; 23 24 // Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets. 25 const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024; 26 // Extended region size, which is not GCed region size in dict file + additional buffer size, is 27 // limited to 1MB to prevent from inefficient traversing. 28 const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024; 29 30 // NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie and TerminalAddressLookupTable. 31 // NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT for language model. 32 // NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT for shortcut. 33 const size_t Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE = 34 NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 2 35 + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT 36 + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT; 37 const int Ver4DictConstants::TRIE_BUFFER_INDEX = 0; 38 const int Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX = 39 TRIE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT; 40 const int Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX = 41 TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT; 42 const int Ver4DictConstants::SHORTCUT_BUFFERS_INDEX = 43 LANGUAGE_MODEL_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT; 44 45 const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1; 46 const int Ver4DictConstants::PROBABILITY_SIZE = 1; 47 const int Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE = 1; 48 const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; 49 const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0; 50 const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4; 51 const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4; 52 const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 0; 53 const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 2; 54 55 const uint8_t Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE = 0x1; 56 const uint8_t Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY = 0x2; 57 const uint8_t Ver4DictConstants::FLAG_NOT_A_WORD = 0x4; 58 const uint8_t Ver4DictConstants::FLAG_BLACKLISTED = 0x8; 59 const uint8_t Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE = 0x10; 60 61 const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64; 62 const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4; 63 64 const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1; 65 const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F; 66 const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80; 67 68 const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT = 1; 69 const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT = 3; 70 const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT = 2; 71 72 } // namespace latinime 73