1 /*
2  * Copyright (C) 2013, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "dictionary/structure/v4/ver4_dict_constants.h"
18 
19 namespace latinime {
20 
21 const char *const Ver4DictConstants::BODY_FILE_EXTENSION = ".body";
22 const char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header";
23 
24 // Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets.
25 const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024;
26 // Extended region size, which is not GCed region size in dict file + additional buffer size, is
27 // limited to 1MB to prevent from inefficient traversing.
28 const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
29 
30 // NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie and TerminalAddressLookupTable.
31 // NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT for language model.
32 // NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT for shortcut.
33 const size_t Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE =
34         NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 2
35                 + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT
36                 + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT;
37 const int Ver4DictConstants::TRIE_BUFFER_INDEX = 0;
38 const int Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX =
39         TRIE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
40 const int Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX =
41         TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
42 const int Ver4DictConstants::SHORTCUT_BUFFERS_INDEX =
43         LANGUAGE_MODEL_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT;
44 
45 const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
46 const int Ver4DictConstants::PROBABILITY_SIZE = 1;
47 const int Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE = 1;
48 const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
49 const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0;
50 const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
51 const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4;
52 const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 0;
53 const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 2;
54 
55 const uint8_t Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE = 0x1;
56 const uint8_t Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY = 0x2;
57 const uint8_t Ver4DictConstants::FLAG_NOT_A_WORD = 0x4;
58 const uint8_t Ver4DictConstants::FLAG_BLACKLISTED = 0x8;
59 const uint8_t Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE = 0x10;
60 
61 const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
62 const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
63 
64 const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
65 const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F;
66 const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
67 
68 const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT = 1;
69 const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT = 3;
70 const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT = 2;
71 
72 } // namespace latinime
73