1 /*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "dictionary/header/header_read_write_utils.h"
18
#include <cctype>
#include <cstdio>
#include <limits>
#include <memory>
#include <vector>
23
24 #include "defines.h"
25 #include "dictionary/utils/buffer_with_extendable_buffer.h"
26 #include "dictionary/utils/byte_array_utils.h"
27
28 namespace latinime {
29
30 // Number of base-10 digits in the largest integer + 1 to leave room for a zero terminator.
31 // As such, this is the maximum number of characters will be needed to represent an int as a
32 // string, including the terminator; this is used as the size of a string buffer large enough to
33 // hold any value that is intended to fit in an integer, e.g. in the code that reads the header
34 // of the binary dictionary where a {key,value} string pair scheme is used.
35 const int HeaderReadWriteUtils::LARGEST_INT_DIGIT_COUNT = 11;
36
37 const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256;
38 const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 2048;
39
40 const int HeaderReadWriteUtils::HEADER_MAGIC_NUMBER_SIZE = 4;
41 const int HeaderReadWriteUtils::HEADER_DICTIONARY_VERSION_SIZE = 2;
42 const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2;
43 const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4;
44 const char *const HeaderReadWriteUtils::CODE_POINT_TABLE_KEY = "codePointTable";
45
46 const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0;
47
48 typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
49
getHeaderSize(const uint8_t * const dictBuf)50 /* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) {
51 // See the format of the header in the comment in
52 // BinaryDictionaryFormatUtils::detectFormatVersion()
53 return ByteArrayUtils::readUint32(dictBuf, HEADER_MAGIC_NUMBER_SIZE
54 + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE);
55 }
56
57 /* static */ HeaderReadWriteUtils::DictionaryFlags
getFlags(const uint8_t * const dictBuf)58 HeaderReadWriteUtils::getFlags(const uint8_t *const dictBuf) {
59 return ByteArrayUtils::readUint16(dictBuf,
60 HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE);
61 }
62
63 /* static */ HeaderReadWriteUtils::DictionaryFlags
createAndGetDictionaryFlagsUsingAttributeMap(const AttributeMap * const attributeMap)64 HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
65 const AttributeMap *const attributeMap) {
66 return NO_FLAGS;
67 }
68
fetchAllHeaderAttributes(const uint8_t * const dictBuf,AttributeMap * const headerAttributes)69 /* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf,
70 AttributeMap *const headerAttributes) {
71 const int headerSize = getHeaderSize(dictBuf);
72 int pos = getHeaderOptionsPosition();
73 if (pos == NOT_A_DICT_POS) {
74 // The header doesn't have header options.
75 return;
76 }
77 int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH];
78 std::unique_ptr<int[]> valueBuffer(new int[MAX_ATTRIBUTE_VALUE_LENGTH]);
79 while (pos < headerSize) {
80 // The values in the header don't use the code point table for their encoding.
81 const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
82 MAX_ATTRIBUTE_KEY_LENGTH, nullptr /* codePointTable */, keyBuffer, &pos);
83 std::vector<int> key;
84 key.insert(key.end(), keyBuffer, keyBuffer + keyLength);
85 const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
86 MAX_ATTRIBUTE_VALUE_LENGTH, nullptr /* codePointTable */, valueBuffer.get(), &pos);
87 std::vector<int> value;
88 value.insert(value.end(), valueBuffer.get(), valueBuffer.get() + valueLength);
89 headerAttributes->insert(AttributeMap::value_type(key, value));
90 }
91 }
92
readCodePointTable(AttributeMap * const headerAttributes)93 /* static */ const int *HeaderReadWriteUtils::readCodePointTable(
94 AttributeMap *const headerAttributes) {
95 AttributeMap::key_type keyVector;
96 insertCharactersIntoVector(CODE_POINT_TABLE_KEY, &keyVector);
97 AttributeMap::const_iterator it = headerAttributes->find(keyVector);
98 if (it == headerAttributes->end()) {
99 return nullptr;
100 }
101 return it->second.data();
102 }
103
writeDictionaryVersion(BufferWithExtendableBuffer * const buffer,const FormatUtils::FORMAT_VERSION version,int * const writingPos)104 /* static */ bool HeaderReadWriteUtils::writeDictionaryVersion(
105 BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version,
106 int *const writingPos) {
107 if (!buffer->writeUintAndAdvancePosition(FormatUtils::MAGIC_NUMBER, HEADER_MAGIC_NUMBER_SIZE,
108 writingPos)) {
109 return false;
110 }
111 switch (version) {
112 case FormatUtils::VERSION_2:
113 case FormatUtils::VERSION_201:
114 case FormatUtils::VERSION_202:
115 // None of the static dictionaries (v2x) support writing
116 return false;
117 case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
118 case FormatUtils::VERSION_402:
119 case FormatUtils::VERSION_403:
120 return buffer->writeUintAndAdvancePosition(version /* data */,
121 HEADER_DICTIONARY_VERSION_SIZE, writingPos);
122 default:
123 return false;
124 }
125 }
126
writeDictionaryFlags(BufferWithExtendableBuffer * const buffer,const DictionaryFlags flags,int * const writingPos)127 /* static */ bool HeaderReadWriteUtils::writeDictionaryFlags(
128 BufferWithExtendableBuffer *const buffer, const DictionaryFlags flags,
129 int *const writingPos) {
130 return buffer->writeUintAndAdvancePosition(flags, HEADER_FLAG_SIZE, writingPos);
131 }
132
writeDictionaryHeaderSize(BufferWithExtendableBuffer * const buffer,const int size,int * const writingPos)133 /* static */ bool HeaderReadWriteUtils::writeDictionaryHeaderSize(
134 BufferWithExtendableBuffer *const buffer, const int size, int *const writingPos) {
135 return buffer->writeUintAndAdvancePosition(size, HEADER_SIZE_FIELD_SIZE, writingPos);
136 }
137
writeHeaderAttributes(BufferWithExtendableBuffer * const buffer,const AttributeMap * const headerAttributes,int * const writingPos)138 /* static */ bool HeaderReadWriteUtils::writeHeaderAttributes(
139 BufferWithExtendableBuffer *const buffer, const AttributeMap *const headerAttributes,
140 int *const writingPos) {
141 for (AttributeMap::const_iterator it = headerAttributes->begin();
142 it != headerAttributes->end(); ++it) {
143 if (it->first.empty() || it->second.empty()) {
144 continue;
145 }
146 // Write a key.
147 if (!buffer->writeCodePointsAndAdvancePosition(&(it->first.at(0)), it->first.size(),
148 true /* writesTerminator */, writingPos)) {
149 return false;
150 }
151 // Write a value.
152 if (!buffer->writeCodePointsAndAdvancePosition(&(it->second.at(0)), it->second.size(),
153 true /* writesTerminator */, writingPos)) {
154 return false;
155 }
156 }
157 return true;
158 }
159
setCodePointVectorAttribute(AttributeMap * const headerAttributes,const char * const key,const std::vector<int> & value)160 /* static */ void HeaderReadWriteUtils::setCodePointVectorAttribute(
161 AttributeMap *const headerAttributes, const char *const key,
162 const std::vector<int> &value) {
163 AttributeMap::key_type keyVector;
164 insertCharactersIntoVector(key, &keyVector);
165 (*headerAttributes)[keyVector] = value;
166 }
167
setBoolAttribute(AttributeMap * const headerAttributes,const char * const key,const bool value)168 /* static */ void HeaderReadWriteUtils::setBoolAttribute(AttributeMap *const headerAttributes,
169 const char *const key, const bool value) {
170 setIntAttribute(headerAttributes, key, value ? 1 : 0);
171 }
172
setIntAttribute(AttributeMap * const headerAttributes,const char * const key,const int value)173 /* static */ void HeaderReadWriteUtils::setIntAttribute(AttributeMap *const headerAttributes,
174 const char *const key, const int value) {
175 AttributeMap::key_type keyVector;
176 insertCharactersIntoVector(key, &keyVector);
177 setIntAttributeInner(headerAttributes, &keyVector, value);
178 }
179
setIntAttributeInner(AttributeMap * const headerAttributes,const AttributeMap::key_type * const key,const int value)180 /* static */ void HeaderReadWriteUtils::setIntAttributeInner(AttributeMap *const headerAttributes,
181 const AttributeMap::key_type *const key, const int value) {
182 AttributeMap::mapped_type valueVector;
183 char charBuf[LARGEST_INT_DIGIT_COUNT];
184 snprintf(charBuf, sizeof(charBuf), "%d", value);
185 insertCharactersIntoVector(charBuf, &valueVector);
186 (*headerAttributes)[*key] = valueVector;
187 }
188
readCodePointVectorAttributeValue(const AttributeMap * const headerAttributes,const char * const key)189 /* static */ const std::vector<int> HeaderReadWriteUtils::readCodePointVectorAttributeValue(
190 const AttributeMap *const headerAttributes, const char *const key) {
191 AttributeMap::key_type keyVector;
192 insertCharactersIntoVector(key, &keyVector);
193 AttributeMap::const_iterator it = headerAttributes->find(keyVector);
194 if (it == headerAttributes->end()) {
195 return std::vector<int>();
196 } else {
197 return it->second;
198 }
199 }
200
readBoolAttributeValue(const AttributeMap * const headerAttributes,const char * const key,const bool defaultValue)201 /* static */ bool HeaderReadWriteUtils::readBoolAttributeValue(
202 const AttributeMap *const headerAttributes, const char *const key,
203 const bool defaultValue) {
204 const int intDefaultValue = defaultValue ? 1 : 0;
205 const int intValue = readIntAttributeValue(headerAttributes, key, intDefaultValue);
206 return intValue != 0;
207 }
208
readIntAttributeValue(const AttributeMap * const headerAttributes,const char * const key,const int defaultValue)209 /* static */ int HeaderReadWriteUtils::readIntAttributeValue(
210 const AttributeMap *const headerAttributes, const char *const key,
211 const int defaultValue) {
212 AttributeMap::key_type keyVector;
213 insertCharactersIntoVector(key, &keyVector);
214 return readIntAttributeValueInner(headerAttributes, &keyVector, defaultValue);
215 }
216
readIntAttributeValueInner(const AttributeMap * const headerAttributes,const AttributeMap::key_type * const key,const int defaultValue)217 /* static */ int HeaderReadWriteUtils::readIntAttributeValueInner(
218 const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key,
219 const int defaultValue) {
220 AttributeMap::const_iterator it = headerAttributes->find(*key);
221 if (it != headerAttributes->end()) {
222 int value = 0;
223 bool isNegative = false;
224 for (size_t i = 0; i < it->second.size(); ++i) {
225 if (i == 0 && it->second.at(i) == '-') {
226 isNegative = true;
227 } else {
228 if (!isdigit(it->second.at(i))) {
229 // If not a number.
230 return defaultValue;
231 }
232 value *= 10;
233 value += it->second.at(i) - '0';
234 }
235 }
236 return isNegative ? -value : value;
237 }
238 return defaultValue;
239 }
240
insertCharactersIntoVector(const char * const characters,std::vector<int> * const vector)241 /* static */ void HeaderReadWriteUtils::insertCharactersIntoVector(const char *const characters,
242 std::vector<int> *const vector) {
243 for (int i = 0; characters[i]; ++i) {
244 vector->push_back(characters[i]);
245 }
246 }
247
248 } // namespace latinime
249