1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "dictionary/structure/v4/ver4_dict_buffers.h"
18 
#include <cerrno>
#include <cstdio>
#include <cstring>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <vector>
25 
26 #include "dictionary/utils/byte_array_utils.h"
27 #include "dictionary/utils/dict_file_writing_utils.h"
28 #include "dictionary/utils/file_utils.h"
29 #include "utils/byte_array_view.h"
30 
31 namespace latinime {
32 
openVer4DictBuffers(const char * const dictPath,MmappedBuffer::MmappedBufferPtr && headerBuffer,const FormatUtils::FORMAT_VERSION formatVersion)33 /* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
34         const char *const dictPath, MmappedBuffer::MmappedBufferPtr &&headerBuffer,
35         const FormatUtils::FORMAT_VERSION formatVersion) {
36     if (!headerBuffer) {
37         ASSERT(false);
38         AKLOGE("The header buffer must be valid to open ver4 dict buffers.");
39         return Ver4DictBuffersPtr(nullptr);
40     }
41     // TODO: take only dictDirPath, and open both header and trie files in the constructor below
42     const bool isUpdatable = headerBuffer->isUpdatable();
43     MmappedBuffer::MmappedBufferPtr bodyBuffer = MmappedBuffer::openBuffer(dictPath,
44             Ver4DictConstants::BODY_FILE_EXTENSION, isUpdatable);
45     if (!bodyBuffer) {
46         return Ver4DictBuffersPtr(nullptr);
47     }
48     std::vector<ReadWriteByteArrayView> buffers;
49     const ReadWriteByteArrayView buffer = bodyBuffer->getReadWriteByteArrayView();
50     int position = 0;
51     while (position < static_cast<int>(buffer.size())) {
52         const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition(
53                 buffer.data(), &position);
54         buffers.push_back(buffer.subView(position, bufferSize));
55         position += bufferSize;
56         if (bufferSize < 0 || position < 0 || position > static_cast<int>(buffer.size())) {
57             AKLOGE("The dict body file is corrupted.");
58             return Ver4DictBuffersPtr(nullptr);
59         }
60     }
61     if (buffers.size() != Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE) {
62         AKLOGE("The dict body file is corrupted.");
63         return Ver4DictBuffersPtr(nullptr);
64     }
65     return Ver4DictBuffersPtr(new Ver4DictBuffers(std::move(headerBuffer), std::move(bodyBuffer),
66             formatVersion, buffers));
67 }
68 
flushHeaderAndDictBuffers(const char * const dictDirPath,const BufferWithExtendableBuffer * const headerBuffer) const69 bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
70         const BufferWithExtendableBuffer *const headerBuffer) const {
71     // Create temporary directory.
72     const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath,
73             DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
74     char tmpDirPath[tmpDirPathBufSize];
75     FileUtils::getFilePathWithSuffix(dictDirPath,
76             DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, tmpDirPathBufSize,
77             tmpDirPath);
78     if (FileUtils::existsDir(tmpDirPath)) {
79         if (!FileUtils::removeDirAndFiles(tmpDirPath)) {
80             AKLOGE("Existing directory %s cannot be removed.", tmpDirPath);
81             ASSERT(false);
82             return false;
83         }
84     }
85     umask(S_IWGRP | S_IWOTH);
86     if (mkdir(tmpDirPath, S_IRWXU) == -1) {
87         AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
88         return false;
89     }
90     // Get dictionary base path.
91     const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */;
92     char dictName[dictNameBufSize];
93     FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName);
94     const int dictPathBufSize = FileUtils::getFilePathBufSize(tmpDirPath, dictName);
95     char dictPath[dictPathBufSize];
96     FileUtils::getFilePath(tmpDirPath, dictName, dictPathBufSize, dictPath);
97 
98     // Write header file.
99     if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
100             Ver4DictConstants::HEADER_FILE_EXTENSION, headerBuffer)) {
101         AKLOGE("Dictionary header file %s%s cannot be written.", tmpDirPath,
102                 Ver4DictConstants::HEADER_FILE_EXTENSION);
103         return false;
104     }
105 
106     // Write body file.
107     const int bodyFilePathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictPath,
108             Ver4DictConstants::BODY_FILE_EXTENSION);
109     char bodyFilePath[bodyFilePathBufSize];
110     FileUtils::getFilePathWithSuffix(dictPath, Ver4DictConstants::BODY_FILE_EXTENSION,
111             bodyFilePathBufSize, bodyFilePath);
112 
113     const int fd = open(bodyFilePath, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
114     if (fd == -1) {
115         AKLOGE("File %s cannot be opened. errno: %d", bodyFilePath, errno);
116         ASSERT(false);
117         return false;
118     }
119     FILE *const file = fdopen(fd, "wb");
120     if (!file) {
121         AKLOGE("fdopen failed for the file %s. errno: %d", bodyFilePath, errno);
122         ASSERT(false);
123         return false;
124     }
125 
126     if (!flushDictBuffers(file)) {
127         fclose(file);
128         return false;
129     }
130     fclose(file);
131     // Remove existing dictionary.
132     if (!FileUtils::removeDirAndFiles(dictDirPath)) {
133         AKLOGE("Existing directory %s cannot be removed.", dictDirPath);
134         ASSERT(false);
135         return false;
136     }
137     // Rename temporary directory.
138     if (rename(tmpDirPath, dictDirPath) != 0) {
139         AKLOGE("%s cannot be renamed to %s", tmpDirPath, dictDirPath);
140         ASSERT(false);
141         return false;
142     }
143     return true;
144 }
145 
flushDictBuffers(FILE * const file) const146 bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const {
147     // Write trie.
148     if (!DictFileWritingUtils::writeBufferToFileTail(file, &mExpandableTrieBuffer)) {
149         AKLOGE("Trie cannot be written.");
150         return false;
151     }
152     // Write terminal position lookup table.
153     if (!mTerminalPositionLookupTable.flushToFile(file)) {
154         AKLOGE("Terminal position lookup table cannot be written.");
155         return false;
156     }
157     // Write language model content.
158     if (!mLanguageModelDictContent.save(file)) {
159         AKLOGE("Language model dict content cannot be written.");
160         return false;
161     }
162     // Write shortcut dict content.
163     if (!mShortcutDictContent.flushToFile(file)) {
164         AKLOGE("Shortcut dict content cannot be written.");
165         return false;
166     }
167     return true;
168 }
169 
Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr && headerBuffer,MmappedBuffer::MmappedBufferPtr && bodyBuffer,const FormatUtils::FORMAT_VERSION formatVersion,const std::vector<ReadWriteByteArrayView> & contentBuffers)170 Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
171         MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
172         const FormatUtils::FORMAT_VERSION formatVersion,
173         const std::vector<ReadWriteByteArrayView> &contentBuffers)
174         : mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(std::move(bodyBuffer)),
175           mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion),
176           mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(),
177                   BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
178           mExpandableTrieBuffer(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
179                   BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
180           mTerminalPositionLookupTable(
181                   contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
182           mLanguageModelDictContent(&contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
183                   mHeaderPolicy.hasHistoricalInfoOfWords()),
184           mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
185           mIsUpdatable(mDictBuffer->isUpdatable()) {}
186 
Ver4DictBuffers(const HeaderPolicy * const headerPolicy,const int maxTrieSize)187 Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
188         : mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy),
189           mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
190           mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
191           mLanguageModelDictContent(headerPolicy->hasHistoricalInfoOfWords()),
192           mShortcutDictContent(),  mIsUpdatable(true) {}
193 
194 } // namespace latinime
195