1 /*
2 * Copyright (C) 2009, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "LatinIME: dictionary.cpp"
18
19 #include "suggest/core/dictionary/dictionary.h"
20
21 #include "defines.h"
22 #include "dictionary/interface/dictionary_header_structure_policy.h"
23 #include "dictionary/property/ngram_context.h"
24 #include "suggest/core/dictionary/dictionary_utils.h"
25 #include "suggest/core/result/suggestion_results.h"
26 #include "suggest/core/session/dic_traverse_session.h"
27 #include "suggest/core/suggest.h"
28 #include "suggest/core/suggest_options.h"
29 #include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
30 #include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
31 #include "utils/int_array_view.h"
32 #include "utils/log_utils.h"
33 #include "utils/time_keeper.h"
34
35 namespace latinime {
36
// Number of elements in each code point / char buffer that logDictionaryInfo() uses to hold a
// header attribute value.
const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
38
// Constructs a Dictionary from the given structure policy.
//
// The policy is moved into this object, one Suggest instance is created with the gesture
// suggest policy and one with the typing suggest policy, and finally the dictionary header
// information is logged via logDictionaryInfo().
//
// env: JNI environment, passed through to logDictionaryInfo().
// dictionaryStructureWithBufferPolicy: structure policy to move into this dictionary.
Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
        dictionaryStructureWithBufferPolicy)
        : mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)),
          mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
          mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
    // Runs after the initializer list, so the structure policy member is already set.
    logDictionaryInfo(env);
}
46
getSuggestions(ProximityInfo * proximityInfo,DicTraverseSession * traverseSession,int * xcoordinates,int * ycoordinates,int * times,int * pointerIds,int * inputCodePoints,int inputSize,const NgramContext * const ngramContext,const SuggestOptions * const suggestOptions,const float weightOfLangModelVsSpatialModel,SuggestionResults * const outSuggestionResults) const47 void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
48 int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
49 int inputSize, const NgramContext *const ngramContext,
50 const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel,
51 SuggestionResults *const outSuggestionResults) const {
52 TimeKeeper::setCurrentTime();
53 traverseSession->init(this, ngramContext, suggestOptions);
54 const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
55 suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
56 ycoordinates, times, pointerIds, inputCodePoints, inputSize,
57 weightOfLangModelVsSpatialModel, outSuggestionResults);
58 }
59
// Creates a listener that keeps the n-gram context, the previous word ids, the output
// suggestion results and the structure policy; onVisitEntry() reads all four.
// The arguments are only stored here; nothing is queried or modified at construction time.
Dictionary::NgramListenerForPrediction::NgramListenerForPrediction(
        const NgramContext *const ngramContext, const WordIdArrayView prevWordIds,
        SuggestionResults *const suggestionResults,
        const DictionaryStructureWithBufferPolicy *const dictStructurePolicy)
        : mNgramContext(ngramContext), mPrevWordIds(prevWordIds),
          mSuggestionResults(suggestionResults), mDictStructurePolicy(dictStructurePolicy) {}
66
onVisitEntry(const int ngramProbability,const int targetWordId)67 void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability,
68 const int targetWordId) {
69 if (targetWordId == NOT_A_WORD_ID) {
70 return;
71 }
72 if (mNgramContext->isNthPrevWordBeginningOfSentence(1 /* n */)
73 && ngramProbability == NOT_A_PROBABILITY) {
74 return;
75 }
76 int targetWordCodePoints[MAX_WORD_LENGTH];
77 const int codePointCount = mDictStructurePolicy->getCodePointsAndReturnCodePointCount(
78 targetWordId, MAX_WORD_LENGTH, targetWordCodePoints);
79 if (codePointCount <= 0) {
80 return;
81 }
82 const WordAttributes wordAttributes = mDictStructurePolicy->getWordAttributesInContext(
83 mPrevWordIds, targetWordId, nullptr /* multiBigramMap */);
84 if (wordAttributes.getProbability() == NOT_A_PROBABILITY) {
85 return;
86 }
87 mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount,
88 wordAttributes.getProbability());
89 }
90
getPredictions(const NgramContext * const ngramContext,SuggestionResults * const outSuggestionResults) const91 void Dictionary::getPredictions(const NgramContext *const ngramContext,
92 SuggestionResults *const outSuggestionResults) const {
93 TimeKeeper::setCurrentTime();
94 WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
95 const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(
96 mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray,
97 true /* tryLowerCaseSearch */);
98 NgramListenerForPrediction listener(ngramContext, prevWordIds, outSuggestionResults,
99 mDictionaryStructureWithBufferPolicy.get());
100 mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener);
101 }
102
getProbability(const CodePointArrayView codePoints) const103 int Dictionary::getProbability(const CodePointArrayView codePoints) const {
104 return getNgramProbability(nullptr /* ngramContext */, codePoints);
105 }
106
getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const107 int Dictionary::getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const {
108 TimeKeeper::setCurrentTime();
109 return DictionaryUtils::getMaxProbabilityOfExactMatches(
110 mDictionaryStructureWithBufferPolicy.get(), codePoints);
111 }
112
getNgramProbability(const NgramContext * const ngramContext,const CodePointArrayView codePoints) const113 int Dictionary::getNgramProbability(const NgramContext *const ngramContext,
114 const CodePointArrayView codePoints) const {
115 TimeKeeper::setCurrentTime();
116 const int wordId = mDictionaryStructureWithBufferPolicy->getWordId(codePoints,
117 false /* forceLowerCaseSearch */);
118 if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY;
119 if (!ngramContext) {
120 return getDictionaryStructurePolicy()->getProbabilityOfWord(WordIdArrayView(), wordId);
121 }
122 WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
123 const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(
124 mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray,
125 true /* tryLowerCaseSearch */);
126 return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId);
127 }
128
addUnigramEntry(const CodePointArrayView codePoints,const UnigramProperty * const unigramProperty)129 bool Dictionary::addUnigramEntry(const CodePointArrayView codePoints,
130 const UnigramProperty *const unigramProperty) {
131 if (unigramProperty->representsBeginningOfSentence()
132 && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
133 ->supportsBeginningOfSentence()) {
134 AKLOGE("The dictionary doesn't support Beginning-of-Sentence.");
135 return false;
136 }
137 TimeKeeper::setCurrentTime();
138 return mDictionaryStructureWithBufferPolicy->addUnigramEntry(codePoints, unigramProperty);
139 }
140
removeUnigramEntry(const CodePointArrayView codePoints)141 bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) {
142 TimeKeeper::setCurrentTime();
143 return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints);
144 }
145
addNgramEntry(const NgramProperty * const ngramProperty)146 bool Dictionary::addNgramEntry(const NgramProperty *const ngramProperty) {
147 TimeKeeper::setCurrentTime();
148 return mDictionaryStructureWithBufferPolicy->addNgramEntry(ngramProperty);
149 }
150
removeNgramEntry(const NgramContext * const ngramContext,const CodePointArrayView codePoints)151 bool Dictionary::removeNgramEntry(const NgramContext *const ngramContext,
152 const CodePointArrayView codePoints) {
153 TimeKeeper::setCurrentTime();
154 return mDictionaryStructureWithBufferPolicy->removeNgramEntry(ngramContext, codePoints);
155 }
156
updateEntriesForWordWithNgramContext(const NgramContext * const ngramContext,const CodePointArrayView codePoints,const bool isValidWord,const HistoricalInfo historicalInfo)157 bool Dictionary::updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
158 const CodePointArrayView codePoints, const bool isValidWord,
159 const HistoricalInfo historicalInfo) {
160 TimeKeeper::setCurrentTime();
161 return mDictionaryStructureWithBufferPolicy->updateEntriesForWordWithNgramContext(ngramContext,
162 codePoints, isValidWord, historicalInfo);
163 }
164
flush(const char * const filePath)165 bool Dictionary::flush(const char *const filePath) {
166 TimeKeeper::setCurrentTime();
167 return mDictionaryStructureWithBufferPolicy->flush(filePath);
168 }
169
flushWithGC(const char * const filePath)170 bool Dictionary::flushWithGC(const char *const filePath) {
171 TimeKeeper::setCurrentTime();
172 return mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
173 }
174
needsToRunGC(const bool mindsBlockByGC)175 bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
176 TimeKeeper::setCurrentTime();
177 return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
178 }
179
getProperty(const char * const query,const int queryLength,char * const outResult,const int maxResultLength)180 void Dictionary::getProperty(const char *const query, const int queryLength, char *const outResult,
181 const int maxResultLength) {
182 TimeKeeper::setCurrentTime();
183 return mDictionaryStructureWithBufferPolicy->getProperty(query, queryLength, outResult,
184 maxResultLength);
185 }
186
getWordProperty(const CodePointArrayView codePoints)187 const WordProperty Dictionary::getWordProperty(const CodePointArrayView codePoints) {
188 TimeKeeper::setCurrentTime();
189 return mDictionaryStructureWithBufferPolicy->getWordProperty(codePoints);
190 }
191
getNextWordAndNextToken(const int token,int * const outCodePoints,int * const outCodePointCount)192 int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,
193 int *const outCodePointCount) {
194 TimeKeeper::setCurrentTime();
195 return mDictionaryStructureWithBufferPolicy->getNextWordAndNextToken(
196 token, outCodePoints, outCodePointCount);
197 }
198
logDictionaryInfo(JNIEnv * const env) const199 void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
200 int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
201 int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
202 int dateStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
203 const DictionaryHeaderStructurePolicy *const headerPolicy =
204 getDictionaryStructurePolicy()->getHeaderStructurePolicy();
205 headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer,
206 HEADER_ATTRIBUTE_BUFFER_SIZE);
207 headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer,
208 HEADER_ATTRIBUTE_BUFFER_SIZE);
209 headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer,
210 HEADER_ATTRIBUTE_BUFFER_SIZE);
211
212 char dictionaryIdCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
213 char versionStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
214 char dateStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
215 intArrayToCharArray(dictionaryIdCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
216 dictionaryIdCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
217 intArrayToCharArray(versionStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
218 versionStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
219 intArrayToCharArray(dateStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
220 dateStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
221
222 LogUtils::logToJava(env,
223 "Dictionary info: dictionary = %s ; version = %s ; date = %s",
224 dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer);
225 }
226
227 } // namespace latinime
228