1 /*
2  * Copyright (C) 2009, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "LatinIME: dictionary.cpp"
18 
19 #include "suggest/core/dictionary/dictionary.h"
20 
21 #include "defines.h"
22 #include "dictionary/interface/dictionary_header_structure_policy.h"
23 #include "dictionary/property/ngram_context.h"
24 #include "suggest/core/dictionary/dictionary_utils.h"
25 #include "suggest/core/result/suggestion_results.h"
26 #include "suggest/core/session/dic_traverse_session.h"
27 #include "suggest/core/suggest.h"
28 #include "suggest/core/suggest_options.h"
29 #include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
30 #include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
31 #include "utils/int_array_view.h"
32 #include "utils/log_utils.h"
33 #include "utils/time_keeper.h"
34 
35 namespace latinime {
36 
// Number of elements in each fixed-size buffer that logDictionaryInfo() uses to hold a header
// attribute, both in its code point form and in its char form.
const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
38 
Dictionary(JNIEnv * env,DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy)39 Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
40         dictionaryStructureWithBufferPolicy)
41         : mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)),
42           mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
43           mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
44     logDictionaryInfo(env);
45 }
46 
getSuggestions(ProximityInfo * proximityInfo,DicTraverseSession * traverseSession,int * xcoordinates,int * ycoordinates,int * times,int * pointerIds,int * inputCodePoints,int inputSize,const NgramContext * const ngramContext,const SuggestOptions * const suggestOptions,const float weightOfLangModelVsSpatialModel,SuggestionResults * const outSuggestionResults) const47 void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
48         int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
49         int inputSize, const NgramContext *const ngramContext,
50         const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel,
51         SuggestionResults *const outSuggestionResults) const {
52     TimeKeeper::setCurrentTime();
53     traverseSession->init(this, ngramContext, suggestOptions);
54     const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
55     suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
56             ycoordinates, times, pointerIds, inputCodePoints, inputSize,
57             weightOfLangModelVsSpatialModel, outSuggestionResults);
58 }
59 
NgramListenerForPrediction(const NgramContext * const ngramContext,const WordIdArrayView prevWordIds,SuggestionResults * const suggestionResults,const DictionaryStructureWithBufferPolicy * const dictStructurePolicy)60 Dictionary::NgramListenerForPrediction::NgramListenerForPrediction(
61         const NgramContext *const ngramContext, const WordIdArrayView prevWordIds,
62         SuggestionResults *const suggestionResults,
63         const DictionaryStructureWithBufferPolicy *const dictStructurePolicy)
64     : mNgramContext(ngramContext), mPrevWordIds(prevWordIds),
65       mSuggestionResults(suggestionResults), mDictStructurePolicy(dictStructurePolicy) {}
66 
onVisitEntry(const int ngramProbability,const int targetWordId)67 void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability,
68         const int targetWordId) {
69     if (targetWordId == NOT_A_WORD_ID) {
70         return;
71     }
72     if (mNgramContext->isNthPrevWordBeginningOfSentence(1 /* n */)
73             && ngramProbability == NOT_A_PROBABILITY) {
74         return;
75     }
76     int targetWordCodePoints[MAX_WORD_LENGTH];
77     const int codePointCount = mDictStructurePolicy->getCodePointsAndReturnCodePointCount(
78             targetWordId, MAX_WORD_LENGTH, targetWordCodePoints);
79     if (codePointCount <= 0) {
80         return;
81     }
82     const WordAttributes wordAttributes = mDictStructurePolicy->getWordAttributesInContext(
83             mPrevWordIds, targetWordId, nullptr /* multiBigramMap */);
84     if (wordAttributes.getProbability() == NOT_A_PROBABILITY) {
85         return;
86     }
87     mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount,
88             wordAttributes.getProbability());
89 }
90 
getPredictions(const NgramContext * const ngramContext,SuggestionResults * const outSuggestionResults) const91 void Dictionary::getPredictions(const NgramContext *const ngramContext,
92         SuggestionResults *const outSuggestionResults) const {
93     TimeKeeper::setCurrentTime();
94     WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
95     const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(
96             mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray,
97             true /* tryLowerCaseSearch */);
98     NgramListenerForPrediction listener(ngramContext, prevWordIds, outSuggestionResults,
99             mDictionaryStructureWithBufferPolicy.get());
100     mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener);
101 }
102 
getProbability(const CodePointArrayView codePoints) const103 int Dictionary::getProbability(const CodePointArrayView codePoints) const {
104     return getNgramProbability(nullptr /* ngramContext */, codePoints);
105 }
106 
getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const107 int Dictionary::getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const {
108     TimeKeeper::setCurrentTime();
109     return DictionaryUtils::getMaxProbabilityOfExactMatches(
110             mDictionaryStructureWithBufferPolicy.get(), codePoints);
111 }
112 
getNgramProbability(const NgramContext * const ngramContext,const CodePointArrayView codePoints) const113 int Dictionary::getNgramProbability(const NgramContext *const ngramContext,
114         const CodePointArrayView codePoints) const {
115     TimeKeeper::setCurrentTime();
116     const int wordId = mDictionaryStructureWithBufferPolicy->getWordId(codePoints,
117             false /* forceLowerCaseSearch */);
118     if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY;
119     if (!ngramContext) {
120         return getDictionaryStructurePolicy()->getProbabilityOfWord(WordIdArrayView(), wordId);
121     }
122     WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
123     const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(
124             mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray,
125             true /* tryLowerCaseSearch */);
126     return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId);
127 }
128 
addUnigramEntry(const CodePointArrayView codePoints,const UnigramProperty * const unigramProperty)129 bool Dictionary::addUnigramEntry(const CodePointArrayView codePoints,
130         const UnigramProperty *const unigramProperty) {
131     if (unigramProperty->representsBeginningOfSentence()
132             && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
133                     ->supportsBeginningOfSentence()) {
134         AKLOGE("The dictionary doesn't support Beginning-of-Sentence.");
135         return false;
136     }
137     TimeKeeper::setCurrentTime();
138     return mDictionaryStructureWithBufferPolicy->addUnigramEntry(codePoints, unigramProperty);
139 }
140 
removeUnigramEntry(const CodePointArrayView codePoints)141 bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) {
142     TimeKeeper::setCurrentTime();
143     return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints);
144 }
145 
addNgramEntry(const NgramProperty * const ngramProperty)146 bool Dictionary::addNgramEntry(const NgramProperty *const ngramProperty) {
147     TimeKeeper::setCurrentTime();
148     return mDictionaryStructureWithBufferPolicy->addNgramEntry(ngramProperty);
149 }
150 
removeNgramEntry(const NgramContext * const ngramContext,const CodePointArrayView codePoints)151 bool Dictionary::removeNgramEntry(const NgramContext *const ngramContext,
152         const CodePointArrayView codePoints) {
153     TimeKeeper::setCurrentTime();
154     return mDictionaryStructureWithBufferPolicy->removeNgramEntry(ngramContext, codePoints);
155 }
156 
updateEntriesForWordWithNgramContext(const NgramContext * const ngramContext,const CodePointArrayView codePoints,const bool isValidWord,const HistoricalInfo historicalInfo)157 bool Dictionary::updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
158         const CodePointArrayView codePoints, const bool isValidWord,
159         const HistoricalInfo historicalInfo) {
160     TimeKeeper::setCurrentTime();
161     return mDictionaryStructureWithBufferPolicy->updateEntriesForWordWithNgramContext(ngramContext,
162             codePoints, isValidWord, historicalInfo);
163 }
164 
flush(const char * const filePath)165 bool Dictionary::flush(const char *const filePath) {
166     TimeKeeper::setCurrentTime();
167     return mDictionaryStructureWithBufferPolicy->flush(filePath);
168 }
169 
flushWithGC(const char * const filePath)170 bool Dictionary::flushWithGC(const char *const filePath) {
171     TimeKeeper::setCurrentTime();
172     return mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
173 }
174 
needsToRunGC(const bool mindsBlockByGC)175 bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
176     TimeKeeper::setCurrentTime();
177     return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
178 }
179 
getProperty(const char * const query,const int queryLength,char * const outResult,const int maxResultLength)180 void Dictionary::getProperty(const char *const query, const int queryLength, char *const outResult,
181         const int maxResultLength) {
182     TimeKeeper::setCurrentTime();
183     return mDictionaryStructureWithBufferPolicy->getProperty(query, queryLength, outResult,
184             maxResultLength);
185 }
186 
getWordProperty(const CodePointArrayView codePoints)187 const WordProperty Dictionary::getWordProperty(const CodePointArrayView codePoints) {
188     TimeKeeper::setCurrentTime();
189     return mDictionaryStructureWithBufferPolicy->getWordProperty(codePoints);
190 }
191 
getNextWordAndNextToken(const int token,int * const outCodePoints,int * const outCodePointCount)192 int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,
193         int *const outCodePointCount) {
194     TimeKeeper::setCurrentTime();
195     return mDictionaryStructureWithBufferPolicy->getNextWordAndNextToken(
196             token, outCodePoints, outCodePointCount);
197 }
198 
logDictionaryInfo(JNIEnv * const env) const199 void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
200     int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
201     int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
202     int dateStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
203     const DictionaryHeaderStructurePolicy *const headerPolicy =
204             getDictionaryStructurePolicy()->getHeaderStructurePolicy();
205     headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer,
206             HEADER_ATTRIBUTE_BUFFER_SIZE);
207     headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer,
208             HEADER_ATTRIBUTE_BUFFER_SIZE);
209     headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer,
210             HEADER_ATTRIBUTE_BUFFER_SIZE);
211 
212     char dictionaryIdCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
213     char versionStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
214     char dateStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
215     intArrayToCharArray(dictionaryIdCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
216             dictionaryIdCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
217     intArrayToCharArray(versionStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
218             versionStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
219     intArrayToCharArray(dateStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
220             dateStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
221 
222     LogUtils::logToJava(env,
223             "Dictionary info: dictionary = %s ; version = %s ; date = %s",
224             dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer);
225 }
226 
227 } // namespace latinime
228