1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "LatinIME: jni: BinaryDictionary"
18 
19 #include "com_android_inputmethod_latin_BinaryDictionary.h"
20 
21 #include <cstring> // for memset()
22 #include <vector>
23 
24 #include "defines.h"
25 #include "dictionary/property/unigram_property.h"
26 #include "dictionary/property/ngram_context.h"
27 #include "dictionary/property/word_property.h"
28 #include "dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
29 #include "jni.h"
30 #include "jni_common.h"
31 #include "suggest/core/dictionary/dictionary.h"
32 #include "suggest/core/result/suggestion_results.h"
33 #include "suggest/core/suggest_options.h"
34 #include "utils/char_utils.h"
35 #include "utils/int_array_view.h"
36 #include "utils/jni_data_utils.h"
37 #include "utils/log_utils.h"
38 #include "utils/profiler.h"
39 #include "utils/time_keeper.h"
40 
41 namespace latinime {
42 
43 class ProximityInfo;
44 
latinime_BinaryDictionary_open(JNIEnv * env,jclass clazz,jstring sourceDir,jlong dictOffset,jlong dictSize,jboolean isUpdatable)45 static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir,
46         jlong dictOffset, jlong dictSize, jboolean isUpdatable) {
47     PROF_INIT;
48     PROF_TIMER_START(66);
49     const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir);
50     if (sourceDirUtf8Length <= 0) {
51         AKLOGE("DICT: Can't get sourceDir string");
52         return 0;
53     }
54     char sourceDirChars[sourceDirUtf8Length + 1];
55     env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars);
56     sourceDirChars[sourceDirUtf8Length] = '\0';
57     DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy(
58             DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile(
59                     sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize),
60                     isUpdatable == JNI_TRUE));
61     if (!dictionaryStructureWithBufferPolicy) {
62         return 0;
63     }
64 
65     Dictionary *const dictionary =
66             new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy));
67     PROF_TIMER_END(66);
68     return reinterpret_cast<jlong>(dictionary);
69 }
70 
latinime_BinaryDictionary_createOnMemory(JNIEnv * env,jclass clazz,jlong formatVersion,jstring locale,jobjectArray attributeKeyStringArray,jobjectArray attributeValueStringArray)71 static jlong latinime_BinaryDictionary_createOnMemory(JNIEnv *env, jclass clazz,
72         jlong formatVersion, jstring locale, jobjectArray attributeKeyStringArray,
73         jobjectArray attributeValueStringArray) {
74     const jsize localeUtf8Length = env->GetStringUTFLength(locale);
75     char localeChars[localeUtf8Length + 1];
76     env->GetStringUTFRegion(locale, 0, env->GetStringLength(locale), localeChars);
77     localeChars[localeUtf8Length] = '\0';
78     std::vector<int> localeCodePoints;
79     HeaderReadWriteUtils::insertCharactersIntoVector(localeChars, &localeCodePoints);
80     const int keyCount = env->GetArrayLength(attributeKeyStringArray);
81     const int valueCount = env->GetArrayLength(attributeValueStringArray);
82     if (keyCount != valueCount) {
83         return false;
84     }
85     DictionaryHeaderStructurePolicy::AttributeMap attributeMap =
86             JniDataUtils::constructAttributeMap(env, attributeKeyStringArray,
87                     attributeValueStringArray);
88     DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy =
89             DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict(
90                     formatVersion, localeCodePoints, &attributeMap);
91     if (!dictionaryStructureWithBufferPolicy) {
92         return 0;
93     }
94     Dictionary *const dictionary =
95             new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy));
96     return reinterpret_cast<jlong>(dictionary);
97 }
98 
latinime_BinaryDictionary_flush(JNIEnv * env,jclass clazz,jlong dict,jstring filePath)99 static bool latinime_BinaryDictionary_flush(JNIEnv *env, jclass clazz, jlong dict,
100         jstring filePath) {
101     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
102     if (!dictionary) return false;
103     const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
104     char filePathChars[filePathUtf8Length + 1];
105     env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
106     filePathChars[filePathUtf8Length] = '\0';
107     return dictionary->flush(filePathChars);
108 }
109 
latinime_BinaryDictionary_needsToRunGC(JNIEnv * env,jclass clazz,jlong dict,jboolean mindsBlockByGC)110 static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz,
111         jlong dict, jboolean mindsBlockByGC) {
112     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
113     if (!dictionary) return false;
114     return dictionary->needsToRunGC(mindsBlockByGC == JNI_TRUE);
115 }
116 
latinime_BinaryDictionary_flushWithGC(JNIEnv * env,jclass clazz,jlong dict,jstring filePath)117 static bool latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict,
118         jstring filePath) {
119     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
120     if (!dictionary) return false;
121     const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
122     char filePathChars[filePathUtf8Length + 1];
123     env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
124     filePathChars[filePathUtf8Length] = '\0';
125     return dictionary->flushWithGC(filePathChars);
126 }
127 
latinime_BinaryDictionary_close(JNIEnv * env,jclass clazz,jlong dict)128 static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) {
129     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
130     if (!dictionary) return;
131     delete dictionary;
132 }
133 
latinime_BinaryDictionary_getHeaderInfo(JNIEnv * env,jclass clazz,jlong dict,jintArray outHeaderSize,jintArray outFormatVersion,jobject outAttributeKeys,jobject outAttributeValues)134 static void latinime_BinaryDictionary_getHeaderInfo(JNIEnv *env, jclass clazz, jlong dict,
135         jintArray outHeaderSize, jintArray outFormatVersion, jobject outAttributeKeys,
136         jobject outAttributeValues) {
137     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
138     if (!dictionary) return;
139     const DictionaryHeaderStructurePolicy *const headerPolicy =
140             dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
141     JniDataUtils::putIntToArray(env, outHeaderSize, 0 /* index */, headerPolicy->getSize());
142     JniDataUtils::putIntToArray(env, outFormatVersion, 0 /* index */,
143             headerPolicy->getFormatVersionNumber());
144     // Output attribute map
145     jclass arrayListClass = env->FindClass("java/util/ArrayList");
146     jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
147     const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap =
148             headerPolicy->getAttributeMap();
149     for (DictionaryHeaderStructurePolicy::AttributeMap::const_iterator it = attributeMap->begin();
150             it != attributeMap->end(); ++it) {
151         // Output key
152         jintArray keyCodePointArray = env->NewIntArray(it->first.size());
153         JniDataUtils::outputCodePoints(env, keyCodePointArray, 0 /* start */,
154                 it->first.size(), it->first.data(), it->first.size(),
155                 false /* needsNullTermination */);
156         env->CallBooleanMethod(outAttributeKeys, addMethodId, keyCodePointArray);
157         env->DeleteLocalRef(keyCodePointArray);
158         // Output value
159         jintArray valueCodePointArray = env->NewIntArray(it->second.size());
160         JniDataUtils::outputCodePoints(env, valueCodePointArray, 0 /* start */,
161                 it->second.size(), it->second.data(), it->second.size(),
162                 false /* needsNullTermination */);
163         env->CallBooleanMethod(outAttributeValues, addMethodId, valueCodePointArray);
164         env->DeleteLocalRef(valueCodePointArray);
165     }
166     env->DeleteLocalRef(arrayListClass);
167     return;
168 }
169 
latinime_BinaryDictionary_getFormatVersion(JNIEnv * env,jclass clazz,jlong dict)170 static int latinime_BinaryDictionary_getFormatVersion(JNIEnv *env, jclass clazz, jlong dict) {
171     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
172     if (!dictionary) return 0;
173     const DictionaryHeaderStructurePolicy *const headerPolicy =
174             dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
175     return headerPolicy->getFormatVersionNumber();
176 }
177 
latinime_BinaryDictionary_getSuggestions(JNIEnv * env,jclass clazz,jlong dict,jlong proximityInfo,jlong dicTraverseSession,jintArray xCoordinatesArray,jintArray yCoordinatesArray,jintArray timesArray,jintArray pointerIdsArray,jintArray inputCodePointsArray,jint inputSize,jintArray suggestOptions,jobjectArray prevWordCodePointArrays,jbooleanArray isBeginningOfSentenceArray,jint prevWordCount,jintArray outSuggestionCount,jintArray outCodePointsArray,jintArray outScoresArray,jintArray outSpaceIndicesArray,jintArray outTypesArray,jintArray outAutoCommitFirstWordConfidenceArray,jfloatArray inOutWeightOfLangModelVsSpatialModel)178 static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict,
179         jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
180         jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
181         jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions,
182         jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
183         jint prevWordCount, jintArray outSuggestionCount, jintArray outCodePointsArray,
184         jintArray outScoresArray, jintArray outSpaceIndicesArray, jintArray outTypesArray,
185         jintArray outAutoCommitFirstWordConfidenceArray,
186         jfloatArray inOutWeightOfLangModelVsSpatialModel) {
187     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
188     // Assign 0 to outSuggestionCount here in case of returning earlier in this method.
189     JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, 0);
190     if (!dictionary) {
191         return;
192     }
193     ProximityInfo *pInfo = reinterpret_cast<ProximityInfo *>(proximityInfo);
194     DicTraverseSession *traverseSession =
195             reinterpret_cast<DicTraverseSession *>(dicTraverseSession);
196     if (!traverseSession) {
197         return;
198     }
199     // Input values
200     int xCoordinates[inputSize];
201     int yCoordinates[inputSize];
202     int times[inputSize];
203     int pointerIds[inputSize];
204     const jsize inputCodePointsLength = env->GetArrayLength(inputCodePointsArray);
205     int inputCodePoints[inputCodePointsLength];
206     env->GetIntArrayRegion(xCoordinatesArray, 0, inputSize, xCoordinates);
207     env->GetIntArrayRegion(yCoordinatesArray, 0, inputSize, yCoordinates);
208     env->GetIntArrayRegion(timesArray, 0, inputSize, times);
209     env->GetIntArrayRegion(pointerIdsArray, 0, inputSize, pointerIds);
210     env->GetIntArrayRegion(inputCodePointsArray, 0, inputCodePointsLength, inputCodePoints);
211 
212     const jsize numberOfOptions = env->GetArrayLength(suggestOptions);
213     int options[numberOfOptions];
214     env->GetIntArrayRegion(suggestOptions, 0, numberOfOptions, options);
215     SuggestOptions givenSuggestOptions(options, numberOfOptions);
216 
217     // Output values
218     /* By the way, let's check the output array length here to make sure */
219     const jsize outputCodePointsLength = env->GetArrayLength(outCodePointsArray);
220     if (outputCodePointsLength != (MAX_WORD_LENGTH * MAX_RESULTS)) {
221         AKLOGE("Invalid outputCodePointsLength: %d", outputCodePointsLength);
222         ASSERT(false);
223         return;
224     }
225     const jsize scoresLength = env->GetArrayLength(outScoresArray);
226     if (scoresLength != MAX_RESULTS) {
227         AKLOGE("Invalid scoresLength: %d", scoresLength);
228         ASSERT(false);
229         return;
230     }
231     const jsize outputAutoCommitFirstWordConfidenceLength =
232             env->GetArrayLength(outAutoCommitFirstWordConfidenceArray);
233     ASSERT(outputAutoCommitFirstWordConfidenceLength == 1);
234     if (outputAutoCommitFirstWordConfidenceLength != 1) {
235         // We only use the first result, as obviously we will only ever autocommit the first one
236         AKLOGE("Invalid outputAutoCommitFirstWordConfidenceLength: %d",
237                 outputAutoCommitFirstWordConfidenceLength);
238         ASSERT(false);
239         return;
240     }
241     float weightOfLangModelVsSpatialModel;
242     env->GetFloatArrayRegion(inOutWeightOfLangModelVsSpatialModel, 0, 1 /* len */,
243             &weightOfLangModelVsSpatialModel);
244     SuggestionResults suggestionResults(MAX_RESULTS);
245     const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
246             prevWordCodePointArrays, isBeginningOfSentenceArray, prevWordCount);
247     if (givenSuggestOptions.isGesture() || inputSize > 0) {
248         // TODO: Use SuggestionResults to return suggestions.
249         dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
250                 times, pointerIds, inputCodePoints, inputSize, &ngramContext,
251                 &givenSuggestOptions, weightOfLangModelVsSpatialModel, &suggestionResults);
252     } else {
253         dictionary->getPredictions(&ngramContext, &suggestionResults);
254     }
255     if (DEBUG_DICT) {
256         suggestionResults.dumpSuggestions();
257     }
258     suggestionResults.outputSuggestions(env, outSuggestionCount, outCodePointsArray,
259             outScoresArray, outSpaceIndicesArray, outTypesArray,
260             outAutoCommitFirstWordConfidenceArray, inOutWeightOfLangModelVsSpatialModel);
261 }
262 
latinime_BinaryDictionary_getProbability(JNIEnv * env,jclass clazz,jlong dict,jintArray word)263 static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict,
264         jintArray word) {
265     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
266     if (!dictionary) return NOT_A_PROBABILITY;
267     const jsize codePointCount = env->GetArrayLength(word);
268     int codePoints[codePointCount];
269     env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
270     return dictionary->getProbability(CodePointArrayView(codePoints, codePointCount));
271 }
272 
latinime_BinaryDictionary_getMaxProbabilityOfExactMatches(JNIEnv * env,jclass clazz,jlong dict,jintArray word)273 static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches(
274         JNIEnv *env, jclass clazz, jlong dict, jintArray word) {
275     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
276     if (!dictionary) return NOT_A_PROBABILITY;
277     const jsize codePointCount = env->GetArrayLength(word);
278     int codePoints[codePointCount];
279     env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
280     return dictionary->getMaxProbabilityOfExactMatches(
281             CodePointArrayView(codePoints, codePointCount));
282 }
283 
latinime_BinaryDictionary_getNgramProbability(JNIEnv * env,jclass clazz,jlong dict,jobjectArray prevWordCodePointArrays,jbooleanArray isBeginningOfSentenceArray,jintArray word)284 static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass clazz,
285         jlong dict, jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
286         jintArray word) {
287     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
288     if (!dictionary) return JNI_FALSE;
289     const jsize wordLength = env->GetArrayLength(word);
290     int wordCodePoints[wordLength];
291     env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
292     const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
293             prevWordCodePointArrays, isBeginningOfSentenceArray,
294             env->GetArrayLength(prevWordCodePointArrays));
295     return dictionary->getNgramProbability(&ngramContext,
296             CodePointArrayView(wordCodePoints, wordLength));
297 }
298 
299 // Method to iterate all words in the dictionary for makedict.
300 // If token is 0, this method newly starts iterating the dictionary. This method returns 0 when
301 // the dictionary does not have a next word.
latinime_BinaryDictionary_getNextWord(JNIEnv * env,jclass clazz,jlong dict,jint token,jintArray outCodePoints,jbooleanArray outIsBeginningOfSentence)302 static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz,
303         jlong dict, jint token, jintArray outCodePoints, jbooleanArray outIsBeginningOfSentence) {
304     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
305     if (!dictionary) return 0;
306     const jsize codePointBufSize = env->GetArrayLength(outCodePoints);
307     if (codePointBufSize != MAX_WORD_LENGTH) {
308         AKLOGE("Invalid outCodePointsLength: %d", codePointBufSize);
309         ASSERT(false);
310         return 0;
311     }
312     int wordCodePoints[codePointBufSize];
313     int wordCodePointCount = 0;
314     const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints,
315             &wordCodePointCount);
316     JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
317             MAX_WORD_LENGTH /* maxLength */, wordCodePoints, wordCodePointCount,
318             false /* needsNullTermination */);
319     bool isBeginningOfSentence = false;
320     if (wordCodePointCount > 0 && wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
321         isBeginningOfSentence = true;
322     }
323     JniDataUtils::putBooleanToArray(env, outIsBeginningOfSentence, 0 /* index */,
324             isBeginningOfSentence);
325     return nextToken;
326 }
327 
latinime_BinaryDictionary_getWordProperty(JNIEnv * env,jclass clazz,jlong dict,jintArray word,jboolean isBeginningOfSentence,jintArray outCodePoints,jbooleanArray outFlags,jintArray outProbabilityInfo,jobject outNgramPrevWordsArray,jobject outNgramPrevWordIsBeginningOfSentenceArray,jobject outNgramTargets,jobject outNgramProbabilityInfo,jobject outShortcutTargets,jobject outShortcutProbabilities)328 static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
329         jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints,
330         jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outNgramPrevWordsArray,
331         jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets,
332         jobject outNgramProbabilityInfo, jobject outShortcutTargets,
333         jobject outShortcutProbabilities) {
334     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
335     if (!dictionary) return;
336     const jsize wordLength = env->GetArrayLength(word);
337     if (wordLength > MAX_WORD_LENGTH) {
338         AKLOGE("Invalid wordLength: %d", wordLength);
339         return;
340     }
341     int wordCodePoints[MAX_WORD_LENGTH];
342     env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
343     int codePointCount = wordLength;
344     if (isBeginningOfSentence) {
345         codePointCount = CharUtils::attachBeginningOfSentenceMarker(
346                 wordCodePoints, wordLength, MAX_WORD_LENGTH);
347         if (codePointCount < 0) {
348             AKLOGE("Cannot attach Beginning-of-Sentence marker.");
349             return;
350         }
351     }
352     const WordProperty wordProperty = dictionary->getWordProperty(
353             CodePointArrayView(wordCodePoints, codePointCount));
354     JniDataUtils::outputWordProperty(env, wordProperty, outCodePoints, outFlags, outProbabilityInfo,
355             outNgramPrevWordsArray, outNgramPrevWordIsBeginningOfSentenceArray,
356             outNgramTargets, outNgramProbabilityInfo, outShortcutTargets, outShortcutProbabilities);
357 }
358 
latinime_BinaryDictionary_addUnigramEntry(JNIEnv * env,jclass clazz,jlong dict,jintArray word,jint probability,jintArray shortcutTarget,jint shortcutProbability,jboolean isBeginningOfSentence,jboolean isNotAWord,jboolean isPossiblyOffensive,jint timestamp)359 static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
360         jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
361         jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isPossiblyOffensive,
362         jint timestamp) {
363     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
364     if (!dictionary) {
365         return false;
366     }
367     jsize codePointCount = env->GetArrayLength(word);
368     int codePoints[codePointCount];
369     env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
370     std::vector<UnigramProperty::ShortcutProperty> shortcuts;
371     {
372         std::vector<int> shortcutTargetCodePoints;
373         JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
374         if (!shortcutTargetCodePoints.empty()) {
375             shortcuts.emplace_back(std::move(shortcutTargetCodePoints), shortcutProbability);
376         }
377     }
378     // Use 1 for count to indicate the word has inputted.
379     const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
380             isPossiblyOffensive, probability, HistoricalInfo(timestamp, 0 /* level */,
381             1 /* count */), std::move(shortcuts));
382     return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
383             &unigramProperty);
384 }
385 
latinime_BinaryDictionary_removeUnigramEntry(JNIEnv * env,jclass clazz,jlong dict,jintArray word)386 static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
387         jintArray word) {
388     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
389     if (!dictionary) {
390         return false;
391     }
392     jsize codePointCount = env->GetArrayLength(word);
393     int codePoints[codePointCount];
394     env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
395     return dictionary->removeUnigramEntry(CodePointArrayView(codePoints, codePointCount));
396 }
397 
latinime_BinaryDictionary_addNgramEntry(JNIEnv * env,jclass clazz,jlong dict,jobjectArray prevWordCodePointArrays,jbooleanArray isBeginningOfSentenceArray,jintArray word,jint probability,jint timestamp)398 static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
399         jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
400         jintArray word, jint probability, jint timestamp) {
401     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
402     if (!dictionary) {
403         return false;
404     }
405     const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
406             prevWordCodePointArrays, isBeginningOfSentenceArray,
407             env->GetArrayLength(prevWordCodePointArrays));
408     jsize wordLength = env->GetArrayLength(word);
409     int wordCodePoints[wordLength];
410     env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
411     // Use 1 for count to indicate the ngram has inputted.
412     const NgramProperty ngramProperty(ngramContext,
413             CodePointArrayView(wordCodePoints, wordLength).toVector(),
414             probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
415     return dictionary->addNgramEntry(&ngramProperty);
416 }
417 
latinime_BinaryDictionary_removeNgramEntry(JNIEnv * env,jclass clazz,jlong dict,jobjectArray prevWordCodePointArrays,jbooleanArray isBeginningOfSentenceArray,jintArray word)418 static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
419         jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
420         jintArray word) {
421     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
422     if (!dictionary) {
423         return false;
424     }
425     const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
426             prevWordCodePointArrays, isBeginningOfSentenceArray,
427             env->GetArrayLength(prevWordCodePointArrays));
428     jsize codePointCount = env->GetArrayLength(word);
429     int wordCodePoints[codePointCount];
430     env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
431     return dictionary->removeNgramEntry(&ngramContext,
432             CodePointArrayView(wordCodePoints, codePointCount));
433 }
434 
latinime_BinaryDictionary_updateEntriesForWordWithNgramContext(JNIEnv * env,jclass clazz,jlong dict,jobjectArray prevWordCodePointArrays,jbooleanArray isBeginningOfSentenceArray,jintArray word,jboolean isValidWord,jint count,jint timestamp)435 static bool latinime_BinaryDictionary_updateEntriesForWordWithNgramContext(JNIEnv *env,
436         jclass clazz, jlong dict, jobjectArray prevWordCodePointArrays,
437         jbooleanArray isBeginningOfSentenceArray, jintArray word, jboolean isValidWord, jint count,
438         jint timestamp) {
439     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
440     if (!dictionary) {
441         return false;
442     }
443     const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
444             prevWordCodePointArrays, isBeginningOfSentenceArray,
445             env->GetArrayLength(prevWordCodePointArrays));
446     jsize codePointCount = env->GetArrayLength(word);
447     int wordCodePoints[codePointCount];
448     env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
449     const HistoricalInfo historicalInfo(timestamp, 0 /* level */, count);
450     return dictionary->updateEntriesForWordWithNgramContext(&ngramContext,
451             CodePointArrayView(wordCodePoints, codePointCount), isValidWord == JNI_TRUE,
452             historicalInfo);
453 }
454 
455 // Returns how many input events are processed.
latinime_BinaryDictionary_updateEntriesForInputEvents(JNIEnv * env,jclass clazz,jlong dict,jobjectArray inputEvents,jint startIndex)456 static int latinime_BinaryDictionary_updateEntriesForInputEvents(JNIEnv *env, jclass clazz,
457         jlong dict, jobjectArray inputEvents, jint startIndex) {
458     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
459     if (!dictionary) {
460         return 0;
461     }
462     jsize inputEventCount = env->GetArrayLength(inputEvents);
463     if (inputEventCount == 0 || startIndex >= inputEventCount) {
464         return 0;
465     }
466     jobject inputEvent = env->GetObjectArrayElement(inputEvents, 0);
467     jclass wordInputEventClass = env->GetObjectClass(inputEvent);
468     env->DeleteLocalRef(inputEvent);
469 
470     jfieldID targetWordFieldId = env->GetFieldID(wordInputEventClass, "mTargetWord", "[I");
471     jfieldID prevWordCountFieldId = env->GetFieldID(wordInputEventClass, "mPrevWordsCount", "I");
472     jfieldID prevWordArrayFieldId = env->GetFieldID(wordInputEventClass, "mPrevWordArray", "[[I");
473     jfieldID isPrevWordBoSArrayFieldId =
474             env->GetFieldID(wordInputEventClass, "mIsPrevWordBeginningOfSentenceArray", "[Z");
475     jfieldID isValidFieldId = env->GetFieldID(wordInputEventClass, "mIsValid", "Z");
476     jfieldID timestampFieldId = env->GetFieldID(wordInputEventClass, "mTimestamp", "I");
477     env->DeleteLocalRef(wordInputEventClass);
478 
479     for (int i = startIndex; i < inputEventCount; ++i) {
480         jobject inputEvent = env->GetObjectArrayElement(inputEvents, i);
481         jintArray targetWord = static_cast<jintArray>(
482                 env->GetObjectField(inputEvent, targetWordFieldId));
483         jsize wordLength = env->GetArrayLength(targetWord);
484         int wordCodePoints[wordLength];
485         env->GetIntArrayRegion(targetWord, 0, wordLength, wordCodePoints);
486         env->DeleteLocalRef(targetWord);
487 
488         jint prevWordCount = env->GetIntField(inputEvent, prevWordCountFieldId);
489         jobjectArray prevWordArray =
490                 static_cast<jobjectArray>(env->GetObjectField(inputEvent, prevWordArrayFieldId));
491         jbooleanArray isPrevWordBeginningOfSentenceArray = static_cast<jbooleanArray>(
492                 env->GetObjectField(inputEvent, isPrevWordBoSArrayFieldId));
493         jboolean isValid = env->GetBooleanField(inputEvent, isValidFieldId);
494         jint timestamp = env->GetIntField(inputEvent, timestampFieldId);
495         const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
496                 prevWordArray, isPrevWordBeginningOfSentenceArray, prevWordCount);
497         // Use 1 for count to indicate the word has inputted.
498         dictionary->updateEntriesForWordWithNgramContext(&ngramContext,
499                 CodePointArrayView(wordCodePoints, wordLength), isValid,
500                 HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
501         if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
502             return i + 1;
503         }
504         env->DeleteLocalRef(prevWordArray);
505         env->DeleteLocalRef(isPrevWordBeginningOfSentenceArray);
506         env->DeleteLocalRef(inputEvent);
507     }
508     return inputEventCount;
509 }
510 
latinime_BinaryDictionary_getProperty(JNIEnv * env,jclass clazz,jlong dict,jstring query)511 static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, jlong dict,
512         jstring query) {
513     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
514     if (!dictionary) {
515         return env->NewStringUTF("");
516     }
517     const jsize queryUtf8Length = env->GetStringUTFLength(query);
518     char queryChars[queryUtf8Length + 1];
519     env->GetStringUTFRegion(query, 0, env->GetStringLength(query), queryChars);
520     queryChars[queryUtf8Length] = '\0';
521     static const int GET_PROPERTY_RESULT_LENGTH = 100;
522     char resultChars[GET_PROPERTY_RESULT_LENGTH];
523     resultChars[0] = '\0';
524     dictionary->getProperty(queryChars, queryUtf8Length, resultChars, GET_PROPERTY_RESULT_LENGTH);
525     return env->NewStringUTF(resultChars);
526 }
527 
latinime_BinaryDictionary_isCorruptedNative(JNIEnv * env,jclass clazz,jlong dict)528 static bool latinime_BinaryDictionary_isCorruptedNative(JNIEnv *env, jclass clazz, jlong dict) {
529     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
530     if (!dictionary) {
531         return false;
532     }
533     return dictionary->getDictionaryStructurePolicy()->isCorrupted();
534 }
535 
runGCAndGetNewStructurePolicy(DictionaryStructureWithBufferPolicy::StructurePolicyPtr structurePolicy,const char * const dictFilePath)536 static DictionaryStructureWithBufferPolicy::StructurePolicyPtr runGCAndGetNewStructurePolicy(
537         DictionaryStructureWithBufferPolicy::StructurePolicyPtr structurePolicy,
538         const char *const dictFilePath) {
539     structurePolicy->flushWithGC(dictFilePath);
540     structurePolicy.release();
541     return DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile(
542             dictFilePath, 0 /* offset */, 0 /* size */, true /* isUpdatable */);
543 }
544 
latinime_BinaryDictionary_migrateNative(JNIEnv * env,jclass clazz,jlong dict,jstring dictFilePath,jlong newFormatVersion)545 static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, jlong dict,
546         jstring dictFilePath, jlong newFormatVersion) {
547     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
548     if (!dictionary) {
549         return false;
550     }
551     const jsize filePathUtf8Length = env->GetStringUTFLength(dictFilePath);
552     char dictFilePathChars[filePathUtf8Length + 1];
553     env->GetStringUTFRegion(dictFilePath, 0, env->GetStringLength(dictFilePath), dictFilePathChars);
554     dictFilePathChars[filePathUtf8Length] = '\0';
555 
556     const DictionaryHeaderStructurePolicy *const headerPolicy =
557             dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
558     DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy =
559             DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict(
560                     newFormatVersion, *headerPolicy->getLocale(), headerPolicy->getAttributeMap());
561     if (!dictionaryStructureWithBufferPolicy) {
562         LogUtils::logToJava(env, "Cannot migrate header.");
563         return false;
564     }
565 
566     int wordCodePoints[MAX_WORD_LENGTH];
567     int wordCodePointCount = 0;
568     int token = 0;
569     // Add unigrams.
570     do {
571         token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
572         const WordProperty wordProperty = dictionary->getWordProperty(
573                 CodePointArrayView(wordCodePoints, wordCodePointCount));
574         if (wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
575             // Skip beginning-of-sentence unigram.
576             continue;
577         }
578         if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
579             dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
580                     std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
581             if (!dictionaryStructureWithBufferPolicy) {
582                 LogUtils::logToJava(env, "Cannot open dict after GC.");
583                 return false;
584             }
585         }
586         if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(
587                 CodePointArrayView(wordCodePoints, wordCodePointCount),
588                 &wordProperty.getUnigramProperty())) {
589             LogUtils::logToJava(env, "Cannot add unigram to the new dict.");
590             return false;
591         }
592     } while (token != 0);
593 
594     // Add ngrams.
595     do {
596         token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
597         const WordProperty wordProperty = dictionary->getWordProperty(
598                 CodePointArrayView(wordCodePoints, wordCodePointCount));
599         if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
600             dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
601                     std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
602             if (!dictionaryStructureWithBufferPolicy) {
603                 LogUtils::logToJava(env, "Cannot open dict after GC.");
604                 return false;
605             }
606         }
607         for (const NgramProperty &ngramProperty : wordProperty.getNgramProperties()) {
608             if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramProperty)) {
609                 LogUtils::logToJava(env, "Cannot add ngram to the new dict.");
610                 return false;
611             }
612         }
613     } while (token != 0);
614     // Save to File.
615     dictionaryStructureWithBufferPolicy->flushWithGC(dictFilePathChars);
616     return true;
617 }
618 
619 static const JNINativeMethod sMethods[] = {
620     {
621         const_cast<char *>("openNative"),
622         const_cast<char *>("(Ljava/lang/String;JJZ)J"),
623         reinterpret_cast<void *>(latinime_BinaryDictionary_open)
624     },
625     {
626         const_cast<char *>("createOnMemoryNative"),
627         const_cast<char *>("(JLjava/lang/String;[Ljava/lang/String;[Ljava/lang/String;)J"),
628         reinterpret_cast<void *>(latinime_BinaryDictionary_createOnMemory)
629     },
630     {
631         const_cast<char *>("closeNative"),
632         const_cast<char *>("(J)V"),
633         reinterpret_cast<void *>(latinime_BinaryDictionary_close)
634     },
635     {
636         const_cast<char *>("getFormatVersionNative"),
637         const_cast<char *>("(J)I"),
638         reinterpret_cast<void *>(latinime_BinaryDictionary_getFormatVersion)
639     },
640     {
641         const_cast<char *>("getHeaderInfoNative"),
642         const_cast<char *>("(J[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"),
643         reinterpret_cast<void *>(latinime_BinaryDictionary_getHeaderInfo)
644     },
645     {
646         const_cast<char *>("flushNative"),
647         const_cast<char *>("(JLjava/lang/String;)Z"),
648         reinterpret_cast<void *>(latinime_BinaryDictionary_flush)
649     },
650     {
651         const_cast<char *>("needsToRunGCNative"),
652         const_cast<char *>("(JZ)Z"),
653         reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC)
654     },
655     {
656         const_cast<char *>("flushWithGCNative"),
657         const_cast<char *>("(JLjava/lang/String;)Z"),
658         reinterpret_cast<void *>(latinime_BinaryDictionary_flushWithGC)
659     },
660     {
661         const_cast<char *>("getSuggestionsNative"),
662         const_cast<char *>("(JJJ[I[I[I[I[II[I[[I[ZI[I[I[I[I[I[I[F)V"),
663         reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
664     },
665     {
666         const_cast<char *>("getProbabilityNative"),
667         const_cast<char *>("(J[I)I"),
668         reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)
669     },
670     {
671         const_cast<char *>("getMaxProbabilityOfExactMatchesNative"),
672         const_cast<char *>("(J[I)I"),
673         reinterpret_cast<void *>(latinime_BinaryDictionary_getMaxProbabilityOfExactMatches)
674     },
675     {
676         const_cast<char *>("getNgramProbabilityNative"),
677         const_cast<char *>("(J[[I[Z[I)I"),
678         reinterpret_cast<void *>(latinime_BinaryDictionary_getNgramProbability)
679     },
680     {
681         const_cast<char *>("getWordPropertyNative"),
682         const_cast<char *>("(J[IZ[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;"
683                 "Ljava/util/ArrayList;Ljava/util/ArrayList;Ljava/util/ArrayList;"
684                 "Ljava/util/ArrayList;)V"),
685         reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty)
686     },
687     {
688         const_cast<char *>("getNextWordNative"),
689         const_cast<char *>("(JI[I[Z)I"),
690         reinterpret_cast<void *>(latinime_BinaryDictionary_getNextWord)
691     },
692     {
693         const_cast<char *>("addUnigramEntryNative"),
694         const_cast<char *>("(J[II[IIZZZI)Z"),
695         reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramEntry)
696     },
697     {
698         const_cast<char *>("removeUnigramEntryNative"),
699         const_cast<char *>("(J[I)Z"),
700         reinterpret_cast<void *>(latinime_BinaryDictionary_removeUnigramEntry)
701     },
702     {
703         const_cast<char *>("addNgramEntryNative"),
704         const_cast<char *>("(J[[I[Z[III)Z"),
705         reinterpret_cast<void *>(latinime_BinaryDictionary_addNgramEntry)
706     },
707     {
708         const_cast<char *>("removeNgramEntryNative"),
709         const_cast<char *>("(J[[I[Z[I)Z"),
710         reinterpret_cast<void *>(latinime_BinaryDictionary_removeNgramEntry)
711     },
712     {
713         const_cast<char *>("updateEntriesForWordWithNgramContextNative"),
714         const_cast<char *>("(J[[I[Z[IZII)Z"),
715         reinterpret_cast<void *>(latinime_BinaryDictionary_updateEntriesForWordWithNgramContext)
716     },
717     {
718         const_cast<char *>("updateEntriesForInputEventsNative"),
719         const_cast<char *>(
720                 "(J[Lcom/android/inputmethod/latin/utils/WordInputEventForPersonalization;I)I"),
721         reinterpret_cast<void *>(latinime_BinaryDictionary_updateEntriesForInputEvents)
722     },
723     {
724         const_cast<char *>("getPropertyNative"),
725         const_cast<char *>("(JLjava/lang/String;)Ljava/lang/String;"),
726         reinterpret_cast<void *>(latinime_BinaryDictionary_getProperty)
727     },
728     {
729         const_cast<char *>("isCorruptedNative"),
730         const_cast<char *>("(J)Z"),
731         reinterpret_cast<void *>(latinime_BinaryDictionary_isCorruptedNative)
732     },
733     {
734         const_cast<char *>("migrateNative"),
735         const_cast<char *>("(JLjava/lang/String;J)Z"),
736         reinterpret_cast<void *>(latinime_BinaryDictionary_migrateNative)
737     }
738 };
739 
register_BinaryDictionary(JNIEnv * env)740 int register_BinaryDictionary(JNIEnv *env) {
741     const char *const kClassPathName = "com/android/inputmethod/latin/BinaryDictionary";
742     return registerNativeMethods(env, kClassPathName, sMethods, NELEMS(sMethods));
743 }
744 } // namespace latinime
745