/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import android.text.TextUtils;
import android.util.Log;
import android.util.SparseArray;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.common.ComposedData;
import com.android.inputmethod.latin.common.Constants;
import com.android.inputmethod.latin.common.FileUtils;
import com.android.inputmethod.latin.common.InputPointers;
import com.android.inputmethod.latin.common.StringUtils;
import com.android.inputmethod.latin.makedict.DictionaryHeader;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import com.android.inputmethod.latin.makedict.WordProperty;
import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
import com.android.inputmethod.latin.utils.JniUtils;
import com.android.inputmethod.latin.utils.WordInputEventForPersonalization;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map; 46 47 import javax.annotation.Nonnull; 48 49 /** 50 * Implements a static, compacted, binary dictionary of standard words. 51 */ 52 // TODO: All methods which should be locked need to have a suffix "Locked". 53 public final class BinaryDictionary extends Dictionary { 54 private static final String TAG = BinaryDictionary.class.getSimpleName(); 55 56 // The cutoff returned by native for auto-commit confidence. 57 // Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h 58 private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000; 59 60 public static final int DICTIONARY_MAX_WORD_LENGTH = 48; 61 public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 3; 62 63 @UsedForTesting 64 public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; 65 @UsedForTesting 66 public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; 67 @UsedForTesting 68 public static final String MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT"; 69 @UsedForTesting 70 public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT"; 71 72 public static final int NOT_A_VALID_TIMESTAMP = -1; 73 74 // Format to get unigram flags from native side via getWordPropertyNative(). 75 private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5; 76 private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0; 77 private static final int FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX = 1; 78 private static final int FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX = 2; 79 private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3; // DEPRECATED 80 private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4; 81 82 // Format to get probability and historical info from native side via getWordPropertyNative(). 
83 public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4; 84 public static final int FORMAT_WORD_PROPERTY_PROBABILITY_INDEX = 0; 85 public static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 1; 86 public static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 2; 87 public static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 3; 88 89 public static final String DICT_FILE_NAME_SUFFIX_FOR_MIGRATION = ".migrate"; 90 public static final String DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION = ".migrating"; 91 92 private long mNativeDict; 93 private final long mDictSize; 94 private final String mDictFilePath; 95 private final boolean mUseFullEditDistance; 96 private final boolean mIsUpdatable; 97 private boolean mHasUpdated; 98 99 private final SparseArray<DicTraverseSession> mDicTraverseSessions = new SparseArray<>(); 100 101 // TODO: There should be a way to remove used DicTraverseSession objects from 102 // {@code mDicTraverseSessions}. getTraverseSession(final int traverseSessionId)103 private DicTraverseSession getTraverseSession(final int traverseSessionId) { 104 synchronized(mDicTraverseSessions) { 105 DicTraverseSession traverseSession = mDicTraverseSessions.get(traverseSessionId); 106 if (traverseSession == null) { 107 traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize); 108 mDicTraverseSessions.put(traverseSessionId, traverseSession); 109 } 110 return traverseSession; 111 } 112 } 113 114 /** 115 * Constructs binary dictionary using existing dictionary file. 116 * @param filename the name of the file to read through native code. 117 * @param offset the offset of the dictionary data within the file. 118 * @param length the length of the binary data. 119 * @param useFullEditDistance whether to use the full edit distance in suggestions 120 * @param dictType the dictionary type, as a human-readable string 121 * @param isUpdatable whether to open the dictionary file in writable mode. 
122 */ BinaryDictionary(final String filename, final long offset, final long length, final boolean useFullEditDistance, final Locale locale, final String dictType, final boolean isUpdatable)123 public BinaryDictionary(final String filename, final long offset, final long length, 124 final boolean useFullEditDistance, final Locale locale, final String dictType, 125 final boolean isUpdatable) { 126 super(dictType, locale); 127 mDictSize = length; 128 mDictFilePath = filename; 129 mIsUpdatable = isUpdatable; 130 mHasUpdated = false; 131 mUseFullEditDistance = useFullEditDistance; 132 loadDictionary(filename, offset, length, isUpdatable); 133 } 134 135 /** 136 * Constructs binary dictionary on memory. 137 * @param filename the name of the file used to flush. 138 * @param useFullEditDistance whether to use the full edit distance in suggestions 139 * @param dictType the dictionary type, as a human-readable string 140 * @param formatVersion the format version of the dictionary 141 * @param attributeMap the attributes of the dictionary 142 */ BinaryDictionary(final String filename, final boolean useFullEditDistance, final Locale locale, final String dictType, final long formatVersion, final Map<String, String> attributeMap)143 public BinaryDictionary(final String filename, final boolean useFullEditDistance, 144 final Locale locale, final String dictType, final long formatVersion, 145 final Map<String, String> attributeMap) { 146 super(dictType, locale); 147 mDictSize = 0; 148 mDictFilePath = filename; 149 // On memory dictionary is always updatable. 
150 mIsUpdatable = true; 151 mHasUpdated = false; 152 mUseFullEditDistance = useFullEditDistance; 153 final String[] keyArray = new String[attributeMap.size()]; 154 final String[] valueArray = new String[attributeMap.size()]; 155 int index = 0; 156 for (final String key : attributeMap.keySet()) { 157 keyArray[index] = key; 158 valueArray[index] = attributeMap.get(key); 159 index++; 160 } 161 mNativeDict = createOnMemoryNative(formatVersion, locale.toString(), keyArray, valueArray); 162 } 163 164 165 static { JniUtils.loadNativeLibrary()166 JniUtils.loadNativeLibrary(); 167 } 168 openNative(String sourceDir, long dictOffset, long dictSize, boolean isUpdatable)169 private static native long openNative(String sourceDir, long dictOffset, long dictSize, 170 boolean isUpdatable); createOnMemoryNative(long formatVersion, String locale, String[] attributeKeyStringArray, String[] attributeValueStringArray)171 private static native long createOnMemoryNative(long formatVersion, 172 String locale, String[] attributeKeyStringArray, String[] attributeValueStringArray); getHeaderInfoNative(long dict, int[] outHeaderSize, int[] outFormatVersion, ArrayList<int[]> outAttributeKeys, ArrayList<int[]> outAttributeValues)173 private static native void getHeaderInfoNative(long dict, int[] outHeaderSize, 174 int[] outFormatVersion, ArrayList<int[]> outAttributeKeys, 175 ArrayList<int[]> outAttributeValues); flushNative(long dict, String filePath)176 private static native boolean flushNative(long dict, String filePath); needsToRunGCNative(long dict, boolean mindsBlockByGC)177 private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC); flushWithGCNative(long dict, String filePath)178 private static native boolean flushWithGCNative(long dict, String filePath); closeNative(long dict)179 private static native void closeNative(long dict); getFormatVersionNative(long dict)180 private static native int getFormatVersionNative(long dict); getProbabilityNative(long dict, 
int[] word)181 private static native int getProbabilityNative(long dict, int[] word); getMaxProbabilityOfExactMatchesNative(long dict, int[] word)182 private static native int getMaxProbabilityOfExactMatchesNative(long dict, int[] word); getNgramProbabilityNative(long dict, int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word)183 private static native int getNgramProbabilityNative(long dict, int[][] prevWordCodePointArrays, 184 boolean[] isBeginningOfSentenceArray, int[] word); getWordPropertyNative(long dict, int[] word, boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags, int[] outProbabilityInfo, ArrayList<int[][]> outNgramPrevWordsArray, ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray, ArrayList<int[]> outNgramTargets, ArrayList<int[]> outNgramProbabilityInfo, ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities)185 private static native void getWordPropertyNative(long dict, int[] word, 186 boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags, 187 int[] outProbabilityInfo, ArrayList<int[][]> outNgramPrevWordsArray, 188 ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray, 189 ArrayList<int[]> outNgramTargets, ArrayList<int[]> outNgramProbabilityInfo, 190 ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities); getNextWordNative(long dict, int token, int[] outCodePoints, boolean[] outIsBeginningOfSentence)191 private static native int getNextWordNative(long dict, int token, int[] outCodePoints, 192 boolean[] outIsBeginningOfSentence); getSuggestionsNative(long dict, long proximityInfo, long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions, int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int prevWordCount, int[] outputSuggestionCount, int[] outputCodePoints, int[] outputScores, int[] outputIndices, 
int[] outputTypes, int[] outputAutoCommitFirstWordConfidence, float[] inOutWeightOfLangModelVsSpatialModel)193 private static native void getSuggestionsNative(long dict, long proximityInfo, 194 long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, 195 int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions, 196 int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, 197 int prevWordCount, int[] outputSuggestionCount, int[] outputCodePoints, 198 int[] outputScores, int[] outputIndices, int[] outputTypes, 199 int[] outputAutoCommitFirstWordConfidence, 200 float[] inOutWeightOfLangModelVsSpatialModel); addUnigramEntryNative(long dict, int[] word, int probability, int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence, boolean isNotAWord, boolean isPossiblyOffensive, int timestamp)201 private static native boolean addUnigramEntryNative(long dict, int[] word, int probability, 202 int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence, 203 boolean isNotAWord, boolean isPossiblyOffensive, int timestamp); removeUnigramEntryNative(long dict, int[] word)204 private static native boolean removeUnigramEntryNative(long dict, int[] word); addNgramEntryNative(long dict, int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word, int probability, int timestamp)205 private static native boolean addNgramEntryNative(long dict, 206 int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, 207 int[] word, int probability, int timestamp); removeNgramEntryNative(long dict, int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word)208 private static native boolean removeNgramEntryNative(long dict, 209 int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word); updateEntriesForWordWithNgramContextNative(long dict, int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word, boolean 
isValidWord, int count, int timestamp)210 private static native boolean updateEntriesForWordWithNgramContextNative(long dict, 211 int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, 212 int[] word, boolean isValidWord, int count, int timestamp); updateEntriesForInputEventsNative(long dict, WordInputEventForPersonalization[] inputEvents, int startIndex)213 private static native int updateEntriesForInputEventsNative(long dict, 214 WordInputEventForPersonalization[] inputEvents, int startIndex); getPropertyNative(long dict, String query)215 private static native String getPropertyNative(long dict, String query); isCorruptedNative(long dict)216 private static native boolean isCorruptedNative(long dict); migrateNative(long dict, String dictFilePath, long newFormatVersion)217 private static native boolean migrateNative(long dict, String dictFilePath, 218 long newFormatVersion); 219 220 // TODO: Move native dict into session loadDictionary(final String path, final long startOffset, final long length, final boolean isUpdatable)221 private void loadDictionary(final String path, final long startOffset, 222 final long length, final boolean isUpdatable) { 223 mHasUpdated = false; 224 mNativeDict = openNative(path, startOffset, length, isUpdatable); 225 } 226 227 // TODO: Check isCorrupted() for main dictionaries. isCorrupted()228 public boolean isCorrupted() { 229 if (!isValidDictionary()) { 230 return false; 231 } 232 if (!isCorruptedNative(mNativeDict)) { 233 return false; 234 } 235 // TODO: Record the corruption. 
236 Log.e(TAG, "BinaryDictionary (" + mDictFilePath + ") is corrupted."); 237 Log.e(TAG, "locale: " + mLocale); 238 Log.e(TAG, "dict size: " + mDictSize); 239 Log.e(TAG, "updatable: " + mIsUpdatable); 240 return true; 241 } 242 getHeader()243 public DictionaryHeader getHeader() throws UnsupportedFormatException { 244 if (mNativeDict == 0) { 245 return null; 246 } 247 final int[] outHeaderSize = new int[1]; 248 final int[] outFormatVersion = new int[1]; 249 final ArrayList<int[]> outAttributeKeys = new ArrayList<>(); 250 final ArrayList<int[]> outAttributeValues = new ArrayList<>(); 251 getHeaderInfoNative(mNativeDict, outHeaderSize, outFormatVersion, outAttributeKeys, 252 outAttributeValues); 253 final HashMap<String, String> attributes = new HashMap<>(); 254 for (int i = 0; i < outAttributeKeys.size(); i++) { 255 final String attributeKey = StringUtils.getStringFromNullTerminatedCodePointArray( 256 outAttributeKeys.get(i)); 257 final String attributeValue = StringUtils.getStringFromNullTerminatedCodePointArray( 258 outAttributeValues.get(i)); 259 attributes.put(attributeKey, attributeValue); 260 } 261 final boolean hasHistoricalInfo = DictionaryHeader.ATTRIBUTE_VALUE_TRUE.equals( 262 attributes.get(DictionaryHeader.HAS_HISTORICAL_INFO_KEY)); 263 return new DictionaryHeader(outHeaderSize[0], new DictionaryOptions(attributes), 264 new FormatSpec.FormatOptions(outFormatVersion[0], hasHistoricalInfo)); 265 } 266 267 @Override getSuggestions(final ComposedData composedData, final NgramContext ngramContext, final long proximityInfoHandle, final SettingsValuesForSuggestion settingsValuesForSuggestion, final int sessionId, final float weightForLocale, final float[] inOutWeightOfLangModelVsSpatialModel)268 public ArrayList<SuggestedWordInfo> getSuggestions(final ComposedData composedData, 269 final NgramContext ngramContext, final long proximityInfoHandle, 270 final SettingsValuesForSuggestion settingsValuesForSuggestion, 271 final int sessionId, final float 
weightForLocale, 272 final float[] inOutWeightOfLangModelVsSpatialModel) { 273 if (!isValidDictionary()) { 274 return null; 275 } 276 final DicTraverseSession session = getTraverseSession(sessionId); 277 Arrays.fill(session.mInputCodePoints, Constants.NOT_A_CODE); 278 ngramContext.outputToArray(session.mPrevWordCodePointArrays, 279 session.mIsBeginningOfSentenceArray); 280 final InputPointers inputPointers = composedData.mInputPointers; 281 final boolean isGesture = composedData.mIsBatchMode; 282 final int inputSize; 283 if (!isGesture) { 284 inputSize = 285 composedData.copyCodePointsExceptTrailingSingleQuotesAndReturnCodePointCount( 286 session.mInputCodePoints); 287 if (inputSize < 0) { 288 return null; 289 } 290 } else { 291 inputSize = inputPointers.getPointerSize(); 292 } 293 session.mNativeSuggestOptions.setUseFullEditDistance(mUseFullEditDistance); 294 session.mNativeSuggestOptions.setIsGesture(isGesture); 295 session.mNativeSuggestOptions.setBlockOffensiveWords( 296 settingsValuesForSuggestion.mBlockPotentiallyOffensive); 297 session.mNativeSuggestOptions.setWeightForLocale(weightForLocale); 298 if (inOutWeightOfLangModelVsSpatialModel != null) { 299 session.mInputOutputWeightOfLangModelVsSpatialModel[0] = 300 inOutWeightOfLangModelVsSpatialModel[0]; 301 } else { 302 session.mInputOutputWeightOfLangModelVsSpatialModel[0] = 303 Dictionary.NOT_A_WEIGHT_OF_LANG_MODEL_VS_SPATIAL_MODEL; 304 } 305 // TOOD: Pass multiple previous words information for n-gram. 
306 getSuggestionsNative(mNativeDict, proximityInfoHandle, 307 getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(), 308 inputPointers.getYCoordinates(), inputPointers.getTimes(), 309 inputPointers.getPointerIds(), session.mInputCodePoints, inputSize, 310 session.mNativeSuggestOptions.getOptions(), session.mPrevWordCodePointArrays, 311 session.mIsBeginningOfSentenceArray, ngramContext.getPrevWordCount(), 312 session.mOutputSuggestionCount, session.mOutputCodePoints, session.mOutputScores, 313 session.mSpaceIndices, session.mOutputTypes, 314 session.mOutputAutoCommitFirstWordConfidence, 315 session.mInputOutputWeightOfLangModelVsSpatialModel); 316 if (inOutWeightOfLangModelVsSpatialModel != null) { 317 inOutWeightOfLangModelVsSpatialModel[0] = 318 session.mInputOutputWeightOfLangModelVsSpatialModel[0]; 319 } 320 final int count = session.mOutputSuggestionCount[0]; 321 final ArrayList<SuggestedWordInfo> suggestions = new ArrayList<>(); 322 for (int j = 0; j < count; ++j) { 323 final int start = j * DICTIONARY_MAX_WORD_LENGTH; 324 int len = 0; 325 while (len < DICTIONARY_MAX_WORD_LENGTH 326 && session.mOutputCodePoints[start + len] != 0) { 327 ++len; 328 } 329 if (len > 0) { 330 suggestions.add(new SuggestedWordInfo( 331 new String(session.mOutputCodePoints, start, len), 332 "" /* prevWordsContext */, 333 (int)(session.mOutputScores[j] * weightForLocale), 334 session.mOutputTypes[j], 335 this /* sourceDict */, 336 session.mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */, 337 session.mOutputAutoCommitFirstWordConfidence[0])); 338 } 339 } 340 return suggestions; 341 } 342 isValidDictionary()343 public boolean isValidDictionary() { 344 return mNativeDict != 0; 345 } 346 getFormatVersion()347 public int getFormatVersion() { 348 return getFormatVersionNative(mNativeDict); 349 } 350 351 @Override isInDictionary(final String word)352 public boolean isInDictionary(final String word) { 353 return getFrequency(word) != NOT_A_PROBABILITY; 354 } 355 356 
@Override getFrequency(final String word)357 public int getFrequency(final String word) { 358 if (TextUtils.isEmpty(word)) { 359 return NOT_A_PROBABILITY; 360 } 361 final int[] codePoints = StringUtils.toCodePointArray(word); 362 return getProbabilityNative(mNativeDict, codePoints); 363 } 364 365 @Override getMaxFrequencyOfExactMatches(final String word)366 public int getMaxFrequencyOfExactMatches(final String word) { 367 if (TextUtils.isEmpty(word)) { 368 return NOT_A_PROBABILITY; 369 } 370 final int[] codePoints = StringUtils.toCodePointArray(word); 371 return getMaxProbabilityOfExactMatchesNative(mNativeDict, codePoints); 372 } 373 374 @UsedForTesting isValidNgram(final NgramContext ngramContext, final String word)375 public boolean isValidNgram(final NgramContext ngramContext, final String word) { 376 return getNgramProbability(ngramContext, word) != NOT_A_PROBABILITY; 377 } 378 getNgramProbability(final NgramContext ngramContext, final String word)379 public int getNgramProbability(final NgramContext ngramContext, final String word) { 380 if (!ngramContext.isValid() || TextUtils.isEmpty(word)) { 381 return NOT_A_PROBABILITY; 382 } 383 final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][]; 384 final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()]; 385 ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); 386 final int[] wordCodePoints = StringUtils.toCodePointArray(word); 387 return getNgramProbabilityNative(mNativeDict, prevWordCodePointArrays, 388 isBeginningOfSentenceArray, wordCodePoints); 389 } 390 getWordProperty(final String word, final boolean isBeginningOfSentence)391 public WordProperty getWordProperty(final String word, final boolean isBeginningOfSentence) { 392 if (word == null) { 393 return null; 394 } 395 final int[] codePoints = StringUtils.toCodePointArray(word); 396 final int[] outCodePoints = new int[DICTIONARY_MAX_WORD_LENGTH]; 397 final boolean[] 
outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT]; 398 final int[] outProbabilityInfo = 399 new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT]; 400 final ArrayList<int[][]> outNgramPrevWordsArray = new ArrayList<>(); 401 final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray = 402 new ArrayList<>(); 403 final ArrayList<int[]> outNgramTargets = new ArrayList<>(); 404 final ArrayList<int[]> outNgramProbabilityInfo = new ArrayList<>(); 405 final ArrayList<int[]> outShortcutTargets = new ArrayList<>(); 406 final ArrayList<Integer> outShortcutProbabilities = new ArrayList<>(); 407 getWordPropertyNative(mNativeDict, codePoints, isBeginningOfSentence, outCodePoints, 408 outFlags, outProbabilityInfo, outNgramPrevWordsArray, 409 outNgramPrevWordIsBeginningOfSentenceArray, outNgramTargets, 410 outNgramProbabilityInfo, outShortcutTargets, outShortcutProbabilities); 411 return new WordProperty(codePoints, 412 outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX], 413 outFlags[FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX], 414 outFlags[FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX], 415 outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo, 416 outNgramPrevWordsArray, outNgramPrevWordIsBeginningOfSentenceArray, 417 outNgramTargets, outNgramProbabilityInfo); 418 } 419 420 public static class GetNextWordPropertyResult { 421 public WordProperty mWordProperty; 422 public int mNextToken; 423 GetNextWordPropertyResult(final WordProperty wordProperty, final int nextToken)424 public GetNextWordPropertyResult(final WordProperty wordProperty, final int nextToken) { 425 mWordProperty = wordProperty; 426 mNextToken = nextToken; 427 } 428 } 429 430 /** 431 * Method to iterate all words in the dictionary for makedict. 432 * If token is 0, this method newly starts iterating the dictionary. 
433 */ getNextWordProperty(final int token)434 public GetNextWordPropertyResult getNextWordProperty(final int token) { 435 final int[] codePoints = new int[DICTIONARY_MAX_WORD_LENGTH]; 436 final boolean[] isBeginningOfSentence = new boolean[1]; 437 final int nextToken = getNextWordNative(mNativeDict, token, codePoints, 438 isBeginningOfSentence); 439 final String word = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints); 440 return new GetNextWordPropertyResult( 441 getWordProperty(word, isBeginningOfSentence[0]), nextToken); 442 } 443 444 // Add a unigram entry to binary dictionary with unigram attributes in native code. addUnigramEntry( final String word, final int probability, final boolean isBeginningOfSentence, final boolean isNotAWord, final boolean isPossiblyOffensive, final int timestamp)445 public boolean addUnigramEntry( 446 final String word, final int probability, final boolean isBeginningOfSentence, 447 final boolean isNotAWord, final boolean isPossiblyOffensive, final int timestamp) { 448 if (word == null || (word.isEmpty() && !isBeginningOfSentence)) { 449 return false; 450 } 451 final int[] codePoints = StringUtils.toCodePointArray(word); 452 if (!addUnigramEntryNative(mNativeDict, codePoints, probability, 453 null /* shortcutTargetCodePoints */, 0 /* shortcutProbability */, 454 isBeginningOfSentence, isNotAWord, isPossiblyOffensive, timestamp)) { 455 return false; 456 } 457 mHasUpdated = true; 458 return true; 459 } 460 461 // Remove a unigram entry from the binary dictionary in native code. removeUnigramEntry(final String word)462 public boolean removeUnigramEntry(final String word) { 463 if (TextUtils.isEmpty(word)) { 464 return false; 465 } 466 final int[] codePoints = StringUtils.toCodePointArray(word); 467 if (!removeUnigramEntryNative(mNativeDict, codePoints)) { 468 return false; 469 } 470 mHasUpdated = true; 471 return true; 472 } 473 474 // Add an n-gram entry to the binary dictionary with timestamp in native code. 
addNgramEntry(final NgramContext ngramContext, final String word, final int probability, final int timestamp)475 public boolean addNgramEntry(final NgramContext ngramContext, final String word, 476 final int probability, final int timestamp) { 477 if (!ngramContext.isValid() || TextUtils.isEmpty(word)) { 478 return false; 479 } 480 final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][]; 481 final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()]; 482 ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); 483 final int[] wordCodePoints = StringUtils.toCodePointArray(word); 484 if (!addNgramEntryNative(mNativeDict, prevWordCodePointArrays, 485 isBeginningOfSentenceArray, wordCodePoints, probability, timestamp)) { 486 return false; 487 } 488 mHasUpdated = true; 489 return true; 490 } 491 492 // Update entries for the word occurrence with the ngramContext. updateEntriesForWordWithNgramContext(@onnull final NgramContext ngramContext, final String word, final boolean isValidWord, final int count, final int timestamp)493 public boolean updateEntriesForWordWithNgramContext(@Nonnull final NgramContext ngramContext, 494 final String word, final boolean isValidWord, final int count, final int timestamp) { 495 if (TextUtils.isEmpty(word)) { 496 return false; 497 } 498 final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][]; 499 final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()]; 500 ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); 501 final int[] wordCodePoints = StringUtils.toCodePointArray(word); 502 if (!updateEntriesForWordWithNgramContextNative(mNativeDict, prevWordCodePointArrays, 503 isBeginningOfSentenceArray, wordCodePoints, isValidWord, count, timestamp)) { 504 return false; 505 } 506 mHasUpdated = true; 507 return true; 508 } 509 510 @UsedForTesting updateEntriesForInputEvents(final 
WordInputEventForPersonalization[] inputEvents)511 public void updateEntriesForInputEvents(final WordInputEventForPersonalization[] inputEvents) { 512 if (!isValidDictionary()) { 513 return; 514 } 515 int processedEventCount = 0; 516 while (processedEventCount < inputEvents.length) { 517 if (needsToRunGC(true /* mindsBlockByGC */)) { 518 flushWithGC(); 519 } 520 processedEventCount = updateEntriesForInputEventsNative(mNativeDict, inputEvents, 521 processedEventCount); 522 mHasUpdated = true; 523 if (processedEventCount <= 0) { 524 return; 525 } 526 } 527 } 528 reopen()529 private void reopen() { 530 close(); 531 final File dictFile = new File(mDictFilePath); 532 // WARNING: Because we pass 0 as the offset and file.length() as the length, this can 533 // only be called for actual files. Right now it's only called by the flush() family of 534 // functions, which require an updatable dictionary, so it's okay. But beware. 535 loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */, 536 dictFile.length(), mIsUpdatable); 537 } 538 539 // Flush to dict file if the dictionary has been updated. flush()540 public boolean flush() { 541 if (!isValidDictionary()) { 542 return false; 543 } 544 if (mHasUpdated) { 545 if (!flushNative(mNativeDict, mDictFilePath)) { 546 return false; 547 } 548 reopen(); 549 } 550 return true; 551 } 552 553 // Run GC and flush to dict file if the dictionary has been updated. flushWithGCIfHasUpdated()554 public boolean flushWithGCIfHasUpdated() { 555 if (mHasUpdated) { 556 return flushWithGC(); 557 } 558 return true; 559 } 560 561 // Run GC and flush to dict file. flushWithGC()562 public boolean flushWithGC() { 563 if (!isValidDictionary()) { 564 return false; 565 } 566 if (!flushWithGCNative(mNativeDict, mDictFilePath)) { 567 return false; 568 } 569 reopen(); 570 return true; 571 } 572 573 /** 574 * Checks whether GC is needed to run or not. 575 * @param mindsBlockByGC Whether to mind operations blocked by GC. 
We don't need to care about 576 * the blocking in some situations such as in idle time or just before closing. 577 * @return whether GC is needed to run or not. 578 */ needsToRunGC(final boolean mindsBlockByGC)579 public boolean needsToRunGC(final boolean mindsBlockByGC) { 580 if (!isValidDictionary()) { 581 return false; 582 } 583 return needsToRunGCNative(mNativeDict, mindsBlockByGC); 584 } 585 migrateTo(final int newFormatVersion)586 public boolean migrateTo(final int newFormatVersion) { 587 if (!isValidDictionary()) { 588 return false; 589 } 590 final File isMigratingDir = 591 new File(mDictFilePath + DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION); 592 if (isMigratingDir.exists()) { 593 isMigratingDir.delete(); 594 Log.e(TAG, "Previous migration attempt failed probably due to a crash. " 595 + "Giving up using the old dictionary (" + mDictFilePath + ")."); 596 return false; 597 } 598 if (!isMigratingDir.mkdir()) { 599 Log.e(TAG, "Cannot create a dir (" + isMigratingDir.getAbsolutePath() 600 + ") to record migration."); 601 return false; 602 } 603 try { 604 final String tmpDictFilePath = mDictFilePath + DICT_FILE_NAME_SUFFIX_FOR_MIGRATION; 605 if (!migrateNative(mNativeDict, tmpDictFilePath, newFormatVersion)) { 606 return false; 607 } 608 close(); 609 final File dictFile = new File(mDictFilePath); 610 final File tmpDictFile = new File(tmpDictFilePath); 611 if (!FileUtils.deleteRecursively(dictFile)) { 612 return false; 613 } 614 if (!BinaryDictionaryUtils.renameDict(tmpDictFile, dictFile)) { 615 return false; 616 } 617 loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */, 618 dictFile.length(), mIsUpdatable); 619 return true; 620 } finally { 621 isMigratingDir.delete(); 622 } 623 } 624 625 @UsedForTesting getPropertyForGettingStats(final String query)626 public String getPropertyForGettingStats(final String query) { 627 if (!isValidDictionary()) { 628 return ""; 629 } 630 return getPropertyNative(mNativeDict, query); 631 } 632 633 @Override 
shouldAutoCommit(final SuggestedWordInfo candidate)634 public boolean shouldAutoCommit(final SuggestedWordInfo candidate) { 635 return candidate.mAutoCommitFirstWordConfidence > CONFIDENCE_TO_AUTO_COMMIT; 636 } 637 638 @Override close()639 public void close() { 640 synchronized (mDicTraverseSessions) { 641 final int sessionsSize = mDicTraverseSessions.size(); 642 for (int index = 0; index < sessionsSize; ++index) { 643 final DicTraverseSession traverseSession = mDicTraverseSessions.valueAt(index); 644 if (traverseSession != null) { 645 traverseSession.close(); 646 } 647 } 648 mDicTraverseSessions.clear(); 649 } 650 closeInternalLocked(); 651 } 652 closeInternalLocked()653 private synchronized void closeInternalLocked() { 654 if (mNativeDict != 0) { 655 closeNative(mNativeDict); 656 mNativeDict = 0; 657 } 658 } 659 660 // TODO: Manage BinaryDictionary instances without using WeakReference or something. 661 @Override finalize()662 protected void finalize() throws Throwable { 663 try { 664 closeInternalLocked(); 665 } finally { 666 super.finalize(); 667 } 668 } 669 } 670