/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import android.text.TextUtils;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.common.StringUtils;
import com.android.inputmethod.latin.define.DecoderSpecificConstants;

import java.util.ArrayList;
import java.util.Arrays;

import javax.annotation.Nonnull;

/**
 * Class to represent information of previous words. This class is used to add n-gram entries
 * into binary dictionaries, to get predictions, and to get suggestions.
 *
 * <p>Index 0 of the internal array is the word immediately before the considered word, index 1
 * the word before that, and so on.
 */
public class NgramContext {
    /** Context made of a single {@link WordInfo#EMPTY_WORD_INFO} entry. */
    @Nonnull
    public static final NgramContext EMPTY_PREV_WORDS_INFO =
            new NgramContext(WordInfo.EMPTY_WORD_INFO);
    /** Context made of a single {@link WordInfo#BEGINNING_OF_SENTENCE_WORD_INFO} entry. */
    @Nonnull
    public static final NgramContext BEGINNING_OF_SENTENCE =
            new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO);

    /** Term emitted by the extract* methods for a beginning-of-sentence entry. */
    public static final String BEGINNING_OF_SENTENCE_TAG = "<S>";

    /** Separator placed between terms by {@link #extractPrevWordsContext()}. */
    public static final String CONTEXT_SEPARATOR = " ";

    /**
     * Creates a context holding a single empty word entry, with a custom limit on the number
     * of previous words kept by {@link #getNextNgramContext(WordInfo)}.
     *
     * @param maxPrevWordCount the maximum number of previous words the context chain may hold.
     * @return a new context containing only {@link WordInfo#EMPTY_WORD_INFO}.
     */
    public static NgramContext getEmptyPrevWordsContext(int maxPrevWordCount) {
        return new NgramContext(maxPrevWordCount, WordInfo.EMPTY_WORD_INFO);
    }

    /**
     * Word information used to represent previous words information.
     */
    public static class WordInfo {
        /** Entry with a null word: no context is known for this position. */
        @Nonnull
        public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null);
        /** Entry marking the beginning of a sentence; its word is the empty string. */
        @Nonnull
        public static final WordInfo BEGINNING_OF_SENTENCE_WORD_INFO = new WordInfo();

        // This is an empty char sequence when mIsBeginningOfSentence is true.
        public final CharSequence mWord;
        // TODO: Have sentence separator.
        // Whether the current context is beginning of sentence or not. This is true when composing
        // at the beginning of an input field or composing a word after a sentence separator.
        public final boolean mIsBeginningOfSentence;

        // Beginning of sentence.
        private WordInfo() {
            mWord = "";
            mIsBeginningOfSentence = true;
        }

        /**
         * Creates an entry for a regular (non beginning-of-sentence) word.
         *
         * @param word the word; {@code null} produces an entry for which {@link #isValid()}
         *        returns false.
         */
        public WordInfo(final CharSequence word) {
            mWord = word;
            mIsBeginningOfSentence = false;
        }

        /**
         * @return true when this entry carries a word (possibly empty), false when the word is
         *         null.
         */
        public boolean isValid() {
            return mWord != null;
        }

        @Override
        public int hashCode() {
            // equals() compares the *content* of mWord, so the hash must be derived from the
            // content as well; hashing the CharSequence object itself would give different
            // values for e.g. a String and a StringBuilder holding the same characters.
            final String word = (mWord == null) ? null : mWord.toString();
            return Arrays.hashCode(new Object[] { word, mIsBeginningOfSentence });
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (!(o instanceof WordInfo)) {
                return false;
            }
            final WordInfo wordInfo = (WordInfo) o;
            // TextUtils.equals handles null on either side, so no separate null branch is needed.
            return TextUtils.equals(mWord, wordInfo.mWord)
                    && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
        }
    }

    // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
    // have any context for that previous word including the "beginning of sentence context" - we
    // just don't know what to predict using the information. An example of that is after a comma.
    // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the
    // WordComposer was reset and before starting a new composing word, but we should never be
    // calling getSuggestions* in this situation.
    private final WordInfo[] mPrevWordsInfo;
    private final int mPrevWordsCount;

    private final int mMaxPrevWordCount;

    /**
     * Constructs a context from the previous word information, using
     * {@link DecoderSpecificConstants#MAX_PREV_WORD_COUNT_FOR_N_GRAM} as the maximum count.
     *
     * @param prevWordsInfo previous words, nearest word first.
     */
    public NgramContext(final WordInfo... prevWordsInfo) {
        this(DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, prevWordsInfo);
    }

    /**
     * Constructs a context from the previous word information.
     *
     * @param maxPrevWordCount upper bound applied by {@link #getNextNgramContext(WordInfo)}.
     * @param prevWordsInfo previous words, nearest word first. The array is stored as is, not
     *        copied.
     */
    public NgramContext(final int maxPrevWordCount, final WordInfo... prevWordsInfo) {
        mPrevWordsInfo = prevWordsInfo;
        mPrevWordsCount = prevWordsInfo.length;
        mMaxPrevWordCount = maxPrevWordCount;
    }

    /**
     * Create next prevWordsInfo using current prevWordsInfo.
     *
     * <p>The given word becomes the nearest previous word; existing entries are shifted back by
     * one and the oldest ones are dropped once the maximum count is reached.
     * NOTE(review): this writes index 0 unconditionally, so it presumably relies on the maximum
     * previous word count being at least 1 - confirm against callers.
     *
     * @param wordInfo the word that was just committed.
     * @return the new context; this instance is not modified.
     */
    @Nonnull
    public NgramContext getNextNgramContext(final WordInfo wordInfo) {
        final int nextPrevWordCount = Math.min(mMaxPrevWordCount, mPrevWordsCount + 1);
        final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
        prevWordsInfo[0] = wordInfo;
        System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);
        return new NgramContext(mMaxPrevWordCount, prevWordsInfo);
    }

    /**
     * Extracts the previous words context.
     *
     * @return a String with the previous words separated by white space.
     */
    public String extractPrevWordsContext() {
        return TextUtils.join(CONTEXT_SEPARATOR, collectPrevTerms());
    }

    /**
     * Extracts the previous words context.
     *
     * @return a String array with the previous words.
     */
    public String[] extractPrevWordsContextArray() {
        final ArrayList<String> prevTermList = collectPrevTerms();
        return prevTermList.toArray(new String[prevTermList.size()]);
    }

    /**
     * Collects the textual terms of this context, oldest word first. Null and invalid entries
     * as well as empty words are skipped; beginning-of-sentence entries are rendered as
     * {@link #BEGINNING_OF_SENTENCE_TAG}.
     */
    private ArrayList<String> collectPrevTerms() {
        final ArrayList<String> terms = new ArrayList<>();
        // Index 0 is the nearest word, so walk backwards to get reading order.
        for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) {
            final WordInfo wordInfo = mPrevWordsInfo[i];
            if (wordInfo == null || !wordInfo.isValid()) {
                continue;
            }
            if (wordInfo.mIsBeginningOfSentence) {
                terms.add(BEGINNING_OF_SENTENCE_TAG);
                continue;
            }
            final String term = wordInfo.mWord.toString();
            if (!term.isEmpty()) {
                terms.add(term);
            }
        }
        return terms;
    }

    /**
     * @return true when there is at least one previous word entry and the nearest one is
     *         non-null and valid.
     */
    public boolean isValid() {
        return mPrevWordsCount > 0 && mPrevWordsInfo[0] != null && mPrevWordsInfo[0].isValid();
    }

    /**
     * @return true when the nearest previous word entry is a beginning-of-sentence marker.
     */
    public boolean isBeginningOfSentenceContext() {
        return mPrevWordsCount > 0 && mPrevWordsInfo[0] != null
                && mPrevWordsInfo[0].mIsBeginningOfSentence;
    }

    // n is 1-indexed.
    // TODO: Remove
    /**
     * @param n 1-indexed position of the previous word, 1 being the nearest.
     * @return the n-th previous word, or null when n is out of range or no word is known there.
     */
    public CharSequence getNthPrevWord(final int n) {
        if (n <= 0 || n > mPrevWordsCount) {
            return null;
        }
        final WordInfo wordInfo = mPrevWordsInfo[n - 1];
        return wordInfo == null ? null : wordInfo.mWord;
    }

    // n is 1-indexed.
    /**
     * @param n 1-indexed position of the previous word, 1 being the nearest.
     * @return whether the n-th previous entry is a beginning-of-sentence marker; false when n is
     *         out of range or the entry is null.
     */
    @UsedForTesting
    public boolean isNthPrevWordBeginningOfSentence(final int n) {
        if (n <= 0 || n > mPrevWordsCount) {
            return false;
        }
        final WordInfo wordInfo = mPrevWordsInfo[n - 1];
        return wordInfo != null && wordInfo.mIsBeginningOfSentence;
    }

    /**
     * Writes this context into the given parallel arrays, nearest word first. Null or invalid
     * entries are written as an empty code point array with a false flag.
     *
     * @param codePointArrays receives the code points of each previous word; must have room
     *        for {@link #getPrevWordCount()} elements.
     * @param isBeginningOfSentenceArray receives the beginning-of-sentence flag of each
     *        previous word; must have room for {@link #getPrevWordCount()} elements.
     */
    public void outputToArray(final int[][] codePointArrays,
            final boolean[] isBeginningOfSentenceArray) {
        for (int i = 0; i < mPrevWordsCount; i++) {
            final WordInfo wordInfo = mPrevWordsInfo[i];
            if (wordInfo == null || !wordInfo.isValid()) {
                codePointArrays[i] = new int[0];
                isBeginningOfSentenceArray[i] = false;
                continue;
            }
            codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord);
            isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence;
        }
    }

    /**
     * @return the number of previous word entries held by this context.
     */
    public int getPrevWordCount() {
        return mPrevWordsCount;
    }

    /**
     * Tells whether the entry carries no context at all, i.e. it is null or equal to
     * {@link WordInfo#EMPTY_WORD_INFO}. Both cases are treated alike by equals() and hashCode().
     */
    private static boolean hasNoContext(final WordInfo wordInfo) {
        return wordInfo == null || WordInfo.EMPTY_WORD_INFO.equals(wordInfo);
    }

    @Override
    public int hashCode() {
        // Hash only the entries before the first null/empty one. equals() ignores trailing
        // null/empty entries, so anything at or after that point must not contribute, otherwise
        // two equal contexts of different lengths would get different hash codes.
        int hashValue = 0;
        for (final WordInfo wordInfo : mPrevWordsInfo) {
            if (hasNoContext(wordInfo)) {
                break;
            }
            hashValue = 31 * hashValue + wordInfo.hashCode();
        }
        return hashValue;
    }

    /**
     * Two contexts are equal when their entries match position by position, where a null entry
     * and an empty entry are considered the same, and the longer context only has null or empty
     * entries past the length of the shorter one.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof NgramContext)) {
            return false;
        }
        final NgramContext prevWordsInfo = (NgramContext) o;

        final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount);
        for (int i = 0; i < minLength; i++) {
            final WordInfo mine = mPrevWordsInfo[i];
            final WordInfo theirs = prevWordsInfo.mPrevWordsInfo[i];
            final boolean mineHasNoContext = hasNoContext(mine);
            if (mineHasNoContext != hasNoContext(theirs)) {
                return false;
            }
            // Both entries are non-null here unless both carry no context.
            if (!mineHasNoContext && !mine.equals(theirs)) {
                return false;
            }
        }
        final WordInfo[] longerWordsInfo;
        final int longerWordsInfoCount;
        if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) {
            longerWordsInfo = mPrevWordsInfo;
            longerWordsInfoCount = mPrevWordsCount;
        } else {
            longerWordsInfo = prevWordsInfo.mPrevWordsInfo;
            longerWordsInfoCount = prevWordsInfo.mPrevWordsCount;
        }
        // Extra entries of the longer context must not carry any information.
        for (int i = minLength; i < longerWordsInfoCount; i++) {
            if (!hasNoContext(longerWordsInfo[i])) {
                return false;
            }
        }
        return true;
    }

    @Override
    public String toString() {
        final StringBuilder builder = new StringBuilder();
        for (int i = 0; i < mPrevWordsCount; i++) {
            final WordInfo wordInfo = mPrevWordsInfo[i];
            builder.append("PrevWord[");
            builder.append(i);
            builder.append("]: ");
            if (wordInfo == null) {
                builder.append("null. ");
                continue;
            }
            if (!wordInfo.isValid()) {
                builder.append("Empty. ");
                continue;
            }
            builder.append(wordInfo.mWord);
            builder.append(", isBeginningOfSentence: ");
            builder.append(wordInfo.mIsBeginningOfSentence);
            builder.append(". ");
        }
        return builder.toString();
    }
}