/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 */

package com.android.providers.contacts;

import android.provider.ContactsContract.FullNameStyle;

import com.android.providers.contacts.ContactsDatabaseHelper.NameLookupType;
import com.android.providers.contacts.SearchIndexManager.IndexBuilder;

import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;

/**
 * Given a full name, constructs all possible variants of the name.
 *
 * <p>Subclasses decide where the generated records go by implementing
 * {@link #insertNameLookup(long, long, int, String)} and provide nickname data through
 * {@link #getCommonNicknameClusters(String)}.
 *
 * <p>An instance reuses its token array and string builder as scratch space across calls and
 * performs no synchronization, so a single instance must not be used from several threads at
 * once.
 */
public abstract class NameLookupBuilder {

    /** Maximum number of tokens for which permutations are generated. */
    private static final int MAX_NAME_TOKENS = 4;

    /** First and last code points of the Hangul Jamo lead consonants (choseong). */
    private static final int HANGUL_JAMO_LEAD_FIRST = 0x1100;
    private static final int HANGUL_JAMO_LEAD_LAST = 0x1112;

    /** First and last code points of the Hangul Compatibility Jamo consonants. */
    private static final int HANGUL_COMPAT_JAMO_FIRST = 0x3131;
    private static final int HANGUL_COMPAT_JAMO_LAST = 0x314E;

    /** First and last code points of the precomposed Hangul syllables. */
    private static final int HANGUL_SYLLABLE_FIRST = 0xAC00;
    private static final int HANGUL_SYLLABLE_LAST = 0xD7A3;

    /**
     * Number of precomposed syllables that share one lead consonant:
     * 21 medial vowels * 28 final forms (27 trailing consonants plus "none").
     */
    private static final int HANGUL_SYLLABLES_PER_LEAD = 588;

    /**
     * Orders strings from longest to shortest. The subtraction cannot overflow because
     * string lengths are never negative.
     */
    private static final Comparator<String> LONGEST_FIRST = new Comparator<String>() {
        @Override
        public int compare(String s1, String s2) {
            return s2.length() - s1.length();
        }
    };

    private static final int[] KOREAN_JAUM_CONVERT_MAP = {
        // JAUM in Hangul Compatibility Jamo area 0x3131 ~ 0x314E to
        // in Hangul Jamo area 0x1100 ~ 0x1112
        0x1100, // 0x3131 HANGUL LETTER KIYEOK
        0x1101, // 0x3132 HANGUL LETTER SSANGKIYEOK
        0x00,   // 0x3133 HANGUL LETTER KIYEOKSIOS (Ignored)
        0x1102, // 0x3134 HANGUL LETTER NIEUN
        0x00,   // 0x3135 HANGUL LETTER NIEUNCIEUC (Ignored)
        0x00,   // 0x3136 HANGUL LETTER NIEUNHIEUH (Ignored)
        0x1103, // 0x3137 HANGUL LETTER TIKEUT
        0x1104, // 0x3138 HANGUL LETTER SSANGTIKEUT
        0x1105, // 0x3139 HANGUL LETTER RIEUL
        0x00,   // 0x313A HANGUL LETTER RIEULKIYEOK (Ignored)
        0x00,   // 0x313B HANGUL LETTER RIEULMIEUM (Ignored)
        0x00,   // 0x313C HANGUL LETTER RIEULPIEUP (Ignored)
        0x00,   // 0x313D HANGUL LETTER RIEULSIOS (Ignored)
        0x00,   // 0x313E HANGUL LETTER RIEULTHIEUTH (Ignored)
        0x00,   // 0x313F HANGUL LETTER RIEULPHIEUPH (Ignored)
        0x00,   // 0x3140 HANGUL LETTER RIEULHIEUH (Ignored)
        0x1106, // 0x3141 HANGUL LETTER MIEUM
        0x1107, // 0x3142 HANGUL LETTER PIEUP
        0x1108, // 0x3143 HANGUL LETTER SSANGPIEUP
        0x00,   // 0x3144 HANGUL LETTER PIEUPSIOS (Ignored)
        0x1109, // 0x3145 HANGUL LETTER SIOS
        0x110A, // 0x3146 HANGUL LETTER SSANGSIOS
        0x110B, // 0x3147 HANGUL LETTER IEUNG
        0x110C, // 0x3148 HANGUL LETTER CIEUC
        0x110D, // 0x3149 HANGUL LETTER SSANGCIEUC
        0x110E, // 0x314A HANGUL LETTER CHIEUCH
        0x110F, // 0x314B HANGUL LETTER KHIEUKH
        0x1110, // 0x314C HANGUL LETTER THIEUTH
        0x1111, // 0x314D HANGUL LETTER PHIEUPH
        0x1112  // 0x314E HANGUL LETTER HIEUH
    };

    private final NameSplitter mSplitter;

    /** Nickname clusters for each of the (at most {@link #MAX_NAME_TOKENS}) current tokens. */
    private final String[][] mNicknameClusters = new String[MAX_NAME_TOKENS][];

    /** Scratch buffer shared by the methods that assemble lookup strings. */
    private final StringBuilder mStringBuilder = new StringBuilder();

    /** Scratch array holding the tokens of the name currently being processed. */
    private final String[] mNames = new String[NameSplitter.MAX_TOKENS];

    /**
     * Creates a builder that tokenizes and splits names with the given splitter.
     *
     * @param splitter the splitter used by every lookup-generating method of this class
     */
    public NameLookupBuilder(NameSplitter splitter) {
        mSplitter = splitter;
    }

    /**
     * Inserts a name lookup record with the supplied column values.
     */
    protected abstract void insertNameLookup(long rawContactId, long dataId, int lookupType,
            String string);

    /**
     * Returns common nickname cluster IDs for a given name. For example, it
     * will return the same value for "Robert", "Bob" and "Rob". Some names belong to multiple
     * clusters, e.g. Leo could be Leonard or Leopold.
     *
     * May return null.
     *
     * @param normalizedName A normalized first name, see {@link NameNormalizer#normalize}.
     */
    protected abstract String[] getCommonNicknameClusters(String normalizedName);

    /**
     * Inserts name lookup records for the given structured name.
     *
     * <p>The name is tokenized and each token is normalized. If there are more than
     * {@link #MAX_NAME_TOKENS} tokens, the full name is inserted once as an exact match, a
     * collation key is inserted for every surplus token, and only the
     * {@link #MAX_NAME_TOKENS} longest tokens take part in permutation. Nothing is inserted
     * when the name yields no tokens.
     *
     * @param rawContactId passed through to {@link #insertNameLookup(long, long, int, String)}
     * @param dataId passed through to {@link #insertNameLookup(long, long, int, String)}
     * @param name the full name to index
     * @param fullNameStyle not used by this method
     */
    public void insertNameLookup(long rawContactId, long dataId, String name, int fullNameStyle) {
        int tokenCount = mSplitter.tokenize(mNames, name);
        if (tokenCount == 0) {
            return;
        }

        for (int i = 0; i < tokenCount; i++) {
            mNames[i] = normalizeName(mNames[i]);
        }

        boolean tooManyTokens = tokenCount > MAX_NAME_TOKENS;
        if (tooManyTokens) {
            // The untruncated name is still recorded once, in its original order.
            insertNameVariant(rawContactId, dataId, tokenCount, NameLookupType.NAME_EXACT, true);

            // Favor longer parts of the name
            Arrays.sort(mNames, 0, tokenCount, LONGEST_FIRST);

            // Insert a collation key for each extra word - useful for contact filtering
            // and suggestions. Each extra word temporarily takes the first slot.
            String firstToken = mNames[0];
            for (int i = MAX_NAME_TOKENS; i < tokenCount; i++) {
                mNames[0] = mNames[i];
                insertCollationKey(rawContactId, dataId, MAX_NAME_TOKENS);
            }
            mNames[0] = firstToken;

            tokenCount = MAX_NAME_TOKENS;
        }

        // Look up the nickname clusters of every remaining token up front; they are
        // consumed by insertNicknamePermutations below.
        for (int i = 0; i < tokenCount; i++) {
            mNicknameClusters[i] = getCommonNicknameClusters(mNames[i]);
        }

        // Phase I: insert all variants not involving nickname clusters
        insertNameVariants(rawContactId, dataId, 0, tokenCount, !tooManyTokens, true);

        // Phase II: insert the variants in which tokens are replaced by cluster keys
        insertNicknamePermutations(rawContactId, dataId, 0, tokenCount);
    }

    /**
     * Appends every token of the given name to the search index, followed by the shorthand
     * and locale-specific lookups for the whole name. Does nothing when the name yields no
     * tokens.
     *
     * @param builder the index builder receiving the names
     * @param name the full name to index
     * @param fullNameStyle the style passed on to the shorthand and locale-based lookups
     */
    public void appendToSearchIndex(IndexBuilder builder, String name, int fullNameStyle) {
        int tokenCount = mSplitter.tokenize(mNames, name);
        if (tokenCount == 0) {
            return;
        }

        for (int i = 0; i < tokenCount; i++) {
            builder.appendName(mNames[i]);
        }

        appendNameShorthandLookup(builder, name, fullNameStyle);
        appendNameLookupForLocaleBasedName(builder, name, fullNameStyle);
    }

    /**
     * Appends additional locale-specific name lookups. Only
     * {@link FullNameStyle#KOREAN} is handled: the given names (when the splitter finds any)
     * and the lead-consonant strings of both the given names and the full name are appended.
     */
    private void appendNameLookupForLocaleBasedName(IndexBuilder builder,
            String fullName, int fullNameStyle) {
        if (fullNameStyle != FullNameStyle.KOREAN) {
            return;
        }

        NameSplitter.Name name = new NameSplitter.Name();
        mSplitter.split(name, fullName, fullNameStyle);
        if (name.givenNames != null) {
            builder.appendName(name.givenNames);
            appendKoreanNameConsonantsLookup(builder, name.givenNames);
        }
        appendKoreanNameConsonantsLookup(builder, fullName);
    }

    /**
     * Appends the string of Korean lead consonants of the given name to the index.
     *
     * <p>Spaces, commas and periods are skipped. Scanning stops at the first character that
     * is neither a Hangul lead consonant, a convertible Hangul Compatibility Jamo consonant,
     * nor a precomposed Hangul syllable. The collected consonants are appended only when
     * there are at least two of them. An empty name appends nothing.
     */
    private void appendKoreanNameConsonantsLookup(IndexBuilder builder, String name) {
        int consonantLength = 0;
        final int stringLength = name.length();
        mStringBuilder.setLength(0);

        // A bounded for-loop (rather than do/while) keeps an empty name from reaching
        // codePointAt(0), which would throw.
        for (int position = 0; position < stringLength; position++) {
            int character = name.codePointAt(position);
            if ((character == 0x20) || (character == 0x2c) || (character == 0x2E)) {
                // Skip spaces, commas and periods.
                continue;
            }
            // Exclude characters that are not in Korean leading consonants area
            // and Korean characters area.
            if ((character < HANGUL_JAMO_LEAD_FIRST)
                    || (character > HANGUL_JAMO_LEAD_LAST
                            && character < HANGUL_COMPAT_JAMO_FIRST)
                    || (character > HANGUL_COMPAT_JAMO_LAST
                            && character < HANGUL_SYLLABLE_FIRST)
                    || (character > HANGUL_SYLLABLE_LAST)) {
                break;
            }
            // Decompose and take a only lead-consonant for composed Korean characters.
            if (character >= HANGUL_SYLLABLE_FIRST) {
                // Lead consonant = "Lead consonant base" +
                //      (character - "Korean Character base") /
                //          ("middle Vowel count" * "final consonant form count")
                character = HANGUL_JAMO_LEAD_FIRST
                        + (character - HANGUL_SYLLABLE_FIRST) / HANGUL_SYLLABLES_PER_LEAD;
            } else if (character >= HANGUL_COMPAT_JAMO_FIRST) {
                // Hangul Compatibility Jamo area 0x3131 ~ 0x314E :
                // Convert to Hangul Jamo area 0x1100 ~ 0x1112
                int mapIndex = character - HANGUL_COMPAT_JAMO_FIRST;
                if (mapIndex >= KOREAN_JAUM_CONVERT_MAP.length) {
                    // This is not lead-consonant
                    break;
                }
                character = KOREAN_JAUM_CONVERT_MAP[mapIndex];
                if (character == 0) {
                    // This is not lead-consonant
                    break;
                }
            }
            mStringBuilder.appendCodePoint(character);
            consonantLength++;
        }

        // At least, insert consonants when Korean characters are two or more.
        // Only one character cases are covered by NAME_COLLATION_KEY
        if (consonantLength > 1) {
            builder.appendName(mStringBuilder.toString());
        }
    }

    /**
     * Normalizes a single name token by delegating to {@link NameNormalizer#normalize}.
     * Subclasses may override this to change how tokens are normalized.
     */
    protected String normalizeName(String name) {
        return NameNormalizer.normalize(name);
    }

    /**
     * Inserts all name variants based on permutations of tokens between
     * fromIndex and toIndex
     *
     * @param initiallyExact true if the name without permutations is the exact
     *            original name
     * @param buildCollationKey true if a collation key makes sense for these
     *            permutations (false if at least one of the tokens is a
     *            nickname cluster key)
     */
    private void insertNameVariants(long rawContactId, long dataId, int fromIndex, int toIndex,
            boolean initiallyExact, boolean buildCollationKey) {
        if (fromIndex == toIndex) {
            // Every position is fixed: emit the current arrangement.
            insertNameVariant(rawContactId, dataId, toIndex,
                    initiallyExact ? NameLookupType.NAME_EXACT : NameLookupType.NAME_VARIANT,
                    buildCollationKey);
            return;
        }

        // Swap the first token with each other token (including itself, which is a no-op)
        // and recursively insert all permutations for the remaining tokens
        String firstToken = mNames[fromIndex];
        for (int i = fromIndex; i < toIndex; i++) {
            mNames[fromIndex] = mNames[i];
            mNames[i] = firstToken;

            // Only the arrangement that never moved a token can still be the exact name.
            insertNameVariants(rawContactId, dataId, fromIndex + 1, toIndex,
                    initiallyExact && i == fromIndex, buildCollationKey);

            // Undo the swap so the next iteration starts from the same arrangement.
            mNames[i] = mNames[fromIndex];
            mNames[fromIndex] = firstToken;
        }
    }

    /**
     * Inserts a single name variant and optionally its collation key counterpart.
     * The variant is the first {@code tokenCount} entries of {@link #mNames} joined by '.'.
     */
    private void insertNameVariant(long rawContactId, long dataId, int tokenCount,
            int lookupType, boolean buildCollationKey) {
        mStringBuilder.setLength(0);

        for (int i = 0; i < tokenCount; i++) {
            if (i != 0) {
                mStringBuilder.append('.');
            }
            mStringBuilder.append(mNames[i]);
        }

        insertNameLookup(rawContactId, dataId, lookupType, mStringBuilder.toString());

        if (buildCollationKey) {
            insertCollationKey(rawContactId, dataId, tokenCount);
        }
    }

    /**
     * Inserts a collation key for the current contents of {@link #mNames}.
     * The key is the first {@code tokenCount} tokens concatenated without a separator.
     */
    private void insertCollationKey(long rawContactId, long dataId, int tokenCount) {
        mStringBuilder.setLength(0);

        for (int i = 0; i < tokenCount; i++) {
            mStringBuilder.append(mNames[i]);
        }

        insertNameLookup(rawContactId, dataId, NameLookupType.NAME_COLLATION_KEY,
                mStringBuilder.toString());
    }

    /**
     * For all tokens that correspond to nickname clusters, substitutes each cluster key
     * and inserts all permutations with that key.
     */
    private void insertNicknamePermutations(long rawContactId, long dataId, int fromIndex,
            int tokenCount) {
        for (int i = fromIndex; i < tokenCount; i++) {
            String[] clusters = mNicknameClusters[i];
            if (clusters == null) {
                continue;
            }

            String token = mNames[i];
            for (int j = 0; j < clusters.length; j++) {
                mNames[i] = clusters[j];

                // Insert all permutations with this nickname cluster
                insertNameVariants(rawContactId, dataId, 0, tokenCount, false, false);

                // Repeat recursively for other nickname clusters
                insertNicknamePermutations(rawContactId, dataId, i + 1, tokenCount);
            }
            // Restore the original token before moving on to the next position.
            mNames[i] = token;
        }
    }

    /**
     * Insert more name indexes according to locale specifies for those locales
     * for which we have alternative shorthand name methods (eg, Pinyin for
     * Chinese, Romaji for Japanese).
     *
     * <p>Every key returned by {@link ContactLocaleUtils#getNameLookupKeys} is appended to
     * the index; nothing is appended when it returns null.
     */
    public void appendNameShorthandLookup(IndexBuilder builder, String name, int fullNameStyle) {
        Iterator<String> it =
                ContactLocaleUtils.getInstance().getNameLookupKeys(name, fullNameStyle);
        if (it == null) {
            return;
        }
        while (it.hasNext()) {
            builder.appendName(it.next());
        }
    }
}