1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License
15  */
16 
17 package com.android.providers.contacts;
18 
19 import android.provider.ContactsContract.FullNameStyle;
20 
21 import com.android.providers.contacts.ContactsDatabaseHelper.NameLookupType;
22 import com.android.providers.contacts.SearchIndexManager.IndexBuilder;
23 
24 import java.util.Arrays;
25 import java.util.Comparator;
26 import java.util.Iterator;
27 
28 /**
29  * Given a full name, constructs all possible variants of the name.
30  */
31 public abstract class NameLookupBuilder {
32 
33     private static final int MAX_NAME_TOKENS = 4;
34 
35     private final NameSplitter mSplitter;
36     private String[][] mNicknameClusters = new String[MAX_NAME_TOKENS][];
37     private StringBuilder mStringBuilder = new StringBuilder();
38     private String[] mNames = new String[NameSplitter.MAX_TOKENS];
39 
40     private static final int[] KOREAN_JAUM_CONVERT_MAP = {
41         // JAUM in Hangul Compatibility Jamo area 0x3131 ~ 0x314E to
42         // in Hangul Jamo area 0x1100 ~ 0x1112
43         0x1100, // 0x3131 HANGUL LETTER KIYEOK
44         0x1101, // 0x3132 HANGUL LETTER SSANGKIYEOK
45         0x00,   // 0x3133 HANGUL LETTER KIYEOKSIOS (Ignored)
46         0x1102, // 0x3134 HANGUL LETTER NIEUN
47         0x00,   // 0x3135 HANGUL LETTER NIEUNCIEUC (Ignored)
48         0x00,   // 0x3136 HANGUL LETTER NIEUNHIEUH (Ignored)
49         0x1103, // 0x3137 HANGUL LETTER TIKEUT
50         0x1104, // 0x3138 HANGUL LETTER SSANGTIKEUT
51         0x1105, // 0x3139 HANGUL LETTER RIEUL
52         0x00,   // 0x313A HANGUL LETTER RIEULKIYEOK (Ignored)
53         0x00,   // 0x313B HANGUL LETTER RIEULMIEUM (Ignored)
54         0x00,   // 0x313C HANGUL LETTER RIEULPIEUP (Ignored)
55         0x00,   // 0x313D HANGUL LETTER RIEULSIOS (Ignored)
56         0x00,   // 0x313E HANGUL LETTER RIEULTHIEUTH (Ignored)
57         0x00,   // 0x313F HANGUL LETTER RIEULPHIEUPH (Ignored)
58         0x00,   // 0x3140 HANGUL LETTER RIEULHIEUH (Ignored)
59         0x1106, // 0x3141 HANGUL LETTER MIEUM
60         0x1107, // 0x3142 HANGUL LETTER PIEUP
61         0x1108, // 0x3143 HANGUL LETTER SSANGPIEUP
62         0x00,   // 0x3144 HANGUL LETTER PIEUPSIOS (Ignored)
63         0x1109, // 0x3145 HANGUL LETTER SIOS
64         0x110A, // 0x3146 HANGUL LETTER SSANGSIOS
65         0x110B, // 0x3147 HANGUL LETTER IEUNG
66         0x110C, // 0x3148 HANGUL LETTER CIEUC
67         0x110D, // 0x3149 HANGUL LETTER SSANGCIEUC
68         0x110E, // 0x314A HANGUL LETTER CHIEUCH
69         0x110F, // 0x314B HANGUL LETTER KHIEUKH
70         0x1110, // 0x314C HANGUL LETTER THIEUTH
71         0x1111, // 0x314D HANGUL LETTER PHIEUPH
72         0x1112  // 0x314E HANGUL LETTER HIEUH
73     };
74 
NameLookupBuilder(NameSplitter splitter)75     public NameLookupBuilder(NameSplitter splitter) {
76         mSplitter = splitter;
77     }
78 
79     /**
80      * Inserts a name lookup record with the supplied column values.
81      */
insertNameLookup(long rawContactId, long dataId, int lookupType, String string)82     protected abstract void insertNameLookup(long rawContactId, long dataId, int lookupType,
83             String string);
84 
85     /**
86      * Returns common nickname cluster IDs for a given name. For example, it
87      * will return the same value for "Robert", "Bob" and "Rob". Some names belong to multiple
88      * clusters, e.g. Leo could be Leonard or Leopold.
89      *
90      * May return null.
91      *
92      * @param normalizedName A normalized first name, see {@link NameNormalizer#normalize}.
93      */
getCommonNicknameClusters(String normalizedName)94     protected abstract String[] getCommonNicknameClusters(String normalizedName);
95 
96     /**
97      * Inserts name lookup records for the given structured name.
98      */
insertNameLookup(long rawContactId, long dataId, String name, int fullNameStyle)99     public void insertNameLookup(long rawContactId, long dataId, String name, int fullNameStyle) {
100         int tokenCount = mSplitter.tokenize(mNames, name);
101         if (tokenCount == 0) {
102             return;
103         }
104 
105         for (int i = 0; i < tokenCount; i++) {
106             mNames[i] = normalizeName(mNames[i]);
107         }
108 
109         boolean tooManyTokens = tokenCount > MAX_NAME_TOKENS;
110         if (tooManyTokens) {
111             insertNameVariant(rawContactId, dataId, tokenCount, NameLookupType.NAME_EXACT, true);
112 
113             // Favor longer parts of the name
114             Arrays.sort(mNames, 0, tokenCount, new Comparator<String>() {
115 
116                 public int compare(String s1, String s2) {
117                     return s2.length() - s1.length();
118                 }
119             });
120 
121             // Insert a collation key for each extra word - useful for contact filtering
122             // and suggestions
123             String firstToken = mNames[0];
124             for (int i = MAX_NAME_TOKENS; i < tokenCount; i++) {
125                 mNames[0] = mNames[i];
126                 insertCollationKey(rawContactId, dataId, MAX_NAME_TOKENS);
127             }
128             mNames[0] = firstToken;
129 
130             tokenCount = MAX_NAME_TOKENS;
131         }
132 
133         // Phase I: insert all variants not involving nickname clusters
134         for (int i = 0; i < tokenCount; i++) {
135             mNicknameClusters[i] = getCommonNicknameClusters(mNames[i]);
136         }
137 
138         insertNameVariants(rawContactId, dataId, 0, tokenCount, !tooManyTokens, true);
139         insertNicknamePermutations(rawContactId, dataId, 0, tokenCount);
140     }
141 
appendToSearchIndex(IndexBuilder builder, String name, int fullNameStyle)142     public void appendToSearchIndex(IndexBuilder builder, String name, int fullNameStyle) {
143         int tokenCount = mSplitter.tokenize(mNames, name);
144         if (tokenCount == 0) {
145             return;
146         }
147 
148         for (int i = 0; i < tokenCount; i++) {
149             builder.appendName(mNames[i]);
150         }
151 
152         appendNameShorthandLookup(builder, name, fullNameStyle);
153         appendNameLookupForLocaleBasedName(builder, name, fullNameStyle);
154     }
155 
156     /**
157      * Insert more name indexes according to locale specifies.
158      */
appendNameLookupForLocaleBasedName(IndexBuilder builder, String fullName, int fullNameStyle)159     private void appendNameLookupForLocaleBasedName(IndexBuilder builder,
160             String fullName, int fullNameStyle) {
161         if (fullNameStyle == FullNameStyle.KOREAN) {
162             NameSplitter.Name name = new NameSplitter.Name();
163             mSplitter.split(name, fullName, fullNameStyle);
164             if (name.givenNames != null) {
165                 builder.appendName(name.givenNames);
166                 appendKoreanNameConsonantsLookup(builder, name.givenNames);
167             }
168             appendKoreanNameConsonantsLookup(builder, fullName);
169         }
170     }
171 
172     /**
173      * Inserts Korean lead consonants records of name for the given structured name.
174      */
appendKoreanNameConsonantsLookup(IndexBuilder builder, String name)175     private void appendKoreanNameConsonantsLookup(IndexBuilder builder, String name) {
176         int position = 0;
177         int consonantLength = 0;
178         int character;
179 
180         final int stringLength = name.length();
181         mStringBuilder.setLength(0);
182         do {
183             character = name.codePointAt(position++);
184             if ((character == 0x20) || (character == 0x2c) || (character == 0x2E)) {
185                 // Skip spaces, commas and periods.
186                 continue;
187             }
188             // Exclude characters that are not in Korean leading consonants area
189             // and Korean characters area.
190             if ((character < 0x1100) || (character > 0x1112 && character < 0x3131) ||
191                     (character > 0x314E && character < 0xAC00) ||
192                     (character > 0xD7A3)) {
193                 break;
194             }
195             // Decompose and take a only lead-consonant for composed Korean characters.
196             if (character >= 0xAC00) {
197                 // Lead consonant = "Lead consonant base" +
198                 //      (character - "Korean Character base") /
199                 //          ("Lead consonant count" * "middle Vowel count")
200                 character = 0x1100 + (character - 0xAC00) / 588;
201             } else if (character >= 0x3131) {
202                 // Hangul Compatibility Jamo area 0x3131 ~ 0x314E :
203                 // Convert to Hangul Jamo area 0x1100 ~ 0x1112
204                 if (character - 0x3131 >= KOREAN_JAUM_CONVERT_MAP.length) {
205                     // This is not lead-consonant
206                     break;
207                 }
208                 character = KOREAN_JAUM_CONVERT_MAP[character - 0x3131];
209                 if (character == 0) {
210                     // This is not lead-consonant
211                     break;
212                 }
213             }
214             mStringBuilder.appendCodePoint(character);
215             consonantLength++;
216         } while (position < stringLength);
217 
218         // At least, insert consonants when Korean characters are two or more.
219         // Only one character cases are covered by NAME_COLLATION_KEY
220         if (consonantLength > 1) {
221             builder.appendName(mStringBuilder.toString());
222         }
223     }
224 
normalizeName(String name)225     protected String normalizeName(String name) {
226         return NameNormalizer.normalize(name);
227     }
228 
229     /**
230      * Inserts all name variants based on permutations of tokens between
231      * fromIndex and toIndex
232      *
233      * @param initiallyExact true if the name without permutations is the exact
234      *            original name
235      * @param buildCollationKey true if a collation key makes sense for these
236      *            permutations (false if at least one of the tokens is a
237      *            nickname cluster key)
238      */
insertNameVariants(long rawContactId, long dataId, int fromIndex, int toIndex, boolean initiallyExact, boolean buildCollationKey)239     private void insertNameVariants(long rawContactId, long dataId, int fromIndex, int toIndex,
240             boolean initiallyExact, boolean buildCollationKey) {
241         if (fromIndex == toIndex) {
242             insertNameVariant(rawContactId, dataId, toIndex,
243                     initiallyExact ? NameLookupType.NAME_EXACT : NameLookupType.NAME_VARIANT,
244                     buildCollationKey);
245             return;
246         }
247 
248         // Swap the first token with each other token (including itself, which is a no-op)
249         // and recursively insert all permutations for the remaining tokens
250         String firstToken = mNames[fromIndex];
251         for (int i = fromIndex; i < toIndex; i++) {
252             mNames[fromIndex] = mNames[i];
253             mNames[i] = firstToken;
254 
255             insertNameVariants(rawContactId, dataId, fromIndex + 1, toIndex,
256                     initiallyExact && i == fromIndex, buildCollationKey);
257 
258             mNames[i] = mNames[fromIndex];
259             mNames[fromIndex] = firstToken;
260         }
261     }
262 
263     /**
264      * Inserts a single name variant and optionally its collation key counterpart.
265      */
insertNameVariant(long rawContactId, long dataId, int tokenCount, int lookupType, boolean buildCollationKey)266     private void insertNameVariant(long rawContactId, long dataId, int tokenCount,
267             int lookupType, boolean buildCollationKey) {
268         mStringBuilder.setLength(0);
269 
270         for (int i = 0; i < tokenCount; i++) {
271             if (i != 0) {
272                 mStringBuilder.append('.');
273             }
274             mStringBuilder.append(mNames[i]);
275         }
276 
277         insertNameLookup(rawContactId, dataId, lookupType, mStringBuilder.toString());
278 
279         if (buildCollationKey) {
280             insertCollationKey(rawContactId, dataId, tokenCount);
281         }
282     }
283 
284     /**
285      * Inserts a collation key for the current contents of {@link #mNames}.
286      */
insertCollationKey(long rawContactId, long dataId, int tokenCount)287     private void insertCollationKey(long rawContactId, long dataId, int tokenCount) {
288         mStringBuilder.setLength(0);
289 
290         for (int i = 0; i < tokenCount; i++) {
291             mStringBuilder.append(mNames[i]);
292         }
293 
294         insertNameLookup(rawContactId, dataId, NameLookupType.NAME_COLLATION_KEY,
295                 mStringBuilder.toString());
296     }
297 
298     /**
299      * For all tokens that correspond to nickname clusters, substitutes each cluster key
300      * and inserts all permutations with that key.
301      */
insertNicknamePermutations(long rawContactId, long dataId, int fromIndex, int tokenCount)302     private void insertNicknamePermutations(long rawContactId, long dataId, int fromIndex,
303             int tokenCount) {
304         for (int i = fromIndex; i < tokenCount; i++) {
305             String[] clusters = mNicknameClusters[i];
306             if (clusters != null) {
307                 String token = mNames[i];
308                 for (int j = 0; j < clusters.length; j++) {
309                     mNames[i] = clusters[j];
310 
311                     // Insert all permutations with this nickname cluster
312                     insertNameVariants(rawContactId, dataId, 0, tokenCount, false, false);
313 
314                     // Repeat recursively for other nickname clusters
315                     insertNicknamePermutations(rawContactId, dataId, i + 1, tokenCount);
316                 }
317                 mNames[i] = token;
318             }
319         }
320     }
321 
322     /**
323      * Insert more name indexes according to locale specifies for those locales
324      * for which we have alternative shorthand name methods (eg, Pinyin for
325      * Chinese, Romaji for Japanese).
326      */
appendNameShorthandLookup(IndexBuilder builder, String name, int fullNameStyle)327     public void appendNameShorthandLookup(IndexBuilder builder, String name, int fullNameStyle) {
328         Iterator<String> it =
329                 ContactLocaleUtils.getInstance().getNameLookupKeys(name, fullNameStyle);
330         if (it != null) {
331             while (it.hasNext()) {
332                 builder.appendName(it.next());
333             }
334         }
335     }
336 }
337