1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.dialer.smartdial.util;
18 
19 import android.content.Context;
20 import android.support.annotation.Nullable;
21 import android.text.TextUtils;
22 import com.android.dialer.smartdial.map.CompositeSmartDialMap;
23 import com.android.dialer.smartdial.util.SmartDialPrefix.PhoneNumberTokens;
24 import java.util.ArrayList;
25 
26 /**
27  * {@link #SmartDialNameMatcher} contains utility functions to remove accents from accented
28  * characters and normalize a phone number. It also contains the matching logic that determines if a
29  * contact's display name matches a numeric query. The boolean variable {@link #ALLOW_INITIAL_MATCH}
30  * controls the behavior of the matching logic and determines whether we allow matches like 57 -
31  * (J)ohn (S)mith.
32  */
33 public class SmartDialNameMatcher {
34   // Whether or not we allow matches like 57 - (J)ohn (S)mith
35   private static final boolean ALLOW_INITIAL_MATCH = true;
36 
37   // The maximum length of the initial we will match - typically set to 1 to minimize false
38   // positives
39   private static final int INITIAL_LENGTH_LIMIT = 1;
40 
41   private final ArrayList<SmartDialMatchPosition> matchPositions = new ArrayList<>();
42   private String query;
43 
44   // Controls whether to treat an empty query as a match (with anything).
45   private boolean shouldMatchEmptyQuery = false;
46 
SmartDialNameMatcher(String query)47   public SmartDialNameMatcher(String query) {
48     this.query = query;
49   }
50 
51   /**
52    * Strips a phone number of unnecessary characters (spaces, dashes, etc.)
53    *
54    * @param number Phone number we want to normalize
55    * @return Phone number consisting of digits from 0-9
56    */
normalizeNumber(Context context, String number)57   public static String normalizeNumber(Context context, String number) {
58     return normalizeNumber(context, number, /* offset = */ 0);
59   }
60 
61   /**
62    * Strips a phone number of unnecessary characters (spaces, dashes, etc.)
63    *
64    * @param number Phone number we want to normalize
65    * @param offset Offset to start from
66    * @return Phone number consisting of digits from 0-9
67    */
normalizeNumber(Context context, String number, int offset)68   public static String normalizeNumber(Context context, String number, int offset) {
69     final StringBuilder s = new StringBuilder();
70     for (int i = offset; i < number.length(); i++) {
71       char ch = number.charAt(i);
72       if (CompositeSmartDialMap.isValidDialpadNumericChar(context, ch)) {
73         s.append(ch);
74       }
75     }
76     return s.toString();
77   }
78 
79   /**
80    * Constructs empty highlight mask. Bit 0 at a position means there is no match, Bit 1 means there
81    * is a match and should be highlighted in the TextView.
82    *
83    * @param builder StringBuilder object
84    * @param length Length of the desired mask.
85    */
constructEmptyMask(StringBuilder builder, int length)86   private void constructEmptyMask(StringBuilder builder, int length) {
87     for (int i = 0; i < length; ++i) {
88       builder.append("0");
89     }
90   }
91 
92   /**
93    * Replaces the 0-bit at a position with 1-bit, indicating that there is a match.
94    *
95    * @param builder StringBuilder object.
96    * @param matchPos Match Positions to mask as 1.
97    */
replaceBitInMask(StringBuilder builder, SmartDialMatchPosition matchPos)98   private void replaceBitInMask(StringBuilder builder, SmartDialMatchPosition matchPos) {
99     for (int i = matchPos.start; i < matchPos.end; ++i) {
100       builder.replace(i, i + 1, "1");
101     }
102   }
103 
104   /**
105    * Matches a phone number against a query. Let the test application overwrite the NANP setting.
106    *
107    * @param phoneNumber - Raw phone number
108    * @param query - Normalized query (only contains numbers from 0-9)
109    * @return {@literal null} if the number and the query don't match, a valid SmartDialMatchPosition
110    *     with the matching positions otherwise
111    */
112   @Nullable
matchesNumber(Context context, String phoneNumber, String query)113   public SmartDialMatchPosition matchesNumber(Context context, String phoneNumber, String query) {
114     if (TextUtils.isEmpty(phoneNumber)) {
115       return shouldMatchEmptyQuery ? new SmartDialMatchPosition(0, 0) : null;
116     }
117     StringBuilder builder = new StringBuilder();
118     constructEmptyMask(builder, phoneNumber.length());
119 
120     // Try matching the number as is
121     SmartDialMatchPosition matchPos =
122         matchesNumberWithOffset(context, phoneNumber, query, /* offset = */ 0);
123     if (matchPos == null) {
124       PhoneNumberTokens phoneNumberTokens = SmartDialPrefix.parsePhoneNumber(context, phoneNumber);
125 
126       if (phoneNumberTokens.countryCodeOffset != 0) {
127         matchPos =
128             matchesNumberWithOffset(
129                 context, phoneNumber, query, phoneNumberTokens.countryCodeOffset);
130       }
131       if (matchPos == null && phoneNumberTokens.nanpCodeOffset != 0) {
132         matchPos =
133             matchesNumberWithOffset(context, phoneNumber, query, phoneNumberTokens.nanpCodeOffset);
134       }
135     }
136     if (matchPos != null) {
137       replaceBitInMask(builder, matchPos);
138     }
139     return matchPos;
140   }
141 
142   /**
143    * Matches a phone number against the saved query, taking care of formatting characters and also
144    * taking into account country code prefixes and special NANP number treatment.
145    *
146    * @param phoneNumber - Raw phone number
147    * @return {@literal null} if the number and the query don't match, a valid SmartDialMatchPosition
148    *     with the matching positions otherwise
149    */
matchesNumber(Context context, String phoneNumber)150   public SmartDialMatchPosition matchesNumber(Context context, String phoneNumber) {
151     return matchesNumber(context, phoneNumber, query);
152   }
153 
154   /**
155    * Matches a phone number against a query, taking care of formatting characters
156    *
157    * @param phoneNumber - Raw phone number
158    * @param query - Normalized query (only contains numbers from 0-9)
159    * @param offset - The position in the number to start the match against (used to ignore leading
160    *     prefixes/country codes)
161    * @return {@literal null} if the number and the query don't match, a valid SmartDialMatchPosition
162    *     with the matching positions otherwise
163    */
matchesNumberWithOffset( Context context, String phoneNumber, String query, int offset)164   private SmartDialMatchPosition matchesNumberWithOffset(
165       Context context, String phoneNumber, String query, int offset) {
166     if (TextUtils.isEmpty(phoneNumber) || TextUtils.isEmpty(query)) {
167       return shouldMatchEmptyQuery ? new SmartDialMatchPosition(offset, offset) : null;
168     }
169     int queryAt = 0;
170     int numberAt = offset;
171     for (int i = offset; i < phoneNumber.length(); i++) {
172       if (queryAt == query.length()) {
173         break;
174       }
175       char ch = phoneNumber.charAt(i);
176       if (CompositeSmartDialMap.isValidDialpadNumericChar(context, ch)) {
177         if (ch != query.charAt(queryAt)) {
178           return null;
179         }
180         queryAt++;
181       } else {
182         if (queryAt == 0) {
183           // Found a separator before any part of the query was matched, so advance the
184           // offset to avoid prematurely highlighting separators before the rest of the
185           // query.
186           // E.g. don't highlight the first '-' if we're matching 1-510-111-1111 with
187           // '510'.
188           // However, if the current offset is 0, just include the beginning separators
189           // anyway, otherwise the highlighting ends up looking weird.
190           // E.g. if we're matching (510)-111-1111 with '510', we should include the
191           // first '('.
192           if (offset != 0) {
193             offset++;
194           }
195         }
196       }
197       numberAt++;
198     }
199     return new SmartDialMatchPosition(0 + offset, numberAt);
200   }
201 
202   /**
203    * This function iterates through each token in the display name, trying to match the query to the
204    * numeric equivalent of the token.
205    *
206    * <p>A token is defined as a range in the display name delimited by characters that have no latin
207    * alphabet equivalents (e.g. spaces - ' ', periods - ',', underscores - '_' or chinese characters
208    * - '王'). Transliteration from non-latin characters to latin character will be done on a best
209    * effort basis - e.g. 'Ü' - 'u'.
210    *
211    * <p>For example, the display name "Phillips Thomas Jr" contains three tokens: "phillips",
212    * "thomas", and "jr".
213    *
214    * <p>A match must begin at the start of a token. For example, typing 846(Tho) would match
215    * "Phillips Thomas", but 466(hom) would not.
216    *
217    * <p>Also, a match can extend across tokens. For example, typing 37337(FredS) would match (Fred
218    * S)mith.
219    *
220    * @param displayName The normalized(no accented characters) display name we intend to match
221    *     against.
222    * @param query The string of digits that we want to match the display name to.
223    * @param matchList An array list of {@link SmartDialMatchPosition}s that we add matched positions
224    *     to.
225    * @return Returns true if a combination of the tokens in displayName match the query string
226    *     contained in query. If the function returns true, matchList will contain an ArrayList of
227    *     match positions (multiple matches correspond to initial matches).
228    */
matchesCombination( Context context, String displayName, String query, ArrayList<SmartDialMatchPosition> matchList)229   private boolean matchesCombination(
230       Context context,
231       String displayName,
232       String query,
233       ArrayList<SmartDialMatchPosition> matchList) {
234     StringBuilder builder = new StringBuilder();
235     constructEmptyMask(builder, displayName.length());
236     final int nameLength = displayName.length();
237     final int queryLength = query.length();
238 
239     if (nameLength < queryLength) {
240       return false;
241     }
242 
243     if (queryLength == 0) {
244       return false;
245     }
246 
247     // The current character index in displayName
248     // E.g. 3 corresponds to 'd' in "Fred Smith"
249     int nameStart = 0;
250 
251     // The current character in the query we are trying to match the displayName against
252     int queryStart = 0;
253 
254     // The start position of the current token we are inspecting
255     int tokenStart = 0;
256 
257     // The number of non-alphabetic characters we've encountered so far in the current match.
258     // E.g. if we've currently matched 3733764849 to (Fred Smith W)illiam, then the
259     // seperatorCount should be 2. This allows us to correctly calculate offsets for the match
260     // positions
261     int seperatorCount = 0;
262 
263     ArrayList<SmartDialMatchPosition> partial = new ArrayList<SmartDialMatchPosition>();
264     // Keep going until we reach the end of displayName
265     while (nameStart < nameLength && queryStart < queryLength) {
266       char ch = displayName.charAt(nameStart);
267       // Strip diacritics from accented characters if any
268       ch = CompositeSmartDialMap.normalizeCharacter(context, ch);
269       if (CompositeSmartDialMap.isValidDialpadCharacter(context, ch)) {
270         if (CompositeSmartDialMap.isValidDialpadAlphabeticChar(context, ch)) {
271           ch = CompositeSmartDialMap.getDialpadNumericCharacter(context, ch);
272         }
273         if (ch != query.charAt(queryStart)) {
274           // Failed to match the current character in the query.
275 
276           // Case 1: Failed to match the first character in the query. Skip to the next
277           // token since there is no chance of this token matching the query.
278 
279           // Case 2: Previous characters in the query matched, but the current character
280           // failed to match. This happened in the middle of a token. Skip to the next
281           // token since there is no chance of this token matching the query.
282 
283           // Case 3: Previous characters in the query matched, but the current character
284           // failed to match. This happened right at the start of the current token. In
285           // this case, we should restart the query and try again with the current token.
286           // Otherwise, we would fail to match a query like "964"(yog) against a name
287           // Yo-Yoghurt because the query match would fail on the 3rd character, and
288           // then skip to the end of the "Yoghurt" token.
289 
290           if (queryStart == 0
291               || CompositeSmartDialMap.isValidDialpadCharacter(
292                   context,
293                   CompositeSmartDialMap.normalizeCharacter(
294                       context, displayName.charAt(nameStart - 1)))) {
295             // skip to the next token, in the case of 1 or 2.
296             while (nameStart < nameLength
297                 && CompositeSmartDialMap.isValidDialpadCharacter(
298                     context,
299                     CompositeSmartDialMap.normalizeCharacter(
300                         context, displayName.charAt(nameStart)))) {
301               nameStart++;
302             }
303             nameStart++;
304           }
305 
306           // Restart the query and set the correct token position
307           queryStart = 0;
308           seperatorCount = 0;
309           tokenStart = nameStart;
310         } else {
311           if (queryStart == queryLength - 1) {
312 
313             // As much as possible, we prioritize a full token match over a sub token
314             // one so if we find a full token match, we can return right away
315             matchList.add(
316                 new SmartDialMatchPosition(tokenStart, queryLength + tokenStart + seperatorCount));
317             for (SmartDialMatchPosition match : matchList) {
318               replaceBitInMask(builder, match);
319             }
320             return true;
321           } else if (ALLOW_INITIAL_MATCH && queryStart < INITIAL_LENGTH_LIMIT) {
322             // we matched the first character.
323             // branch off and see if we can find another match with the remaining
324             // characters in the query string and the remaining tokens
325             // find the next separator in the query string
326             int j;
327             for (j = nameStart; j < nameLength; j++) {
328               if (!CompositeSmartDialMap.isValidDialpadCharacter(
329                   context,
330                   CompositeSmartDialMap.normalizeCharacter(context, displayName.charAt(j)))) {
331                 break;
332               }
333             }
334             // this means there is at least one character left after the separator
335             if (j < nameLength - 1) {
336               final String remainder = displayName.substring(j + 1);
337               final ArrayList<SmartDialMatchPosition> partialTemp = new ArrayList<>();
338               if (matchesCombination(
339                   context, remainder, query.substring(queryStart + 1), partialTemp)) {
340 
341                 // store the list of possible match positions
342                 SmartDialMatchPosition.advanceMatchPositions(partialTemp, j + 1);
343                 partialTemp.add(0, new SmartDialMatchPosition(nameStart, nameStart + 1));
344                 // we found a partial token match, store the data in a
345                 // temp buffer and return it if we end up not finding a full
346                 // token match
347                 partial = partialTemp;
348               }
349             }
350           }
351           nameStart++;
352           queryStart++;
353           // we matched the current character in the name against one in the query,
354           // continue and see if the rest of the characters match
355         }
356       } else {
357         // found a separator, we skip this character and continue to the next one
358         nameStart++;
359         if (queryStart == 0) {
360           // This means we found a separator before the start of a token,
361           // so we should increment the token's start position to reflect its true
362           // start position
363           tokenStart = nameStart;
364         } else {
365           // Otherwise this separator was found in the middle of a token being matched,
366           // so increase the separator count
367           seperatorCount++;
368         }
369       }
370     }
371     // if we have no complete match at this point, then we attempt to fall back to the partial
372     // token match(if any). If we don't allow initial matching (ALLOW_INITIAL_MATCH = false)
373     // then partial will always be empty.
374     if (!partial.isEmpty()) {
375       matchList.addAll(partial);
376       for (SmartDialMatchPosition match : matchList) {
377         replaceBitInMask(builder, match);
378       }
379       return true;
380     }
381     return false;
382   }
383 
384   /**
385    * This function iterates through each token in the display name, trying to match the query to the
386    * numeric equivalent of the token.
387    *
388    * <p>A token is defined as a range in the display name delimited by characters that have no latin
389    * alphabet equivalents (e.g. spaces - ' ', periods - ',', underscores - '_' or chinese characters
390    * - '王'). Transliteration from non-latin characters to latin character will be done on a best
391    * effort basis - e.g. 'Ü' - 'u'.
392    *
393    * <p>For example, the display name "Phillips Thomas Jr" contains three tokens: "phillips",
394    * "thomas", and "jr".
395    *
396    * <p>A match must begin at the start of a token. For example, typing 846(Tho) would match
397    * "Phillips Thomas", but 466(hom) would not.
398    *
399    * <p>Also, a match can extend across tokens. For example, typing 37337(FredS) would match (Fred
400    * S)mith.
401    *
402    * @param displayName The normalized(no accented characters) display name we intend to match
403    *     against.
404    * @return Returns true if a combination of the tokens in displayName match the query string
405    *     contained in query. If the function returns true, matchList will contain an ArrayList of
406    *     match positions (multiple matches correspond to initial matches).
407    */
matches(Context context, String displayName)408   public boolean matches(Context context, String displayName) {
409     matchPositions.clear();
410     return matchesCombination(context, displayName, query, matchPositions);
411   }
412 
getMatchPositions()413   public ArrayList<SmartDialMatchPosition> getMatchPositions() {
414     // Return a clone of mMatchPositions so that the caller can use it without
415     // worrying about it changing
416     return new ArrayList<>(matchPositions);
417   }
418 
getQuery()419   public String getQuery() {
420     return query;
421   }
422 
setQuery(String query)423   public void setQuery(String query) {
424     this.query = query;
425   }
426 
setShouldMatchEmptyQuery(boolean matches)427   public void setShouldMatchEmptyQuery(boolean matches) {
428     shouldMatchEmptyQuery = matches;
429   }
430 }
431