1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License
15  */
16 package com.android.providers.contacts;
17 
18 import android.content.ContentValues;
19 import android.database.Cursor;
20 import android.database.sqlite.SQLiteDatabase;
21 import android.os.SystemClock;
22 import android.provider.ContactsContract.CommonDataKinds.Email;
23 import android.provider.ContactsContract.CommonDataKinds.Nickname;
24 import android.provider.ContactsContract.CommonDataKinds.Organization;
25 import android.provider.ContactsContract.CommonDataKinds.StructuredPostal;
26 import android.provider.ContactsContract.Data;
27 import android.provider.ContactsContract.RawContacts;
28 import android.text.TextUtils;
29 import android.util.ArraySet;
30 import android.util.Log;
31 
32 import com.android.providers.contacts.ContactsDatabaseHelper.DataColumns;
33 import com.android.providers.contacts.ContactsDatabaseHelper.MimetypesColumns;
34 import com.android.providers.contacts.ContactsDatabaseHelper.RawContactsColumns;
35 import com.android.providers.contacts.ContactsDatabaseHelper.SearchIndexColumns;
36 import com.android.providers.contacts.ContactsDatabaseHelper.Tables;
37 import com.android.providers.contacts.util.CappedStringBuilder;
38 
39 import com.google.android.collect.Lists;
40 import com.google.common.annotations.VisibleForTesting;
41 
42 import java.util.ArrayList;
43 import java.util.List;
44 import java.util.Set;
45 import java.util.regex.Pattern;
46 
47 /**
48  * Maintains a search index for comprehensive contact search.
49  */
50 public class SearchIndexManager {
    /** Log tag used by every log statement in this class. */
    private static final String TAG = "ContactsFTS";

    /** True when verbose logging is enabled for {@link #TAG}; evaluated once at class load. */
    private static final boolean VERBOSE_LOGGING = Log.isLoggable(TAG, Log.VERBOSE);

    /** Size limit handed to each {@link CappedStringBuilder} owned by {@link IndexBuilder}. */
    private static final int MAX_STRING_BUILDER_SIZE = 1024 * 10;

    /** Key of the database property that stores the version of the built search index. */
    public static final String PROPERTY_SEARCH_INDEX_VERSION = "search_index";
    /**
     * Version that {@link #updateIndex} compares the stored property against; a mismatch
     * triggers a rebuild.
     */
    private static final int SEARCH_INDEX_VERSION = 1;
59 
60     private static final class ContactIndexQuery {
61         public static final String[] COLUMNS = {
62                 Data.CONTACT_ID,
63                 MimetypesColumns.MIMETYPE,
64                 Data.DATA1, Data.DATA2, Data.DATA3, Data.DATA4, Data.DATA5,
65                 Data.DATA6, Data.DATA7, Data.DATA8, Data.DATA9, Data.DATA10, Data.DATA11,
66                 Data.DATA12, Data.DATA13, Data.DATA14
67         };
68 
69         public static final int MIMETYPE = 1;
70     }
71 
72     public static class IndexBuilder {
        // Formats accepted by appendContent(String, int); each one decides how a value is
        // joined to the element content accumulated so far.
        public static final int SEPARATOR_SPACE = 0;
        public static final int SEPARATOR_PARENTHESES = 1;
        public static final int SEPARATOR_SLASH = 2;
        public static final int SEPARATOR_COMMA = 3;

        // Committed content elements, separated by '\n'.
        private CappedStringBuilder mSbContent = new CappedStringBuilder(MAX_STRING_BUILDER_SIZE);
        // Normalized names, separated by spaces.
        private CappedStringBuilder mSbName = new CappedStringBuilder(MAX_STRING_BUILDER_SIZE);
        // Tokens, separated by spaces.
        private CappedStringBuilder mSbTokens = new CappedStringBuilder(MAX_STRING_BUILDER_SIZE);
        // Content of the element currently being assembled; flushed into mSbContent by commit().
        private CappedStringBuilder mSbElementContent = new CappedStringBuilder(
                MAX_STRING_BUILDER_SIZE);
        // Elements already committed, used by commit() to skip duplicates.
        private ArraySet<String> mUniqueElements = new ArraySet<>();
        // Row source for getString() and getInt().
        private Cursor mCursor;
85 
setCursor(Cursor cursor)86         void setCursor(Cursor cursor) {
87             this.mCursor = cursor;
88         }
89 
reset()90         void reset() {
91             mSbContent.clear();
92             mSbTokens.clear();
93             mSbName.clear();
94             mSbElementContent.clear();
95             mUniqueElements.clear();
96         }
97 
getContent()98         public String getContent() {
99             return mSbContent.length() == 0 ? null : mSbContent.toString();
100         }
101 
getName()102         public String getName() {
103             return mSbName.length() == 0 ? null : mSbName.toString();
104         }
105 
getTokens()106         public String getTokens() {
107             return mSbTokens.length() == 0 ? null : mSbTokens.toString();
108         }
109 
getString(String columnName)110         public String getString(String columnName) {
111             return mCursor.getString(mCursor.getColumnIndex(columnName));
112         }
113 
getInt(String columnName)114         public int getInt(String columnName) {
115             return mCursor.getInt(mCursor.getColumnIndex(columnName));
116         }
117 
118         @Override
toString()119         public String toString() {
120             return "Content: " + mSbContent + "\n Name: " + mSbName + "\n Tokens: " + mSbTokens;
121         }
122 
commit()123         public void commit() {
124             if (mSbElementContent.length() != 0) {
125                 String content = mSbElementContent.toString().replace('\n', ' ');
126                 if (!mUniqueElements.contains(content)) {
127                     if (mSbContent.length() != 0) {
128                         mSbContent.append('\n');
129                     }
130                     mSbContent.append(content);
131                     mUniqueElements.add(content);
132                 }
133                 mSbElementContent.clear();
134             }
135         }
136 
        /**
         * Appends the value of the named cursor column to the current element, using
         * {@link #SEPARATOR_SPACE}.
         */
        public void appendContentFromColumn(String columnName) {
            appendContentFromColumn(columnName, SEPARATOR_SPACE);
        }
140 
appendContentFromColumn(String columnName, int format)141         public void appendContentFromColumn(String columnName, int format) {
142             appendContent(getString(columnName), format);
143         }
144 
        /**
         * Appends {@code value} to the current element, using {@link #SEPARATOR_SPACE}.
         */
        public void appendContent(String value) {
            appendContent(value, SEPARATOR_SPACE);
        }
148 
appendContent(String value, int format)149         private void appendContent(String value, int format) {
150             if (TextUtils.isEmpty(value)) {
151                 return;
152             }
153 
154             switch (format) {
155                 case SEPARATOR_SPACE:
156                     if (mSbElementContent.length() > 0) {
157                         mSbElementContent.append(' ');
158                     }
159                     mSbElementContent.append(value);
160                     break;
161 
162                 case SEPARATOR_SLASH:
163                     mSbElementContent.append('/').append(value);
164                     break;
165 
166                 case SEPARATOR_PARENTHESES:
167                     if (mSbElementContent.length() > 0) {
168                         mSbElementContent.append(' ');
169                     }
170                     mSbElementContent.append('(').append(value).append(')');
171                     break;
172 
173                 case SEPARATOR_COMMA:
174                     if (mSbElementContent.length() > 0) {
175                         mSbElementContent.append(", ");
176                     }
177                     mSbElementContent.append(value);
178                     break;
179             }
180         }
181 
appendToken(String token)182         public void appendToken(String token) {
183             if (TextUtils.isEmpty(token)) {
184                 return;
185             }
186 
187             if (mSbTokens.length() != 0) {
188                 mSbTokens.append(' ');
189             }
190             mSbTokens.append(token);
191         }
192 
appendNameFromColumn(String columnName)193         public void appendNameFromColumn(String columnName) {
194             appendName(getString(columnName));
195         }
196 
appendName(String name)197         public void appendName(String name) {
198             if (TextUtils.isEmpty(name)) {
199                 return;
200             }
201             // First, put the original name.
202             appendNameInternal(name);
203 
204             // Then, if the name contains more than one FTS token, put each token into the index
205             // too.
206             //
207             // This is to make names with special characters searchable, such as "double-barrelled"
208             // "L'Image".
209             //
210             // Here's how it works:
211             // Because we "normalize" names when putting into the index, if we only put
212             // "double-barrelled", the index will only contain "doublebarrelled".
213             // Now, if the user searches for "double-barrelled", the searcher tokenizes it into
214             // two tokens, "double" and "barrelled".  The first one matches "doublebarrelled"
215             // but the second one doesn't (because we only do the prefix match), so
216             // "doublebarrelled" doesn't match.
217             // So, here, we put each token in a name into the index too.  In the case above,
218             // we put also "double" and "barrelled".
219             // With this, queries such as "double-barrelled", "double barrelled", "doublebarrelled"
220             // will all match "double-barrelled".
221             final List<String> nameParts = splitIntoFtsTokens(name);
222             if (nameParts.size() > 1) {
223                 for (String namePart : nameParts) {
224                     if (!TextUtils.isEmpty(namePart)) {
225                         appendNameInternal(namePart);
226                     }
227                 }
228             }
229         }
230 
231         /**
232          * Normalize a name and add to {@link #mSbName}
233          */
appendNameInternal(String name)234         private void appendNameInternal(String name) {
235             if (mSbName.length() != 0) {
236                 mSbName.append(' ');
237             }
238             mSbName.append(NameNormalizer.normalize(name));
239         }
240     }
241 
    // Provider used for status updates and for looking up per-mimetype data row handlers.
    private final ContactsProvider2 mContactsProvider;
    // Helper that supplies the writable database, mimetype ids and stored properties.
    private final ContactsDatabaseHelper mDbHelper;
    // Scratch buffer reused by buildAndInsertIndex() for its ORDER BY clause.
    private StringBuilder mSb = new StringBuilder();
    // Single builder reused for every contact; reset between contacts.
    private IndexBuilder mIndexBuilder = new IndexBuilder();
    // Scratch values reused by insertIndexRow().
    private ContentValues mValues = new ContentValues();
    // NOTE(review): not referenced anywhere in the visible part of this file.
    private String[] mSelectionArgs1 = new String[1];
248 
SearchIndexManager(ContactsProvider2 contactsProvider)249     public SearchIndexManager(ContactsProvider2 contactsProvider) {
250         this.mContactsProvider = contactsProvider;
251         mDbHelper = (ContactsDatabaseHelper) mContactsProvider.getDatabaseHelper();
252     }
253 
updateIndex(boolean force)254     public void updateIndex(boolean force) {
255         if (force) {
256             setSearchIndexVersion(0);
257         } else {
258             if (getSearchIndexVersion() == SEARCH_INDEX_VERSION) {
259                 return;
260             }
261         }
262         SQLiteDatabase db = mDbHelper.getWritableDatabase();
263         db.beginTransaction();
264         try {
265             // We do a version check again, because the version might have been modified after
266             // the first check.  We need to do the check again in a transaction to make sure.
267             if (getSearchIndexVersion() != SEARCH_INDEX_VERSION) {
268                 rebuildIndex(db);
269                 setSearchIndexVersion(SEARCH_INDEX_VERSION);
270                 db.setTransactionSuccessful();
271             }
272         } finally {
273             db.endTransaction();
274         }
275     }
276 
rebuildIndex(SQLiteDatabase db)277     private void rebuildIndex(SQLiteDatabase db) {
278         mContactsProvider.setProviderStatus(ContactsProvider2.STATUS_UPGRADING);
279         final long start = SystemClock.elapsedRealtime();
280         int count = 0;
281         try {
282             mDbHelper.createSearchIndexTable(db, true);
283             count = buildAndInsertIndex(db, null);
284         } finally {
285             mContactsProvider.setProviderStatus(ContactsProvider2.STATUS_NORMAL);
286 
287             final long end = SystemClock.elapsedRealtime();
288             Log.i(TAG, "Rebuild contact search index in " + (end - start) + "ms, "
289                     + count + " contacts");
290         }
291     }
292 
updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds)293     public void updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds) {
294         if (VERBOSE_LOGGING) {
295             Log.v(TAG, "Updating search index for " + contactIds.size() +
296                     " contacts / " + rawContactIds.size() + " raw contacts");
297         }
298         StringBuilder sb = new StringBuilder();
299         sb.append("(");
300         if (!contactIds.isEmpty()) {
301             // Select all raw contacts that belong to all contacts in contactIds
302             sb.append(RawContacts.CONTACT_ID + " IN (");
303             sb.append(TextUtils.join(",", contactIds));
304             sb.append(')');
305         }
306         if (!rawContactIds.isEmpty()) {
307             if (!contactIds.isEmpty()) {
308                 sb.append(" OR ");
309             }
310             // Select all raw contacts that belong to the same contact as all raw contacts
311             // in rawContactIds. For every raw contact in rawContactIds that we are updating
312             // the index for, we need to rebuild the search index for all raw contacts belonging
313             // to the same contact, because we can only update the search index on a per-contact
314             // basis.
315             sb.append(RawContacts.CONTACT_ID + " IN " +
316                     "(SELECT " + RawContacts.CONTACT_ID + " FROM " + Tables.RAW_CONTACTS +
317                     " WHERE " + RawContactsColumns.CONCRETE_ID + " IN (");
318             sb.append(TextUtils.join(",", rawContactIds));
319             sb.append("))");
320         }
321 
322         sb.append(")");
323 
324         // The selection to select raw_contacts.
325         final String rawContactsSelection = sb.toString();
326 
327         // Remove affected search_index rows.
328         final SQLiteDatabase db = mDbHelper.getWritableDatabase();
329         final int deleted = db.delete(Tables.SEARCH_INDEX,
330                 SearchIndexColumns.CONTACT_ID + " IN (SELECT " +
331                     RawContacts.CONTACT_ID +
332                     " FROM " + Tables.RAW_CONTACTS +
333                     " WHERE " + rawContactsSelection +
334                     ")"
335                 , null);
336 
337         // Then rebuild index for them.
338         final int count = buildAndInsertIndex(db, rawContactsSelection);
339 
340         if (VERBOSE_LOGGING) {
341             Log.v(TAG, "Updated search index for " + count + " contacts");
342         }
343     }
344 
buildAndInsertIndex(SQLiteDatabase db, String selection)345     private int buildAndInsertIndex(SQLiteDatabase db, String selection) {
346         mSb.setLength(0);
347         mSb.append(Data.CONTACT_ID + ", ");
348         mSb.append("(CASE WHEN " + DataColumns.MIMETYPE_ID + "=");
349         mSb.append(mDbHelper.getMimeTypeId(Nickname.CONTENT_ITEM_TYPE));
350         mSb.append(" THEN -4 ");
351         mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
352         mSb.append(mDbHelper.getMimeTypeId(Organization.CONTENT_ITEM_TYPE));
353         mSb.append(" THEN -3 ");
354         mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
355         mSb.append(mDbHelper.getMimeTypeId(StructuredPostal.CONTENT_ITEM_TYPE));
356         mSb.append(" THEN -2");
357         mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
358         mSb.append(mDbHelper.getMimeTypeId(Email.CONTENT_ITEM_TYPE));
359         mSb.append(" THEN -1");
360         mSb.append(" ELSE " + DataColumns.MIMETYPE_ID);
361         mSb.append(" END), " + Data.IS_SUPER_PRIMARY + ", " + DataColumns.CONCRETE_ID);
362 
363         int count = 0;
364         Cursor cursor = db.query(Tables.DATA_JOIN_MIMETYPE_RAW_CONTACTS, ContactIndexQuery.COLUMNS,
365                 selection, null, null, null, mSb.toString());
366         mIndexBuilder.setCursor(cursor);
367         mIndexBuilder.reset();
368         try {
369             long currentContactId = -1;
370             while (cursor.moveToNext()) {
371                 long contactId = cursor.getLong(0);
372                 if (contactId != currentContactId) {
373                     if (currentContactId != -1) {
374                         insertIndexRow(db, currentContactId, mIndexBuilder);
375                         count++;
376                     }
377                     currentContactId = contactId;
378                     mIndexBuilder.reset();
379                 }
380                 String mimetype = cursor.getString(ContactIndexQuery.MIMETYPE);
381                 DataRowHandler dataRowHandler = mContactsProvider.getDataRowHandler(mimetype);
382                 if (dataRowHandler.hasSearchableData()) {
383                     dataRowHandler.appendSearchableData(mIndexBuilder);
384                     mIndexBuilder.commit();
385                 }
386             }
387             if (currentContactId != -1) {
388                 insertIndexRow(db, currentContactId, mIndexBuilder);
389                 count++;
390             }
391         } finally {
392             cursor.close();
393         }
394         return count;
395     }
396 
insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder)397     private void insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder) {
398         mValues.clear();
399         mValues.put(SearchIndexColumns.CONTENT, builder.getContent());
400         mValues.put(SearchIndexColumns.NAME, builder.getName());
401         mValues.put(SearchIndexColumns.TOKENS, builder.getTokens());
402         mValues.put(SearchIndexColumns.CONTACT_ID, contactId);
403         db.insert(Tables.SEARCH_INDEX, null, mValues);
404     }
getSearchIndexVersion()405     private int getSearchIndexVersion() {
406         return Integer.parseInt(mDbHelper.getProperty(PROPERTY_SEARCH_INDEX_VERSION, "0"));
407     }
408 
setSearchIndexVersion(int version)409     private void setSearchIndexVersion(int version) {
410         mDbHelper.setProperty(PROPERTY_SEARCH_INDEX_VERSION, String.valueOf(version));
411     }
412 
    /**
     * Matches a single token-separator character, following the rules of SQLite's "simple"
     * tokenizer. The character class is negated, so it matches everything that is NOT a token
     * character. Token characters are:
     * - Unicode code units from U+0080 to U+FFFF: all of them
     * - Below U+0080: alphanumerics and "_"
     * Every other character separates tokens.
     */
    private static final Pattern FTS_TOKEN_SEPARATOR_RE =
            Pattern.compile("[^\u0080-\uffff\\p{Alnum}_]");
421 
422     /**
423      * Tokenize a string in the way as that of SQLite's "simple" tokenizer.
424      */
425     @VisibleForTesting
splitIntoFtsTokens(String s)426     static List<String> splitIntoFtsTokens(String s) {
427         final ArrayList<String> ret = Lists.newArrayList();
428         for (String token : FTS_TOKEN_SEPARATOR_RE.split(s)) {
429             if (!TextUtils.isEmpty(token)) {
430                 ret.add(token);
431             }
432         }
433         return ret;
434     }
435 
436     /**
437      * Tokenizes the query and normalizes/hex encodes each token. The tokenizer uses the same
438      * rules as SQLite's "simple" tokenizer. Each token is added to the retokenizer and then
439      * returned as a String.
440      * @see FtsQueryBuilder#UNSCOPED_NORMALIZING
441      * @see FtsQueryBuilder#SCOPED_NAME_NORMALIZING
442      */
getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder)443     public static String getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder) {
444         final StringBuilder result = new StringBuilder();
445         for (String token : splitIntoFtsTokens(query)) {
446             ftsQueryBuilder.addToken(result, token);
447         }
448         return result.toString();
449     }
450 
451     public static abstract class FtsQueryBuilder {
addToken(StringBuilder builder, String token)452         public abstract void addToken(StringBuilder builder, String token);
453 
454         /** Normalizes and space-concatenates each token. Example: "a1b2c1* a2b3c2*" */
455         public static final FtsQueryBuilder UNSCOPED_NORMALIZING = new UnscopedNormalizingBuilder();
456 
457         /**
458          * Scopes each token to a column and normalizes the name.
459          * Example: "content:foo* name:a1b2c1* tokens:foo* content:bar* name:a2b3c2* tokens:bar*"
460          */
461         public static final FtsQueryBuilder SCOPED_NAME_NORMALIZING =
462                 new ScopedNameNormalizingBuilder();
463 
464         /**
465          * Scopes each token to a the content column and also for name with normalization.
466          * Also adds a user-defined expression to each token. This allows common criteria to be
467          * concatenated to each token.
468          * Example (commonCriteria=" OR tokens:123*"):
469          * "content:650* OR name:1A1B1C* OR tokens:123* content:2A2B2C* OR name:foo* OR tokens:123*"
470          */
getDigitsQueryBuilder(final String commonCriteria)471         public static FtsQueryBuilder getDigitsQueryBuilder(final String commonCriteria) {
472             return new FtsQueryBuilder() {
473                 @Override
474                 public void addToken(StringBuilder builder, String token) {
475                     if (builder.length() != 0) builder.append(' ');
476 
477                     builder.append("content:");
478                     builder.append(token);
479                     builder.append("* ");
480 
481                     final String normalizedToken = NameNormalizer.normalize(token);
482                     if (!TextUtils.isEmpty(normalizedToken)) {
483                         builder.append(" OR name:");
484                         builder.append(normalizedToken);
485                         builder.append('*');
486                     }
487 
488                     builder.append(commonCriteria);
489                 }
490             };
491         }
492     }
493 
494     private static class UnscopedNormalizingBuilder extends FtsQueryBuilder {
495         @Override
496         public void addToken(StringBuilder builder, String token) {
497             if (builder.length() != 0) builder.append(' ');
498 
499             // the token could be empty (if the search query was "_"). we should still emit it
500             // here, as we otherwise risk to end up with an empty MATCH-expression MATCH ""
501             builder.append(NameNormalizer.normalize(token));
502             builder.append('*');
503         }
504     }
505 
506     private static class ScopedNameNormalizingBuilder extends FtsQueryBuilder {
507         @Override
508         public void addToken(StringBuilder builder, String token) {
509             if (builder.length() != 0) builder.append(' ');
510 
511             builder.append("content:");
512             builder.append(token);
513             builder.append('*');
514 
515             final String normalizedToken = NameNormalizer.normalize(token);
516             if (!TextUtils.isEmpty(normalizedToken)) {
517                 builder.append(" OR name:");
518                 builder.append(normalizedToken);
519                 builder.append('*');
520             }
521 
522             builder.append(" OR tokens:");
523             builder.append(token);
524             builder.append("*");
525         }
526     }
527 }
528