1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License 15 */ 16 package com.android.providers.contacts; 17 18 import android.content.ContentValues; 19 import android.database.Cursor; 20 import android.database.sqlite.SQLiteDatabase; 21 import android.os.SystemClock; 22 import android.provider.ContactsContract.CommonDataKinds.Email; 23 import android.provider.ContactsContract.CommonDataKinds.Nickname; 24 import android.provider.ContactsContract.CommonDataKinds.Organization; 25 import android.provider.ContactsContract.CommonDataKinds.StructuredPostal; 26 import android.provider.ContactsContract.Data; 27 import android.provider.ContactsContract.RawContacts; 28 import android.text.TextUtils; 29 import android.util.ArraySet; 30 import android.util.Log; 31 32 import com.android.providers.contacts.ContactsDatabaseHelper.DataColumns; 33 import com.android.providers.contacts.ContactsDatabaseHelper.MimetypesColumns; 34 import com.android.providers.contacts.ContactsDatabaseHelper.RawContactsColumns; 35 import com.android.providers.contacts.ContactsDatabaseHelper.SearchIndexColumns; 36 import com.android.providers.contacts.ContactsDatabaseHelper.Tables; 37 import com.android.providers.contacts.util.CappedStringBuilder; 38 39 import com.google.android.collect.Lists; 40 import com.google.common.annotations.VisibleForTesting; 41 42 import java.util.ArrayList; 43 import java.util.List; 44 import java.util.Set; 45 import java.util.regex.Pattern; 46 47 /** 48 * Maintains a search index for comprehensive contact search. 49 */ 50 public class SearchIndexManager { 51 private static final String TAG = "ContactsFTS"; 52 53 private static final boolean VERBOSE_LOGGING = Log.isLoggable(TAG, Log.VERBOSE); 54 55 private static final int MAX_STRING_BUILDER_SIZE = 1024 * 10; 56 57 public static final String PROPERTY_SEARCH_INDEX_VERSION = "search_index"; 58 private static final int SEARCH_INDEX_VERSION = 1; 59 60 private static final class ContactIndexQuery { 61 public static final String[] COLUMNS = { 62 Data.CONTACT_ID, 63 MimetypesColumns.MIMETYPE, 64 Data.DATA1, Data.DATA2, Data.DATA3, Data.DATA4, Data.DATA5, 65 Data.DATA6, Data.DATA7, Data.DATA8, Data.DATA9, Data.DATA10, Data.DATA11, 66 Data.DATA12, Data.DATA13, Data.DATA14 67 }; 68 69 public static final int MIMETYPE = 1; 70 } 71 72 public static class IndexBuilder { 73 public static final int SEPARATOR_SPACE = 0; 74 public static final int SEPARATOR_PARENTHESES = 1; 75 public static final int SEPARATOR_SLASH = 2; 76 public static final int SEPARATOR_COMMA = 3; 77 78 private CappedStringBuilder mSbContent = new CappedStringBuilder(MAX_STRING_BUILDER_SIZE); 79 private CappedStringBuilder mSbName = new CappedStringBuilder(MAX_STRING_BUILDER_SIZE); 80 private CappedStringBuilder mSbTokens = new CappedStringBuilder(MAX_STRING_BUILDER_SIZE); 81 private CappedStringBuilder mSbElementContent = new CappedStringBuilder( 82 MAX_STRING_BUILDER_SIZE); 83 private ArraySet<String> mUniqueElements = new ArraySet<>(); 84 private Cursor mCursor; 85 setCursor(Cursor cursor)86 void setCursor(Cursor cursor) { 87 this.mCursor = cursor; 88 } 89 reset()90 void reset() { 91 mSbContent.clear(); 92 mSbTokens.clear(); 93 mSbName.clear(); 94 mSbElementContent.clear(); 95 mUniqueElements.clear(); 96 } 97 getContent()98 public String getContent() { 99 return mSbContent.length() == 0 ? null : mSbContent.toString(); 100 } 101 getName()102 public String getName() { 103 return mSbName.length() == 0 ? null : mSbName.toString(); 104 } 105 getTokens()106 public String getTokens() { 107 return mSbTokens.length() == 0 ? null : mSbTokens.toString(); 108 } 109 getString(String columnName)110 public String getString(String columnName) { 111 return mCursor.getString(mCursor.getColumnIndex(columnName)); 112 } 113 getInt(String columnName)114 public int getInt(String columnName) { 115 return mCursor.getInt(mCursor.getColumnIndex(columnName)); 116 } 117 118 @Override toString()119 public String toString() { 120 return "Content: " + mSbContent + "\n Name: " + mSbName + "\n Tokens: " + mSbTokens; 121 } 122 commit()123 public void commit() { 124 if (mSbElementContent.length() != 0) { 125 String content = mSbElementContent.toString().replace('\n', ' '); 126 if (!mUniqueElements.contains(content)) { 127 if (mSbContent.length() != 0) { 128 mSbContent.append('\n'); 129 } 130 mSbContent.append(content); 131 mUniqueElements.add(content); 132 } 133 mSbElementContent.clear(); 134 } 135 } 136 appendContentFromColumn(String columnName)137 public void appendContentFromColumn(String columnName) { 138 appendContentFromColumn(columnName, SEPARATOR_SPACE); 139 } 140 appendContentFromColumn(String columnName, int format)141 public void appendContentFromColumn(String columnName, int format) { 142 appendContent(getString(columnName), format); 143 } 144 appendContent(String value)145 public void appendContent(String value) { 146 appendContent(value, SEPARATOR_SPACE); 147 } 148 appendContent(String value, int format)149 private void appendContent(String value, int format) { 150 if (TextUtils.isEmpty(value)) { 151 return; 152 } 153 154 switch (format) { 155 case SEPARATOR_SPACE: 156 if (mSbElementContent.length() > 0) { 157 mSbElementContent.append(' '); 158 } 159 mSbElementContent.append(value); 160 break; 161 162 case SEPARATOR_SLASH: 163 mSbElementContent.append('/').append(value); 164 break; 165 166 case SEPARATOR_PARENTHESES: 167 if (mSbElementContent.length() > 0) { 168 mSbElementContent.append(' '); 169 } 170 mSbElementContent.append('(').append(value).append(')'); 171 break; 172 173 case SEPARATOR_COMMA: 174 if (mSbElementContent.length() > 0) { 175 mSbElementContent.append(", "); 176 } 177 mSbElementContent.append(value); 178 break; 179 } 180 } 181 appendToken(String token)182 public void appendToken(String token) { 183 if (TextUtils.isEmpty(token)) { 184 return; 185 } 186 187 if (mSbTokens.length() != 0) { 188 mSbTokens.append(' '); 189 } 190 mSbTokens.append(token); 191 } 192 appendNameFromColumn(String columnName)193 public void appendNameFromColumn(String columnName) { 194 appendName(getString(columnName)); 195 } 196 appendName(String name)197 public void appendName(String name) { 198 if (TextUtils.isEmpty(name)) { 199 return; 200 } 201 // First, put the original name. 202 appendNameInternal(name); 203 204 // Then, if the name contains more than one FTS token, put each token into the index 205 // too. 206 // 207 // This is to make names with special characters searchable, such as "double-barrelled" 208 // "L'Image". 209 // 210 // Here's how it works: 211 // Because we "normalize" names when putting into the index, if we only put 212 // "double-barrelled", the index will only contain "doublebarrelled". 213 // Now, if the user searches for "double-barrelled", the searcher tokenizes it into 214 // two tokens, "double" and "barrelled". The first one matches "doublebarrelled" 215 // but the second one doesn't (because we only do the prefix match), so 216 // "doublebarrelled" doesn't match. 217 // So, here, we put each token in a name into the index too. In the case above, 218 // we put also "double" and "barrelled". 219 // With this, queries such as "double-barrelled", "double barrelled", "doublebarrelled" 220 // will all match "double-barrelled". 221 final List<String> nameParts = splitIntoFtsTokens(name); 222 if (nameParts.size() > 1) { 223 for (String namePart : nameParts) { 224 if (!TextUtils.isEmpty(namePart)) { 225 appendNameInternal(namePart); 226 } 227 } 228 } 229 } 230 231 /** 232 * Normalize a name and add to {@link #mSbName} 233 */ appendNameInternal(String name)234 private void appendNameInternal(String name) { 235 if (mSbName.length() != 0) { 236 mSbName.append(' '); 237 } 238 mSbName.append(NameNormalizer.normalize(name)); 239 } 240 } 241 242 private final ContactsProvider2 mContactsProvider; 243 private final ContactsDatabaseHelper mDbHelper; 244 private StringBuilder mSb = new StringBuilder(); 245 private IndexBuilder mIndexBuilder = new IndexBuilder(); 246 private ContentValues mValues = new ContentValues(); 247 private String[] mSelectionArgs1 = new String[1]; 248 SearchIndexManager(ContactsProvider2 contactsProvider)249 public SearchIndexManager(ContactsProvider2 contactsProvider) { 250 this.mContactsProvider = contactsProvider; 251 mDbHelper = (ContactsDatabaseHelper) mContactsProvider.getDatabaseHelper(); 252 } 253 updateIndex(boolean force)254 public void updateIndex(boolean force) { 255 if (force) { 256 setSearchIndexVersion(0); 257 } else { 258 if (getSearchIndexVersion() == SEARCH_INDEX_VERSION) { 259 return; 260 } 261 } 262 SQLiteDatabase db = mDbHelper.getWritableDatabase(); 263 db.beginTransaction(); 264 try { 265 // We do a version check again, because the version might have been modified after 266 // the first check. We need to do the check again in a transaction to make sure. 267 if (getSearchIndexVersion() != SEARCH_INDEX_VERSION) { 268 rebuildIndex(db); 269 setSearchIndexVersion(SEARCH_INDEX_VERSION); 270 db.setTransactionSuccessful(); 271 } 272 } finally { 273 db.endTransaction(); 274 } 275 } 276 rebuildIndex(SQLiteDatabase db)277 private void rebuildIndex(SQLiteDatabase db) { 278 mContactsProvider.setProviderStatus(ContactsProvider2.STATUS_UPGRADING); 279 final long start = SystemClock.elapsedRealtime(); 280 int count = 0; 281 try { 282 mDbHelper.createSearchIndexTable(db, true); 283 count = buildAndInsertIndex(db, null); 284 } finally { 285 mContactsProvider.setProviderStatus(ContactsProvider2.STATUS_NORMAL); 286 287 final long end = SystemClock.elapsedRealtime(); 288 Log.i(TAG, "Rebuild contact search index in " + (end - start) + "ms, " 289 + count + " contacts"); 290 } 291 } 292 updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds)293 public void updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds) { 294 if (VERBOSE_LOGGING) { 295 Log.v(TAG, "Updating search index for " + contactIds.size() + 296 " contacts / " + rawContactIds.size() + " raw contacts"); 297 } 298 StringBuilder sb = new StringBuilder(); 299 sb.append("("); 300 if (!contactIds.isEmpty()) { 301 // Select all raw contacts that belong to all contacts in contactIds 302 sb.append(RawContacts.CONTACT_ID + " IN ("); 303 sb.append(TextUtils.join(",", contactIds)); 304 sb.append(')'); 305 } 306 if (!rawContactIds.isEmpty()) { 307 if (!contactIds.isEmpty()) { 308 sb.append(" OR "); 309 } 310 // Select all raw contacts that belong to the same contact as all raw contacts 311 // in rawContactIds. For every raw contact in rawContactIds that we are updating 312 // the index for, we need to rebuild the search index for all raw contacts belonging 313 // to the same contact, because we can only update the search index on a per-contact 314 // basis. 315 sb.append(RawContacts.CONTACT_ID + " IN " + 316 "(SELECT " + RawContacts.CONTACT_ID + " FROM " + Tables.RAW_CONTACTS + 317 " WHERE " + RawContactsColumns.CONCRETE_ID + " IN ("); 318 sb.append(TextUtils.join(",", rawContactIds)); 319 sb.append("))"); 320 } 321 322 sb.append(")"); 323 324 // The selection to select raw_contacts. 325 final String rawContactsSelection = sb.toString(); 326 327 // Remove affected search_index rows. 328 final SQLiteDatabase db = mDbHelper.getWritableDatabase(); 329 final int deleted = db.delete(Tables.SEARCH_INDEX, 330 SearchIndexColumns.CONTACT_ID + " IN (SELECT " + 331 RawContacts.CONTACT_ID + 332 " FROM " + Tables.RAW_CONTACTS + 333 " WHERE " + rawContactsSelection + 334 ")" 335 , null); 336 337 // Then rebuild index for them. 338 final int count = buildAndInsertIndex(db, rawContactsSelection); 339 340 if (VERBOSE_LOGGING) { 341 Log.v(TAG, "Updated search index for " + count + " contacts"); 342 } 343 } 344 buildAndInsertIndex(SQLiteDatabase db, String selection)345 private int buildAndInsertIndex(SQLiteDatabase db, String selection) { 346 mSb.setLength(0); 347 mSb.append(Data.CONTACT_ID + ", "); 348 mSb.append("(CASE WHEN " + DataColumns.MIMETYPE_ID + "="); 349 mSb.append(mDbHelper.getMimeTypeId(Nickname.CONTENT_ITEM_TYPE)); 350 mSb.append(" THEN -4 "); 351 mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "="); 352 mSb.append(mDbHelper.getMimeTypeId(Organization.CONTENT_ITEM_TYPE)); 353 mSb.append(" THEN -3 "); 354 mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "="); 355 mSb.append(mDbHelper.getMimeTypeId(StructuredPostal.CONTENT_ITEM_TYPE)); 356 mSb.append(" THEN -2"); 357 mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "="); 358 mSb.append(mDbHelper.getMimeTypeId(Email.CONTENT_ITEM_TYPE)); 359 mSb.append(" THEN -1"); 360 mSb.append(" ELSE " + DataColumns.MIMETYPE_ID); 361 mSb.append(" END), " + Data.IS_SUPER_PRIMARY + ", " + DataColumns.CONCRETE_ID); 362 363 int count = 0; 364 Cursor cursor = db.query(Tables.DATA_JOIN_MIMETYPE_RAW_CONTACTS, ContactIndexQuery.COLUMNS, 365 selection, null, null, null, mSb.toString()); 366 mIndexBuilder.setCursor(cursor); 367 mIndexBuilder.reset(); 368 try { 369 long currentContactId = -1; 370 while (cursor.moveToNext()) { 371 long contactId = cursor.getLong(0); 372 if (contactId != currentContactId) { 373 if (currentContactId != -1) { 374 insertIndexRow(db, currentContactId, mIndexBuilder); 375 count++; 376 } 377 currentContactId = contactId; 378 mIndexBuilder.reset(); 379 } 380 String mimetype = cursor.getString(ContactIndexQuery.MIMETYPE); 381 DataRowHandler dataRowHandler = mContactsProvider.getDataRowHandler(mimetype); 382 if (dataRowHandler.hasSearchableData()) { 383 dataRowHandler.appendSearchableData(mIndexBuilder); 384 mIndexBuilder.commit(); 385 } 386 } 387 if (currentContactId != -1) { 388 insertIndexRow(db, currentContactId, mIndexBuilder); 389 count++; 390 } 391 } finally { 392 cursor.close(); 393 } 394 return count; 395 } 396 insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder)397 private void insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder) { 398 mValues.clear(); 399 mValues.put(SearchIndexColumns.CONTENT, builder.getContent()); 400 mValues.put(SearchIndexColumns.NAME, builder.getName()); 401 mValues.put(SearchIndexColumns.TOKENS, builder.getTokens()); 402 mValues.put(SearchIndexColumns.CONTACT_ID, contactId); 403 db.insert(Tables.SEARCH_INDEX, null, mValues); 404 } getSearchIndexVersion()405 private int getSearchIndexVersion() { 406 return Integer.parseInt(mDbHelper.getProperty(PROPERTY_SEARCH_INDEX_VERSION, "0")); 407 } 408 setSearchIndexVersion(int version)409 private void setSearchIndexVersion(int version) { 410 mDbHelper.setProperty(PROPERTY_SEARCH_INDEX_VERSION, String.valueOf(version)); 411 } 412 413 /** 414 * Token separator that matches SQLite's "simple" tokenizer. 415 * - Unicode codepoints >= 128: Everything 416 * - Unicode codepoints < 128: Alphanumeric and "_" 417 * - Everything else is a separator of tokens 418 */ 419 private static final Pattern FTS_TOKEN_SEPARATOR_RE = 420 Pattern.compile("[^\u0080-\uffff\\p{Alnum}_]"); 421 422 /** 423 * Tokenize a string in the way as that of SQLite's "simple" tokenizer. 424 */ 425 @VisibleForTesting splitIntoFtsTokens(String s)426 static List<String> splitIntoFtsTokens(String s) { 427 final ArrayList<String> ret = Lists.newArrayList(); 428 for (String token : FTS_TOKEN_SEPARATOR_RE.split(s)) { 429 if (!TextUtils.isEmpty(token)) { 430 ret.add(token); 431 } 432 } 433 return ret; 434 } 435 436 /** 437 * Tokenizes the query and normalizes/hex encodes each token. The tokenizer uses the same 438 * rules as SQLite's "simple" tokenizer. Each token is added to the retokenizer and then 439 * returned as a String. 440 * @see FtsQueryBuilder#UNSCOPED_NORMALIZING 441 * @see FtsQueryBuilder#SCOPED_NAME_NORMALIZING 442 */ getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder)443 public static String getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder) { 444 final StringBuilder result = new StringBuilder(); 445 for (String token : splitIntoFtsTokens(query)) { 446 ftsQueryBuilder.addToken(result, token); 447 } 448 return result.toString(); 449 } 450 451 public static abstract class FtsQueryBuilder { addToken(StringBuilder builder, String token)452 public abstract void addToken(StringBuilder builder, String token); 453 454 /** Normalizes and space-concatenates each token. Example: "a1b2c1* a2b3c2*" */ 455 public static final FtsQueryBuilder UNSCOPED_NORMALIZING = new UnscopedNormalizingBuilder(); 456 457 /** 458 * Scopes each token to a column and normalizes the name. 459 * Example: "content:foo* name:a1b2c1* tokens:foo* content:bar* name:a2b3c2* tokens:bar*" 460 */ 461 public static final FtsQueryBuilder SCOPED_NAME_NORMALIZING = 462 new ScopedNameNormalizingBuilder(); 463 464 /** 465 * Scopes each token to a the content column and also for name with normalization. 466 * Also adds a user-defined expression to each token. This allows common criteria to be 467 * concatenated to each token. 468 * Example (commonCriteria=" OR tokens:123*"): 469 * "content:650* OR name:1A1B1C* OR tokens:123* content:2A2B2C* OR name:foo* OR tokens:123*" 470 */ getDigitsQueryBuilder(final String commonCriteria)471 public static FtsQueryBuilder getDigitsQueryBuilder(final String commonCriteria) { 472 return new FtsQueryBuilder() { 473 @Override 474 public void addToken(StringBuilder builder, String token) { 475 if (builder.length() != 0) builder.append(' '); 476 477 builder.append("content:"); 478 builder.append(token); 479 builder.append("* "); 480 481 final String normalizedToken = NameNormalizer.normalize(token); 482 if (!TextUtils.isEmpty(normalizedToken)) { 483 builder.append(" OR name:"); 484 builder.append(normalizedToken); 485 builder.append('*'); 486 } 487 488 builder.append(commonCriteria); 489 } 490 }; 491 } 492 } 493 494 private static class UnscopedNormalizingBuilder extends FtsQueryBuilder { 495 @Override 496 public void addToken(StringBuilder builder, String token) { 497 if (builder.length() != 0) builder.append(' '); 498 499 // the token could be empty (if the search query was "_"). we should still emit it 500 // here, as we otherwise risk to end up with an empty MATCH-expression MATCH "" 501 builder.append(NameNormalizer.normalize(token)); 502 builder.append('*'); 503 } 504 } 505 506 private static class ScopedNameNormalizingBuilder extends FtsQueryBuilder { 507 @Override 508 public void addToken(StringBuilder builder, String token) { 509 if (builder.length() != 0) builder.append(' '); 510 511 builder.append("content:"); 512 builder.append(token); 513 builder.append('*'); 514 515 final String normalizedToken = NameNormalizer.normalize(token); 516 if (!TextUtils.isEmpty(normalizedToken)) { 517 builder.append(" OR name:"); 518 builder.append(normalizedToken); 519 builder.append('*'); 520 } 521 522 builder.append(" OR tokens:"); 523 builder.append(token); 524 builder.append("*"); 525 } 526 } 527 } 528