1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.server; 18 19 import static com.android.server.pm.PackageManagerServiceUtils.logCriticalInfo; 20 21 import android.content.ContentResolver; 22 import android.content.Context; 23 import android.os.Build; 24 import android.os.Environment; 25 import android.os.FileUtils; 26 import android.os.RecoverySystem; 27 import android.os.SystemClock; 28 import android.os.SystemProperties; 29 import android.os.UserHandle; 30 import android.provider.Settings; 31 import android.text.format.DateUtils; 32 import android.util.ExceptionUtils; 33 import android.util.Log; 34 import android.util.MathUtils; 35 import android.util.Slog; 36 import android.util.SparseArray; 37 import android.util.StatsLog; 38 39 import com.android.internal.annotations.VisibleForTesting; 40 import com.android.internal.util.ArrayUtils; 41 import com.android.server.am.SettingsToPropertiesMapper; 42 import com.android.server.utils.FlagNamespaceUtils; 43 44 import java.io.File; 45 import java.util.Arrays; 46 47 /** 48 * Utilities to help rescue the system from crash loops. Callers are expected to 49 * report boot events and persistent app crashes, and if they happen frequently 50 * enough this class will slowly escalate through several rescue operations 51 * before finally rebooting and prompting the user if they want to wipe data as 52 * a last resort. 53 * 54 * @hide 55 */ 56 public class RescueParty { 57 @VisibleForTesting 58 static final String PROP_ENABLE_RESCUE = "persist.sys.enable_rescue"; 59 @VisibleForTesting 60 static final int TRIGGER_COUNT = 5; 61 @VisibleForTesting 62 static final String PROP_RESCUE_LEVEL = "sys.rescue_level"; 63 @VisibleForTesting 64 static final int LEVEL_NONE = 0; 65 @VisibleForTesting 66 static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1; 67 @VisibleForTesting 68 static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2; 69 @VisibleForTesting 70 static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3; 71 @VisibleForTesting 72 static final int LEVEL_FACTORY_RESET = 4; 73 @VisibleForTesting 74 static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count"; 75 /** 76 * The boot trigger window size must always be greater than Watchdog's deadlock timeout 77 * {@link Watchdog#DEFAULT_TIMEOUT}. 78 */ 79 @VisibleForTesting 80 static final long BOOT_TRIGGER_WINDOW_MILLIS = 600 * DateUtils.SECOND_IN_MILLIS; 81 @VisibleForTesting 82 static final long PERSISTENT_APP_CRASH_TRIGGER_WINDOW_MILLIS = 30 * DateUtils.SECOND_IN_MILLIS; 83 @VisibleForTesting 84 static final String TAG = "RescueParty"; 85 86 private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue"; 87 private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start"; 88 private static final String PROP_VIRTUAL_DEVICE = "ro.hardware.virtual_device"; 89 90 /** Threshold for boot loops */ 91 private static final Threshold sBoot = new BootThreshold(); 92 /** Threshold for app crash loops */ 93 private static SparseArray<Threshold> sApps = new SparseArray<>(); 94 isDisabled()95 private static boolean isDisabled() { 96 // Check if we're explicitly enabled for testing 97 if (SystemProperties.getBoolean(PROP_ENABLE_RESCUE, false)) { 98 return false; 99 } 100 101 // We're disabled on all engineering devices 102 if (Build.IS_ENG) { 103 Slog.v(TAG, "Disabled because of eng build"); 104 return true; 105 } 106 107 // We're disabled on userdebug devices connected over USB, since that's 108 // a decent signal that someone is actively trying to debug the device, 109 // or that it's in a lab environment. 110 if (Build.IS_USERDEBUG && isUsbActive()) { 111 Slog.v(TAG, "Disabled because of active USB connection"); 112 return true; 113 } 114 115 // One last-ditch check 116 if (SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false)) { 117 Slog.v(TAG, "Disabled because of manual property"); 118 return true; 119 } 120 121 return false; 122 } 123 124 /** 125 * Take note of a boot event. If we notice too many of these events 126 * happening in rapid succession, we'll send out a rescue party. 127 */ noteBoot(Context context)128 public static void noteBoot(Context context) { 129 if (isDisabled()) return; 130 if (sBoot.incrementAndTest()) { 131 sBoot.reset(); 132 incrementRescueLevel(sBoot.uid); 133 executeRescueLevel(context); 134 } 135 } 136 137 /** 138 * Take note of a persistent app or apex module crash. If we notice too many of these 139 * events happening in rapid succession, we'll send out a rescue party. 140 */ noteAppCrash(Context context, int uid)141 public static void noteAppCrash(Context context, int uid) { 142 if (isDisabled()) return; 143 Threshold t = sApps.get(uid); 144 if (t == null) { 145 t = new AppThreshold(uid); 146 sApps.put(uid, t); 147 } 148 if (t.incrementAndTest()) { 149 t.reset(); 150 incrementRescueLevel(t.uid); 151 executeRescueLevel(context); 152 } 153 } 154 155 /** 156 * Check if we're currently attempting to reboot for a factory reset. 157 */ isAttemptingFactoryReset()158 public static boolean isAttemptingFactoryReset() { 159 return SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) == LEVEL_FACTORY_RESET; 160 } 161 162 /** 163 * Called when {@code SettingsProvider} has been published, which is a good 164 * opportunity to reset any settings depending on our rescue level. 165 */ onSettingsProviderPublished(Context context)166 public static void onSettingsProviderPublished(Context context) { 167 handleNativeRescuePartyResets(); 168 executeRescueLevel(context); 169 } 170 171 @VisibleForTesting resetAllThresholds()172 static void resetAllThresholds() { 173 sBoot.reset(); 174 175 for (int i = 0; i < sApps.size(); i++) { 176 Threshold appThreshold = sApps.get(sApps.keyAt(i)); 177 appThreshold.reset(); 178 } 179 } 180 181 @VisibleForTesting getElapsedRealtime()182 static long getElapsedRealtime() { 183 return SystemClock.elapsedRealtime(); 184 } 185 handleNativeRescuePartyResets()186 private static void handleNativeRescuePartyResets() { 187 if (SettingsToPropertiesMapper.isNativeFlagsResetPerformed()) { 188 FlagNamespaceUtils.resetDeviceConfig(Settings.RESET_MODE_TRUSTED_DEFAULTS, 189 Arrays.asList(SettingsToPropertiesMapper.getResetNativeCategories())); 190 } 191 } 192 193 /** 194 * Escalate to the next rescue level. After incrementing the level you'll 195 * probably want to call {@link #executeRescueLevel(Context)}. 196 */ incrementRescueLevel(int triggerUid)197 private static void incrementRescueLevel(int triggerUid) { 198 final int level = MathUtils.constrain( 199 SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1, 200 LEVEL_NONE, LEVEL_FACTORY_RESET); 201 SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level)); 202 203 EventLogTags.writeRescueLevel(level, triggerUid); 204 logCriticalInfo(Log.WARN, "Incremented rescue level to " 205 + levelToString(level) + " triggered by UID " + triggerUid); 206 } 207 executeRescueLevel(Context context)208 private static void executeRescueLevel(Context context) { 209 final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE); 210 if (level == LEVEL_NONE) return; 211 212 Slog.w(TAG, "Attempting rescue level " + levelToString(level)); 213 try { 214 executeRescueLevelInternal(context, level); 215 EventLogTags.writeRescueSuccess(level); 216 logCriticalInfo(Log.DEBUG, 217 "Finished rescue level " + levelToString(level)); 218 } catch (Throwable t) { 219 final String msg = ExceptionUtils.getCompleteMessage(t); 220 EventLogTags.writeRescueFailure(level, msg); 221 logCriticalInfo(Log.ERROR, 222 "Failed rescue level " + levelToString(level) + ": " + msg); 223 } 224 } 225 executeRescueLevelInternal(Context context, int level)226 private static void executeRescueLevelInternal(Context context, int level) throws Exception { 227 StatsLog.write(StatsLog.RESCUE_PARTY_RESET_REPORTED, level); 228 switch (level) { 229 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: 230 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS); 231 break; 232 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: 233 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_CHANGES); 234 break; 235 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: 236 resetAllSettings(context, Settings.RESET_MODE_TRUSTED_DEFAULTS); 237 break; 238 case LEVEL_FACTORY_RESET: 239 RecoverySystem.rebootPromptAndWipeUserData(context, TAG); 240 break; 241 } 242 FlagNamespaceUtils.addToKnownResetNamespaces( 243 FlagNamespaceUtils.NAMESPACE_NO_PACKAGE); 244 } 245 resetAllSettings(Context context, int mode)246 private static void resetAllSettings(Context context, int mode) throws Exception { 247 // Try our best to reset all settings possible, and once finished 248 // rethrow any exception that we encountered 249 Exception res = null; 250 final ContentResolver resolver = context.getContentResolver(); 251 try { 252 FlagNamespaceUtils.resetDeviceConfig(mode); 253 } catch (Exception e) { 254 res = new RuntimeException("Failed to reset config settings", e); 255 } 256 try { 257 Settings.Global.resetToDefaultsAsUser(resolver, null, mode, UserHandle.USER_SYSTEM); 258 } catch (Exception e) { 259 res = new RuntimeException("Failed to reset global settings", e); 260 } 261 for (int userId : getAllUserIds()) { 262 try { 263 Settings.Secure.resetToDefaultsAsUser(resolver, null, mode, userId); 264 } catch (Exception e) { 265 res = new RuntimeException("Failed to reset secure settings for " + userId, e); 266 } 267 } 268 if (res != null) { 269 throw res; 270 } 271 } 272 273 /** 274 * Threshold that can be triggered if a number of events occur within a 275 * window of time. 276 */ 277 private abstract static class Threshold { getCount()278 public abstract int getCount(); setCount(int count)279 public abstract void setCount(int count); getStart()280 public abstract long getStart(); setStart(long start)281 public abstract void setStart(long start); 282 283 private final int uid; 284 private final int triggerCount; 285 private final long triggerWindow; 286 Threshold(int uid, int triggerCount, long triggerWindow)287 public Threshold(int uid, int triggerCount, long triggerWindow) { 288 this.uid = uid; 289 this.triggerCount = triggerCount; 290 this.triggerWindow = triggerWindow; 291 } 292 reset()293 public void reset() { 294 setCount(0); 295 setStart(0); 296 } 297 298 /** 299 * @return if this threshold has been triggered 300 */ incrementAndTest()301 public boolean incrementAndTest() { 302 final long now = getElapsedRealtime(); 303 final long window = now - getStart(); 304 if (window > triggerWindow) { 305 setCount(1); 306 setStart(now); 307 return false; 308 } else { 309 int count = getCount() + 1; 310 setCount(count); 311 EventLogTags.writeRescueNote(uid, count, window); 312 Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last " 313 + (window / 1000) + " sec"); 314 return (count >= triggerCount); 315 } 316 } 317 } 318 319 /** 320 * Specialization of {@link Threshold} for monitoring boot events. It stores 321 * counters in system properties for robustness. 322 */ 323 private static class BootThreshold extends Threshold { BootThreshold()324 public BootThreshold() { 325 // We're interested in TRIGGER_COUNT events in any 326 // BOOT_TRIGGER_WINDOW_MILLIS second period; this window is super relaxed because 327 // booting can take a long time if forced to dexopt things. 328 super(android.os.Process.ROOT_UID, TRIGGER_COUNT, BOOT_TRIGGER_WINDOW_MILLIS); 329 } 330 331 @Override getCount()332 public int getCount() { 333 return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0); 334 } 335 336 @Override setCount(int count)337 public void setCount(int count) { 338 SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count)); 339 } 340 341 @Override getStart()342 public long getStart() { 343 return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0); 344 } 345 346 @Override setStart(long start)347 public void setStart(long start) { 348 SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start)); 349 } 350 } 351 352 /** 353 * Specialization of {@link Threshold} for monitoring app crashes. It stores 354 * counters in memory. 355 */ 356 private static class AppThreshold extends Threshold { 357 private int count; 358 private long start; 359 AppThreshold(int uid)360 public AppThreshold(int uid) { 361 // We're interested in TRIGGER_COUNT events in any 362 // PERSISTENT_APP_CRASH_TRIGGER_WINDOW_MILLIS second period; apps crash pretty quickly 363 // so we can keep a tight leash on them. 364 super(uid, TRIGGER_COUNT, PERSISTENT_APP_CRASH_TRIGGER_WINDOW_MILLIS); 365 } 366 getCount()367 @Override public int getCount() { return count; } setCount(int count)368 @Override public void setCount(int count) { this.count = count; } getStart()369 @Override public long getStart() { return start; } setStart(long start)370 @Override public void setStart(long start) { this.start = start; } 371 } 372 getAllUserIds()373 private static int[] getAllUserIds() { 374 int[] userIds = { UserHandle.USER_SYSTEM }; 375 try { 376 for (File file : FileUtils.listFilesOrEmpty(Environment.getDataSystemDeDirectory())) { 377 try { 378 final int userId = Integer.parseInt(file.getName()); 379 if (userId != UserHandle.USER_SYSTEM) { 380 userIds = ArrayUtils.appendInt(userIds, userId); 381 } 382 } catch (NumberFormatException ignored) { 383 } 384 } 385 } catch (Throwable t) { 386 Slog.w(TAG, "Trouble discovering users", t); 387 } 388 return userIds; 389 } 390 391 /** 392 * Hacky test to check if the device has an active USB connection, which is 393 * a good proxy for someone doing local development work. 394 */ isUsbActive()395 private static boolean isUsbActive() { 396 if (SystemProperties.getBoolean(PROP_VIRTUAL_DEVICE, false)) { 397 Slog.v(TAG, "Assuming virtual device is connected over USB"); 398 return true; 399 } 400 try { 401 final String state = FileUtils 402 .readTextFile(new File("/sys/class/android_usb/android0/state"), 128, ""); 403 return "CONFIGURED".equals(state.trim()); 404 } catch (Throwable t) { 405 Slog.w(TAG, "Failed to determine if device was on USB", t); 406 return false; 407 } 408 } 409 levelToString(int level)410 private static String levelToString(int level) { 411 switch (level) { 412 case LEVEL_NONE: return "NONE"; 413 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: return "RESET_SETTINGS_UNTRUSTED_DEFAULTS"; 414 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: return "RESET_SETTINGS_UNTRUSTED_CHANGES"; 415 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: return "RESET_SETTINGS_TRUSTED_DEFAULTS"; 416 case LEVEL_FACTORY_RESET: return "FACTORY_RESET"; 417 default: return Integer.toString(level); 418 } 419 } 420 } 421