1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.net.ip; 18 19 import static android.net.metrics.IpReachabilityEvent.NUD_FAILED; 20 import static android.net.metrics.IpReachabilityEvent.NUD_FAILED_ORGANIC; 21 import static android.net.metrics.IpReachabilityEvent.PROVISIONING_LOST; 22 import static android.net.metrics.IpReachabilityEvent.PROVISIONING_LOST_ORGANIC; 23 24 import android.content.Context; 25 import android.net.ConnectivityManager; 26 import android.net.INetd; 27 import android.net.LinkProperties; 28 import android.net.RouteInfo; 29 import android.net.ip.IpNeighborMonitor.NeighborEvent; 30 import android.net.ip.IpNeighborMonitor.NeighborEventConsumer; 31 import android.net.metrics.IpConnectivityLog; 32 import android.net.metrics.IpReachabilityEvent; 33 import android.net.netlink.StructNdMsg; 34 import android.net.util.InterfaceParams; 35 import android.net.util.SharedLog; 36 import android.os.ConditionVariable; 37 import android.os.Handler; 38 import android.os.Looper; 39 import android.os.PowerManager; 40 import android.os.PowerManager.WakeLock; 41 import android.os.RemoteException; 42 import android.os.SystemClock; 43 import android.text.TextUtils; 44 import android.util.Log; 45 46 import com.android.internal.annotations.VisibleForTesting; 47 import com.android.internal.util.Preconditions; 48 import com.android.networkstack.R; 49 50 import java.io.PrintWriter; 51 import java.net.Inet6Address; 52 import java.net.InetAddress; 53 import java.util.ArrayList; 54 import java.util.HashMap; 55 import java.util.List; 56 import java.util.Map; 57 58 59 /** 60 * IpReachabilityMonitor. 61 * 62 * Monitors on-link IP reachability and notifies callers whenever any on-link 63 * addresses of interest appear to have become unresponsive. 64 * 65 * This code does not concern itself with "why" a neighbour might have become 66 * unreachable. Instead, it primarily reacts to the kernel's notion of IP 67 * reachability for each of the neighbours we know to be critically important 68 * to normal network connectivity. As such, it is often "just the messenger": 69 * the neighbours about which it warns are already deemed by the kernel to have 70 * become unreachable. 71 * 72 * 73 * How it works: 74 * 75 * 1. The "on-link neighbours of interest" found in a given LinkProperties 76 * instance are added to a "watch list" via #updateLinkProperties(). 77 * This usually means all default gateways and any on-link DNS servers. 78 * 79 * 2. We listen continuously for netlink neighbour messages (RTM_NEWNEIGH, 80 * RTM_DELNEIGH), watching only for neighbours in the watch list. 81 * 82 * - A neighbour going into NUD_REACHABLE, NUD_STALE, NUD_DELAY, and 83 * even NUD_PROBE is perfectly normal; we merely record the new state. 84 * 85 * - A neighbour's entry may be deleted (RTM_DELNEIGH), for example due 86 * to garbage collection. This is not necessarily of immediate 87 * concern; we record the neighbour as moving to NUD_NONE. 88 * 89 * - A neighbour transitioning to NUD_FAILED (for any reason) is 90 * critically important and is handled as described below in #4. 91 * 92 * 3. All on-link neighbours in the watch list can be forcibly "probed" by 93 * calling #probeAll(). This should be called whenever it is important to 94 * verify that critical neighbours on the link are still reachable, e.g. 95 * when roaming between BSSIDs. 96 * 97 * - The kernel will send unicast ARP requests for IPv4 neighbours and 98 * unicast NS packets for IPv6 neighbours. The expected replies will 99 * likely be unicast. 100 * 101 * - The forced probing is done holding a wakelock. The kernel may, 102 * however, initiate probing of a neighbor on its own, i.e. whenever 103 * a neighbour has expired from NUD_DELAY. 104 * 105 * - The kernel sends: 106 * 107 * /proc/sys/net/ipv{4,6}/neigh/<ifname>/ucast_solicit 108 * 109 * number of probes (usually 3) every: 110 * 111 * /proc/sys/net/ipv{4,6}/neigh/<ifname>/retrans_time_ms 112 * 113 * number of milliseconds (usually 1000ms). This normally results in 114 * 3 unicast packets, 1 per second. 115 * 116 * - If no response is received to any of the probe packets, the kernel 117 * marks the neighbour as being in state NUD_FAILED, and the listening 118 * process in #2 will learn of it. 119 * 120 * 4. We call the supplied Callback#notifyLost() function if the loss of a 121 * neighbour in NUD_FAILED would cause IPv4 or IPv6 configuration to 122 * become incomplete (a loss of provisioning). 123 * 124 * - For example, losing all our IPv4 on-link DNS servers (or losing 125 * our only IPv6 default gateway) constitutes a loss of IPv4 (IPv6) 126 * provisioning; Callback#notifyLost() would be called. 127 * 128 * - Since it can be non-trivial to reacquire certain IP provisioning 129 * state it may be best for the link to disconnect completely and 130 * reconnect afresh. 131 * 132 * Accessing an instance of this class from multiple threads is NOT safe. 133 * 134 * @hide 135 */ 136 public class IpReachabilityMonitor { 137 private static final String TAG = "IpReachabilityMonitor"; 138 private static final boolean DBG = Log.isLoggable(TAG, Log.DEBUG); 139 private static final boolean VDBG = Log.isLoggable(TAG, Log.VERBOSE); 140 141 // Upper and lower bound for NUD probe parameters. 142 protected static final int MAX_NUD_SOLICIT_NUM = 15; 143 protected static final int MIN_NUD_SOLICIT_NUM = 5; 144 protected static final int MAX_NUD_SOLICIT_INTERVAL_MS = 1000; 145 protected static final int MIN_NUD_SOLICIT_INTERVAL_MS = 750; 146 147 public interface Callback { 148 /** 149 * This callback function must execute as quickly as possible as it is 150 * run on the same thread that listens to kernel neighbor updates. 151 * 152 * TODO: refactor to something like notifyProvisioningLost(String msg). 153 */ notifyLost(InetAddress ip, String logMsg)154 void notifyLost(InetAddress ip, String logMsg); 155 } 156 157 /** 158 * Encapsulates IpReachabilityMonitor dependencies on systems that hinder unit testing. 159 * TODO: consider also wrapping MultinetworkPolicyTracker in this interface. 160 */ 161 interface Dependencies { acquireWakeLock(long durationMs)162 void acquireWakeLock(long durationMs); makeIpNeighborMonitor(Handler h, SharedLog log, NeighborEventConsumer cb)163 IpNeighborMonitor makeIpNeighborMonitor(Handler h, SharedLog log, NeighborEventConsumer cb); 164 makeDefault(Context context, String iface)165 static Dependencies makeDefault(Context context, String iface) { 166 final String lockName = TAG + "." + iface; 167 final PowerManager pm = (PowerManager) context.getSystemService(Context.POWER_SERVICE); 168 final WakeLock lock = pm.newWakeLock(PowerManager.PARTIAL_WAKE_LOCK, lockName); 169 170 return new Dependencies() { 171 public void acquireWakeLock(long durationMs) { 172 lock.acquire(durationMs); 173 } 174 175 public IpNeighborMonitor makeIpNeighborMonitor(Handler h, SharedLog log, 176 NeighborEventConsumer cb) { 177 return new IpNeighborMonitor(h, log, cb); 178 } 179 }; 180 } 181 } 182 183 private final InterfaceParams mInterfaceParams; 184 private final IpNeighborMonitor mIpNeighborMonitor; 185 private final SharedLog mLog; 186 private final Callback mCallback; 187 private final Dependencies mDependencies; 188 private final boolean mUsingMultinetworkPolicyTracker; 189 private final ConnectivityManager mCm; 190 private final IpConnectivityLog mMetricsLog; 191 private final Context mContext; 192 private final INetd mNetd; 193 private LinkProperties mLinkProperties = new LinkProperties(); 194 private Map<InetAddress, NeighborEvent> mNeighborWatchList = new HashMap<>(); 195 // Time in milliseconds of the last forced probe request. 196 private volatile long mLastProbeTimeMs; 197 private int mNumSolicits; 198 private int mInterSolicitIntervalMs; 199 200 public IpReachabilityMonitor( 201 Context context, InterfaceParams ifParams, Handler h, SharedLog log, Callback callback, 202 boolean usingMultinetworkPolicyTracker, final INetd netd) { 203 this(context, ifParams, h, log, callback, usingMultinetworkPolicyTracker, 204 Dependencies.makeDefault(context, ifParams.name), new IpConnectivityLog(), netd); 205 } 206 207 @VisibleForTesting 208 IpReachabilityMonitor(Context context, InterfaceParams ifParams, Handler h, SharedLog log, 209 Callback callback, boolean usingMultinetworkPolicyTracker, Dependencies dependencies, 210 final IpConnectivityLog metricsLog, final INetd netd) { 211 if (ifParams == null) throw new IllegalArgumentException("null InterfaceParams"); 212 213 mContext = context; 214 mInterfaceParams = ifParams; 215 mLog = log.forSubComponent(TAG); 216 mCallback = callback; 217 mUsingMultinetworkPolicyTracker = usingMultinetworkPolicyTracker; 218 mCm = context.getSystemService(ConnectivityManager.class); 219 mDependencies = dependencies; 220 mMetricsLog = metricsLog; 221 mNetd = netd; 222 Preconditions.checkNotNull(mNetd); 223 Preconditions.checkArgument(!TextUtils.isEmpty(mInterfaceParams.name)); 224 225 // In case the overylaid parameters specify an invalid configuration, set the parameters 226 // to the hardcoded defaults first, then set them to the values used in the steady state. 227 try { 228 setNeighborParameters(MIN_NUD_SOLICIT_NUM, MIN_NUD_SOLICIT_INTERVAL_MS); 229 } catch (Exception e) { 230 Log.e(TAG, "Failed to adjust neighbor parameters with hardcoded defaults"); 231 } 232 setNeighbourParametersForSteadyState(); 233 234 mIpNeighborMonitor = mDependencies.makeIpNeighborMonitor(h, mLog, 235 (NeighborEvent event) -> { 236 if (mInterfaceParams.index != event.ifindex) return; 237 if (!mNeighborWatchList.containsKey(event.ip)) return; 238 239 final NeighborEvent prev = mNeighborWatchList.put(event.ip, event); 240 241 // TODO: Consider what to do with other states that are not within 242 // NeighborEvent#isValid() (i.e. NUD_NONE, NUD_INCOMPLETE). 243 if (event.nudState == StructNdMsg.NUD_FAILED) { 244 mLog.w("ALERT neighbor went from: " + prev + " to: " + event); 245 handleNeighborLost(event); 246 } else if (event.nudState == StructNdMsg.NUD_REACHABLE) { 247 maybeRestoreNeighborParameters(); 248 } 249 }); 250 mIpNeighborMonitor.start(); 251 } 252 253 public void stop() { 254 mIpNeighborMonitor.stop(); 255 clearLinkProperties(); 256 } 257 258 public void dump(PrintWriter pw) { 259 if (Looper.myLooper() == mIpNeighborMonitor.getHandler().getLooper()) { 260 pw.println(describeWatchList("\n")); 261 return; 262 } 263 264 final ConditionVariable cv = new ConditionVariable(false); 265 mIpNeighborMonitor.getHandler().post(() -> { 266 pw.println(describeWatchList("\n")); 267 cv.open(); 268 }); 269 270 if (!cv.block(1000)) { 271 pw.println("Timed out waiting for IpReachabilityMonitor dump"); 272 } 273 } 274 275 private String describeWatchList() { return describeWatchList(" "); } 276 277 private String describeWatchList(String sep) { 278 final StringBuilder sb = new StringBuilder(); 279 sb.append("iface{" + mInterfaceParams + "}," + sep); 280 sb.append("ntable=[" + sep); 281 String delimiter = ""; 282 for (Map.Entry<InetAddress, NeighborEvent> entry : mNeighborWatchList.entrySet()) { 283 sb.append(delimiter).append(entry.getKey().getHostAddress() + "/" + entry.getValue()); 284 delimiter = "," + sep; 285 } 286 sb.append("]"); 287 return sb.toString(); 288 } 289 290 private static boolean isOnLink(List<RouteInfo> routes, InetAddress ip) { 291 for (RouteInfo route : routes) { 292 if (!route.hasGateway() && route.matches(ip)) { 293 return true; 294 } 295 } 296 return false; 297 } 298 299 public void updateLinkProperties(LinkProperties lp) { 300 if (!mInterfaceParams.name.equals(lp.getInterfaceName())) { 301 // TODO: figure out whether / how to cope with interface changes. 302 Log.wtf(TAG, "requested LinkProperties interface '" + lp.getInterfaceName() + 303 "' does not match: " + mInterfaceParams.name); 304 return; 305 } 306 307 mLinkProperties = new LinkProperties(lp); 308 Map<InetAddress, NeighborEvent> newNeighborWatchList = new HashMap<>(); 309 310 final List<RouteInfo> routes = mLinkProperties.getRoutes(); 311 for (RouteInfo route : routes) { 312 if (route.hasGateway()) { 313 InetAddress gw = route.getGateway(); 314 if (isOnLink(routes, gw)) { 315 newNeighborWatchList.put(gw, mNeighborWatchList.getOrDefault(gw, null)); 316 } 317 } 318 } 319 320 for (InetAddress dns : lp.getDnsServers()) { 321 if (isOnLink(routes, dns)) { 322 newNeighborWatchList.put(dns, mNeighborWatchList.getOrDefault(dns, null)); 323 } 324 } 325 326 mNeighborWatchList = newNeighborWatchList; 327 if (DBG) { Log.d(TAG, "watch: " + describeWatchList()); } 328 } 329 330 public void clearLinkProperties() { 331 mLinkProperties.clear(); 332 mNeighborWatchList.clear(); 333 if (DBG) { Log.d(TAG, "clear: " + describeWatchList()); } 334 } 335 336 private void handleNeighborLost(NeighborEvent event) { 337 final LinkProperties whatIfLp = new LinkProperties(mLinkProperties); 338 339 InetAddress ip = null; 340 for (Map.Entry<InetAddress, NeighborEvent> entry : mNeighborWatchList.entrySet()) { 341 // TODO: Consider using NeighborEvent#isValid() here; it's more 342 // strict but may interact badly if other entries are somehow in 343 // NUD_INCOMPLETE (say, during network attach). 344 final NeighborEvent val = entry.getValue(); 345 346 // Find all the neighbors that have gone into FAILED state. 347 // Ignore entries for which we have never received an event. If there are neighbors 348 // that never respond to ARP/ND, the kernel will send several FAILED events, then 349 // an INCOMPLETE event, and then more FAILED events. The INCOMPLETE event will 350 // populate the map and the subsequent FAILED event will be processed. 351 if (val == null || val.nudState != StructNdMsg.NUD_FAILED) continue; 352 353 ip = entry.getKey(); 354 for (RouteInfo route : mLinkProperties.getRoutes()) { 355 if (ip.equals(route.getGateway())) { 356 whatIfLp.removeRoute(route); 357 } 358 } 359 360 if (avoidingBadLinks() || !(ip instanceof Inet6Address)) { 361 // We should do this unconditionally, but alas we cannot: b/31827713. 362 whatIfLp.removeDnsServer(ip); 363 } 364 } 365 366 final boolean lostProvisioning = 367 (mLinkProperties.isIpv4Provisioned() && !whatIfLp.isIpv4Provisioned()) 368 || (mLinkProperties.isIpv6Provisioned() && !whatIfLp.isIpv6Provisioned()); 369 370 if (lostProvisioning) { 371 final String logMsg = "FAILURE: LOST_PROVISIONING, " + event; 372 Log.w(TAG, logMsg); 373 if (mCallback != null) { 374 // TODO: remove |ip| when the callback signature no longer has 375 // an InetAddress argument. 376 mCallback.notifyLost(ip, logMsg); 377 } 378 } 379 logNudFailed(lostProvisioning); 380 } 381 382 private void maybeRestoreNeighborParameters() { 383 for (Map.Entry<InetAddress, NeighborEvent> entry : mNeighborWatchList.entrySet()) { 384 if (DBG) { 385 Log.d(TAG, "neighbour IPv4(v6): " + entry.getKey() + " neighbour state: " 386 + StructNdMsg.stringForNudState(entry.getValue().nudState)); 387 } 388 final NeighborEvent val = entry.getValue(); 389 // If an entry is null, consider that probing for that neighbour has completed. 390 if (val == null || val.nudState != StructNdMsg.NUD_REACHABLE) return; 391 } 392 393 // Probing for all neighbours in the watchlist is complete and the connection is stable, 394 // restore NUD probe parameters to steadystate value. In the case where neighbours 395 // are responsive, this code will run before the wakelock expires. 396 setNeighbourParametersForSteadyState(); 397 } 398 399 private boolean avoidingBadLinks() { 400 return !mUsingMultinetworkPolicyTracker || mCm.shouldAvoidBadWifi(); 401 } 402 403 public void probeAll() { 404 setNeighbourParametersPostRoaming(); 405 406 final List<InetAddress> ipProbeList = new ArrayList<>(mNeighborWatchList.keySet()); 407 if (!ipProbeList.isEmpty()) { 408 // Keep the CPU awake long enough to allow all ARP/ND 409 // probes a reasonable chance at success. See b/23197666. 410 // 411 // The wakelock we use is (by default) refcounted, and this version 412 // of acquire(timeout) queues a release message to keep acquisitions 413 // and releases balanced. 414 mDependencies.acquireWakeLock(getProbeWakeLockDuration()); 415 } 416 417 for (InetAddress ip : ipProbeList) { 418 final int rval = IpNeighborMonitor.startKernelNeighborProbe(mInterfaceParams.index, ip); 419 mLog.log(String.format("put neighbor %s into NUD_PROBE state (rval=%d)", 420 ip.getHostAddress(), rval)); 421 logEvent(IpReachabilityEvent.PROBE, rval); 422 } 423 mLastProbeTimeMs = SystemClock.elapsedRealtime(); 424 } 425 426 private long getProbeWakeLockDuration() { 427 final long gracePeriodMs = 500; 428 return (long) (mNumSolicits * mInterSolicitIntervalMs) + gracePeriodMs; 429 } 430 431 private void setNeighbourParametersPostRoaming() { 432 setNeighborParametersFromResources(R.integer.config_nud_postroaming_solicit_num, 433 R.integer.config_nud_postroaming_solicit_interval); 434 } 435 436 private void setNeighbourParametersForSteadyState() { 437 setNeighborParametersFromResources(R.integer.config_nud_steadystate_solicit_num, 438 R.integer.config_nud_steadystate_solicit_interval); 439 } 440 441 private void setNeighborParametersFromResources(final int numResId, final int intervalResId) { 442 try { 443 final int numSolicits = mContext.getResources().getInteger(numResId); 444 final int interSolicitIntervalMs = mContext.getResources().getInteger(intervalResId); 445 setNeighborParameters(numSolicits, interSolicitIntervalMs); 446 } catch (Exception e) { 447 Log.e(TAG, "Failed to adjust neighbor parameters"); 448 } 449 } 450 451 private void setNeighborParameters(int numSolicits, int interSolicitIntervalMs) 452 throws RemoteException, IllegalArgumentException { 453 Preconditions.checkArgument(numSolicits >= MIN_NUD_SOLICIT_NUM, 454 "numSolicits must be at least " + MIN_NUD_SOLICIT_NUM); 455 Preconditions.checkArgument(numSolicits <= MAX_NUD_SOLICIT_NUM, 456 "numSolicits must be at most " + MAX_NUD_SOLICIT_NUM); 457 Preconditions.checkArgument(interSolicitIntervalMs >= MIN_NUD_SOLICIT_INTERVAL_MS, 458 "interSolicitIntervalMs must be at least " + MIN_NUD_SOLICIT_INTERVAL_MS); 459 Preconditions.checkArgument(interSolicitIntervalMs <= MAX_NUD_SOLICIT_INTERVAL_MS, 460 "interSolicitIntervalMs must be at most " + MAX_NUD_SOLICIT_INTERVAL_MS); 461 462 for (int family : new Integer[]{INetd.IPV4, INetd.IPV6}) { 463 mNetd.setProcSysNet(family, INetd.NEIGH, mInterfaceParams.name, "retrans_time_ms", 464 Integer.toString(interSolicitIntervalMs)); 465 mNetd.setProcSysNet(family, INetd.NEIGH, mInterfaceParams.name, "ucast_solicit", 466 Integer.toString(numSolicits)); 467 } 468 469 mNumSolicits = numSolicits; 470 mInterSolicitIntervalMs = interSolicitIntervalMs; 471 } 472 473 private void logEvent(int probeType, int errorCode) { 474 int eventType = probeType | (errorCode & 0xff); 475 mMetricsLog.log(mInterfaceParams.name, new IpReachabilityEvent(eventType)); 476 } 477 478 private void logNudFailed(boolean lostProvisioning) { 479 long duration = SystemClock.elapsedRealtime() - mLastProbeTimeMs; 480 boolean isFromProbe = (duration < getProbeWakeLockDuration()); 481 int eventType = nudFailureEventType(isFromProbe, lostProvisioning); 482 mMetricsLog.log(mInterfaceParams.name, new IpReachabilityEvent(eventType)); 483 } 484 485 /** 486 * Returns the NUD failure event type code corresponding to the given conditions. 487 */ 488 private static int nudFailureEventType(boolean isFromProbe, boolean isProvisioningLost) { 489 if (isFromProbe) { 490 return isProvisioningLost ? PROVISIONING_LOST : NUD_FAILED; 491 } else { 492 return isProvisioningLost ? PROVISIONING_LOST_ORGANIC : NUD_FAILED_ORGANIC; 493 } 494 } 495 } 496