1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.net.ip;
18 
19 import static android.net.metrics.IpReachabilityEvent.NUD_FAILED;
20 import static android.net.metrics.IpReachabilityEvent.NUD_FAILED_ORGANIC;
21 import static android.net.metrics.IpReachabilityEvent.PROVISIONING_LOST;
22 import static android.net.metrics.IpReachabilityEvent.PROVISIONING_LOST_ORGANIC;
23 
24 import android.content.Context;
25 import android.net.ConnectivityManager;
26 import android.net.INetd;
27 import android.net.LinkProperties;
28 import android.net.RouteInfo;
29 import android.net.ip.IpNeighborMonitor.NeighborEvent;
30 import android.net.ip.IpNeighborMonitor.NeighborEventConsumer;
31 import android.net.metrics.IpConnectivityLog;
32 import android.net.metrics.IpReachabilityEvent;
33 import android.net.netlink.StructNdMsg;
34 import android.net.util.InterfaceParams;
35 import android.net.util.SharedLog;
36 import android.os.ConditionVariable;
37 import android.os.Handler;
38 import android.os.Looper;
39 import android.os.PowerManager;
40 import android.os.PowerManager.WakeLock;
41 import android.os.RemoteException;
42 import android.os.SystemClock;
43 import android.text.TextUtils;
44 import android.util.Log;
45 
46 import com.android.internal.annotations.VisibleForTesting;
47 import com.android.internal.util.Preconditions;
48 import com.android.networkstack.R;
49 
50 import java.io.PrintWriter;
51 import java.net.Inet6Address;
52 import java.net.InetAddress;
53 import java.util.ArrayList;
54 import java.util.HashMap;
55 import java.util.List;
56 import java.util.Map;
57 
58 
59 /**
60  * IpReachabilityMonitor.
61  *
62  * Monitors on-link IP reachability and notifies callers whenever any on-link
63  * addresses of interest appear to have become unresponsive.
64  *
65  * This code does not concern itself with "why" a neighbour might have become
66  * unreachable. Instead, it primarily reacts to the kernel's notion of IP
67  * reachability for each of the neighbours we know to be critically important
68  * to normal network connectivity. As such, it is often "just the messenger":
69  * the neighbours about which it warns are already deemed by the kernel to have
70  * become unreachable.
71  *
72  *
73  * How it works:
74  *
75  *   1. The "on-link neighbours of interest" found in a given LinkProperties
76  *      instance are added to a "watch list" via #updateLinkProperties().
77  *      This usually means all default gateways and any on-link DNS servers.
78  *
79  *   2. We listen continuously for netlink neighbour messages (RTM_NEWNEIGH,
80  *      RTM_DELNEIGH), watching only for neighbours in the watch list.
81  *
82  *        - A neighbour going into NUD_REACHABLE, NUD_STALE, NUD_DELAY, and
83  *          even NUD_PROBE is perfectly normal; we merely record the new state.
84  *
85  *        - A neighbour's entry may be deleted (RTM_DELNEIGH), for example due
86  *          to garbage collection.  This is not necessarily of immediate
87  *          concern; we record the neighbour as moving to NUD_NONE.
88  *
89  *        - A neighbour transitioning to NUD_FAILED (for any reason) is
90  *          critically important and is handled as described below in #4.
91  *
92  *   3. All on-link neighbours in the watch list can be forcibly "probed" by
93  *      calling #probeAll(). This should be called whenever it is important to
94  *      verify that critical neighbours on the link are still reachable, e.g.
95  *      when roaming between BSSIDs.
96  *
97  *        - The kernel will send unicast ARP requests for IPv4 neighbours and
98  *          unicast NS packets for IPv6 neighbours.  The expected replies will
99  *          likely be unicast.
100  *
101  *        - The forced probing is done holding a wakelock. The kernel may,
102  *          however, initiate probing of a neighbor on its own, i.e. whenever
103  *          a neighbour has expired from NUD_DELAY.
104  *
105  *        - The kernel sends:
106  *
107  *              /proc/sys/net/ipv{4,6}/neigh/<ifname>/ucast_solicit
108  *
109  *          number of probes (usually 3) every:
110  *
111  *              /proc/sys/net/ipv{4,6}/neigh/<ifname>/retrans_time_ms
112  *
113  *          number of milliseconds (usually 1000ms). This normally results in
114  *          3 unicast packets, 1 per second.
115  *
116  *        - If no response is received to any of the probe packets, the kernel
117  *          marks the neighbour as being in state NUD_FAILED, and the listening
118  *          process in #2 will learn of it.
119  *
120  *   4. We call the supplied Callback#notifyLost() function if the loss of a
121  *      neighbour in NUD_FAILED would cause IPv4 or IPv6 configuration to
122  *      become incomplete (a loss of provisioning).
123  *
124  *        - For example, losing all our IPv4 on-link DNS servers (or losing
125  *          our only IPv6 default gateway) constitutes a loss of IPv4 (IPv6)
126  *          provisioning; Callback#notifyLost() would be called.
127  *
128  *        - Since it can be non-trivial to reacquire certain IP provisioning
129  *          state it may be best for the link to disconnect completely and
130  *          reconnect afresh.
131  *
132  * Accessing an instance of this class from multiple threads is NOT safe.
133  *
134  * @hide
135  */
136 public class IpReachabilityMonitor {
137     private static final String TAG = "IpReachabilityMonitor";
138     private static final boolean DBG = Log.isLoggable(TAG, Log.DEBUG);
139     private static final boolean VDBG = Log.isLoggable(TAG, Log.VERBOSE);
140 
141     // Upper and lower bound for NUD probe parameters.
142     protected static final int MAX_NUD_SOLICIT_NUM = 15;
143     protected static final int MIN_NUD_SOLICIT_NUM = 5;
144     protected static final int MAX_NUD_SOLICIT_INTERVAL_MS = 1000;
145     protected static final int MIN_NUD_SOLICIT_INTERVAL_MS = 750;
146 
147     public interface Callback {
148         /**
149          * This callback function must execute as quickly as possible as it is
150          * run on the same thread that listens to kernel neighbor updates.
151          *
152          * TODO: refactor to something like notifyProvisioningLost(String msg).
153          */
notifyLost(InetAddress ip, String logMsg)154         void notifyLost(InetAddress ip, String logMsg);
155     }
156 
157     /**
158      * Encapsulates IpReachabilityMonitor dependencies on systems that hinder unit testing.
159      * TODO: consider also wrapping MultinetworkPolicyTracker in this interface.
160      */
161     interface Dependencies {
acquireWakeLock(long durationMs)162         void acquireWakeLock(long durationMs);
makeIpNeighborMonitor(Handler h, SharedLog log, NeighborEventConsumer cb)163         IpNeighborMonitor makeIpNeighborMonitor(Handler h, SharedLog log, NeighborEventConsumer cb);
164 
makeDefault(Context context, String iface)165         static Dependencies makeDefault(Context context, String iface) {
166             final String lockName = TAG + "." + iface;
167             final PowerManager pm = (PowerManager) context.getSystemService(Context.POWER_SERVICE);
168             final WakeLock lock = pm.newWakeLock(PowerManager.PARTIAL_WAKE_LOCK, lockName);
169 
170             return new Dependencies() {
171                 public void acquireWakeLock(long durationMs) {
172                     lock.acquire(durationMs);
173                 }
174 
175                 public IpNeighborMonitor makeIpNeighborMonitor(Handler h, SharedLog log,
176                         NeighborEventConsumer cb) {
177                     return new IpNeighborMonitor(h, log, cb);
178                 }
179             };
180         }
181     }
182 
183     private final InterfaceParams mInterfaceParams;
184     private final IpNeighborMonitor mIpNeighborMonitor;
185     private final SharedLog mLog;
186     private final Callback mCallback;
187     private final Dependencies mDependencies;
188     private final boolean mUsingMultinetworkPolicyTracker;
189     private final ConnectivityManager mCm;
190     private final IpConnectivityLog mMetricsLog;
191     private final Context mContext;
192     private final INetd mNetd;
193     private LinkProperties mLinkProperties = new LinkProperties();
194     private Map<InetAddress, NeighborEvent> mNeighborWatchList = new HashMap<>();
195     // Time in milliseconds of the last forced probe request.
196     private volatile long mLastProbeTimeMs;
197     private int mNumSolicits;
198     private int mInterSolicitIntervalMs;
199 
200     public IpReachabilityMonitor(
201             Context context, InterfaceParams ifParams, Handler h, SharedLog log, Callback callback,
202             boolean usingMultinetworkPolicyTracker, final INetd netd) {
203         this(context, ifParams, h, log, callback, usingMultinetworkPolicyTracker,
204                 Dependencies.makeDefault(context, ifParams.name), new IpConnectivityLog(), netd);
205     }
206 
207     @VisibleForTesting
208     IpReachabilityMonitor(Context context, InterfaceParams ifParams, Handler h, SharedLog log,
209             Callback callback, boolean usingMultinetworkPolicyTracker, Dependencies dependencies,
210             final IpConnectivityLog metricsLog, final INetd netd) {
211         if (ifParams == null) throw new IllegalArgumentException("null InterfaceParams");
212 
213         mContext = context;
214         mInterfaceParams = ifParams;
215         mLog = log.forSubComponent(TAG);
216         mCallback = callback;
217         mUsingMultinetworkPolicyTracker = usingMultinetworkPolicyTracker;
218         mCm = context.getSystemService(ConnectivityManager.class);
219         mDependencies = dependencies;
220         mMetricsLog = metricsLog;
221         mNetd = netd;
222         Preconditions.checkNotNull(mNetd);
223         Preconditions.checkArgument(!TextUtils.isEmpty(mInterfaceParams.name));
224 
225         // In case the overylaid parameters specify an invalid configuration, set the parameters
226         // to the hardcoded defaults first, then set them to the values used in the steady state.
227         try {
228             setNeighborParameters(MIN_NUD_SOLICIT_NUM, MIN_NUD_SOLICIT_INTERVAL_MS);
229         } catch (Exception e) {
230             Log.e(TAG, "Failed to adjust neighbor parameters with hardcoded defaults");
231         }
232         setNeighbourParametersForSteadyState();
233 
234         mIpNeighborMonitor = mDependencies.makeIpNeighborMonitor(h, mLog,
235                 (NeighborEvent event) -> {
236                     if (mInterfaceParams.index != event.ifindex) return;
237                     if (!mNeighborWatchList.containsKey(event.ip)) return;
238 
239                     final NeighborEvent prev = mNeighborWatchList.put(event.ip, event);
240 
241                     // TODO: Consider what to do with other states that are not within
242                     // NeighborEvent#isValid() (i.e. NUD_NONE, NUD_INCOMPLETE).
243                     if (event.nudState == StructNdMsg.NUD_FAILED) {
244                         mLog.w("ALERT neighbor went from: " + prev + " to: " + event);
245                         handleNeighborLost(event);
246                     } else if (event.nudState == StructNdMsg.NUD_REACHABLE) {
247                         maybeRestoreNeighborParameters();
248                     }
249                 });
250         mIpNeighborMonitor.start();
251     }
252 
253     public void stop() {
254         mIpNeighborMonitor.stop();
255         clearLinkProperties();
256     }
257 
258     public void dump(PrintWriter pw) {
259         if (Looper.myLooper() == mIpNeighborMonitor.getHandler().getLooper()) {
260             pw.println(describeWatchList("\n"));
261             return;
262         }
263 
264         final ConditionVariable cv = new ConditionVariable(false);
265         mIpNeighborMonitor.getHandler().post(() -> {
266             pw.println(describeWatchList("\n"));
267             cv.open();
268         });
269 
270         if (!cv.block(1000)) {
271             pw.println("Timed out waiting for IpReachabilityMonitor dump");
272         }
273     }
274 
275     private String describeWatchList() { return describeWatchList(" "); }
276 
277     private String describeWatchList(String sep) {
278         final StringBuilder sb = new StringBuilder();
279         sb.append("iface{" + mInterfaceParams + "}," + sep);
280         sb.append("ntable=[" + sep);
281         String delimiter = "";
282         for (Map.Entry<InetAddress, NeighborEvent> entry : mNeighborWatchList.entrySet()) {
283             sb.append(delimiter).append(entry.getKey().getHostAddress() + "/" + entry.getValue());
284             delimiter = "," + sep;
285         }
286         sb.append("]");
287         return sb.toString();
288     }
289 
290     private static boolean isOnLink(List<RouteInfo> routes, InetAddress ip) {
291         for (RouteInfo route : routes) {
292             if (!route.hasGateway() && route.matches(ip)) {
293                 return true;
294             }
295         }
296         return false;
297     }
298 
299     public void updateLinkProperties(LinkProperties lp) {
300         if (!mInterfaceParams.name.equals(lp.getInterfaceName())) {
301             // TODO: figure out whether / how to cope with interface changes.
302             Log.wtf(TAG, "requested LinkProperties interface '" + lp.getInterfaceName() +
303                     "' does not match: " + mInterfaceParams.name);
304             return;
305         }
306 
307         mLinkProperties = new LinkProperties(lp);
308         Map<InetAddress, NeighborEvent> newNeighborWatchList = new HashMap<>();
309 
310         final List<RouteInfo> routes = mLinkProperties.getRoutes();
311         for (RouteInfo route : routes) {
312             if (route.hasGateway()) {
313                 InetAddress gw = route.getGateway();
314                 if (isOnLink(routes, gw)) {
315                     newNeighborWatchList.put(gw, mNeighborWatchList.getOrDefault(gw, null));
316                 }
317             }
318         }
319 
320         for (InetAddress dns : lp.getDnsServers()) {
321             if (isOnLink(routes, dns)) {
322                 newNeighborWatchList.put(dns, mNeighborWatchList.getOrDefault(dns, null));
323             }
324         }
325 
326         mNeighborWatchList = newNeighborWatchList;
327         if (DBG) { Log.d(TAG, "watch: " + describeWatchList()); }
328     }
329 
330     public void clearLinkProperties() {
331         mLinkProperties.clear();
332         mNeighborWatchList.clear();
333         if (DBG) { Log.d(TAG, "clear: " + describeWatchList()); }
334     }
335 
336     private void handleNeighborLost(NeighborEvent event) {
337         final LinkProperties whatIfLp = new LinkProperties(mLinkProperties);
338 
339         InetAddress ip = null;
340         for (Map.Entry<InetAddress, NeighborEvent> entry : mNeighborWatchList.entrySet()) {
341             // TODO: Consider using NeighborEvent#isValid() here; it's more
342             // strict but may interact badly if other entries are somehow in
343             // NUD_INCOMPLETE (say, during network attach).
344             final NeighborEvent val = entry.getValue();
345 
346             // Find all the neighbors that have gone into FAILED state.
347             // Ignore entries for which we have never received an event. If there are neighbors
348             // that never respond to ARP/ND, the kernel will send several FAILED events, then
349             // an INCOMPLETE event, and then more FAILED events. The INCOMPLETE event will
350             // populate the map and the subsequent FAILED event will be processed.
351             if (val == null || val.nudState != StructNdMsg.NUD_FAILED) continue;
352 
353             ip = entry.getKey();
354             for (RouteInfo route : mLinkProperties.getRoutes()) {
355                 if (ip.equals(route.getGateway())) {
356                     whatIfLp.removeRoute(route);
357                 }
358             }
359 
360             if (avoidingBadLinks() || !(ip instanceof Inet6Address)) {
361                 // We should do this unconditionally, but alas we cannot: b/31827713.
362                 whatIfLp.removeDnsServer(ip);
363             }
364         }
365 
366         final boolean lostProvisioning =
367                 (mLinkProperties.isIpv4Provisioned() && !whatIfLp.isIpv4Provisioned())
368                 || (mLinkProperties.isIpv6Provisioned() && !whatIfLp.isIpv6Provisioned());
369 
370         if (lostProvisioning) {
371             final String logMsg = "FAILURE: LOST_PROVISIONING, " + event;
372             Log.w(TAG, logMsg);
373             if (mCallback != null) {
374                 // TODO: remove |ip| when the callback signature no longer has
375                 // an InetAddress argument.
376                 mCallback.notifyLost(ip, logMsg);
377             }
378         }
379         logNudFailed(lostProvisioning);
380     }
381 
382     private void maybeRestoreNeighborParameters() {
383         for (Map.Entry<InetAddress, NeighborEvent> entry : mNeighborWatchList.entrySet()) {
384             if (DBG) {
385                 Log.d(TAG, "neighbour IPv4(v6): " + entry.getKey() + " neighbour state: "
386                         + StructNdMsg.stringForNudState(entry.getValue().nudState));
387             }
388             final NeighborEvent val = entry.getValue();
389             // If an entry is null, consider that probing for that neighbour has completed.
390             if (val == null || val.nudState != StructNdMsg.NUD_REACHABLE) return;
391         }
392 
393         // Probing for all neighbours in the watchlist is complete and the connection is stable,
394         // restore NUD probe parameters to steadystate value. In the case where neighbours
395         // are responsive, this code will run before the wakelock expires.
396         setNeighbourParametersForSteadyState();
397     }
398 
399     private boolean avoidingBadLinks() {
400         return !mUsingMultinetworkPolicyTracker || mCm.shouldAvoidBadWifi();
401     }
402 
403     public void probeAll() {
404         setNeighbourParametersPostRoaming();
405 
406         final List<InetAddress> ipProbeList = new ArrayList<>(mNeighborWatchList.keySet());
407         if (!ipProbeList.isEmpty()) {
408             // Keep the CPU awake long enough to allow all ARP/ND
409             // probes a reasonable chance at success. See b/23197666.
410             //
411             // The wakelock we use is (by default) refcounted, and this version
412             // of acquire(timeout) queues a release message to keep acquisitions
413             // and releases balanced.
414             mDependencies.acquireWakeLock(getProbeWakeLockDuration());
415         }
416 
417         for (InetAddress ip : ipProbeList) {
418             final int rval = IpNeighborMonitor.startKernelNeighborProbe(mInterfaceParams.index, ip);
419             mLog.log(String.format("put neighbor %s into NUD_PROBE state (rval=%d)",
420                      ip.getHostAddress(), rval));
421             logEvent(IpReachabilityEvent.PROBE, rval);
422         }
423         mLastProbeTimeMs = SystemClock.elapsedRealtime();
424     }
425 
426     private long getProbeWakeLockDuration() {
427         final long gracePeriodMs = 500;
428         return (long) (mNumSolicits * mInterSolicitIntervalMs) + gracePeriodMs;
429     }
430 
431     private void setNeighbourParametersPostRoaming() {
432         setNeighborParametersFromResources(R.integer.config_nud_postroaming_solicit_num,
433                 R.integer.config_nud_postroaming_solicit_interval);
434     }
435 
436     private void setNeighbourParametersForSteadyState() {
437         setNeighborParametersFromResources(R.integer.config_nud_steadystate_solicit_num,
438                 R.integer.config_nud_steadystate_solicit_interval);
439     }
440 
441     private void setNeighborParametersFromResources(final int numResId, final int intervalResId) {
442         try {
443             final int numSolicits = mContext.getResources().getInteger(numResId);
444             final int interSolicitIntervalMs = mContext.getResources().getInteger(intervalResId);
445             setNeighborParameters(numSolicits, interSolicitIntervalMs);
446         } catch (Exception e) {
447             Log.e(TAG, "Failed to adjust neighbor parameters");
448         }
449     }
450 
451     private void setNeighborParameters(int numSolicits, int interSolicitIntervalMs)
452             throws RemoteException, IllegalArgumentException {
453         Preconditions.checkArgument(numSolicits >= MIN_NUD_SOLICIT_NUM,
454                 "numSolicits must be at least " + MIN_NUD_SOLICIT_NUM);
455         Preconditions.checkArgument(numSolicits <= MAX_NUD_SOLICIT_NUM,
456                 "numSolicits must be at most " + MAX_NUD_SOLICIT_NUM);
457         Preconditions.checkArgument(interSolicitIntervalMs >= MIN_NUD_SOLICIT_INTERVAL_MS,
458                 "interSolicitIntervalMs must be at least " + MIN_NUD_SOLICIT_INTERVAL_MS);
459         Preconditions.checkArgument(interSolicitIntervalMs <= MAX_NUD_SOLICIT_INTERVAL_MS,
460                 "interSolicitIntervalMs must be at most " + MAX_NUD_SOLICIT_INTERVAL_MS);
461 
462         for (int family : new Integer[]{INetd.IPV4, INetd.IPV6}) {
463             mNetd.setProcSysNet(family, INetd.NEIGH, mInterfaceParams.name, "retrans_time_ms",
464                     Integer.toString(interSolicitIntervalMs));
465             mNetd.setProcSysNet(family, INetd.NEIGH, mInterfaceParams.name, "ucast_solicit",
466                     Integer.toString(numSolicits));
467         }
468 
469         mNumSolicits = numSolicits;
470         mInterSolicitIntervalMs = interSolicitIntervalMs;
471     }
472 
473     private void logEvent(int probeType, int errorCode) {
474         int eventType = probeType | (errorCode & 0xff);
475         mMetricsLog.log(mInterfaceParams.name, new IpReachabilityEvent(eventType));
476     }
477 
478     private void logNudFailed(boolean lostProvisioning) {
479         long duration = SystemClock.elapsedRealtime() - mLastProbeTimeMs;
480         boolean isFromProbe = (duration < getProbeWakeLockDuration());
481         int eventType = nudFailureEventType(isFromProbe, lostProvisioning);
482         mMetricsLog.log(mInterfaceParams.name, new IpReachabilityEvent(eventType));
483     }
484 
485     /**
486      * Returns the NUD failure event type code corresponding to the given conditions.
487      */
488     private static int nudFailureEventType(boolean isFromProbe, boolean isProvisioningLost) {
489         if (isFromProbe) {
490             return isProvisioningLost ? PROVISIONING_LOST : NUD_FAILED;
491         } else {
492             return isProvisioningLost ? PROVISIONING_LOST_ORGANIC : NUD_FAILED_ORGANIC;
493         }
494     }
495 }
496