1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <map>
18 #include <string>
19 
20 #include <arpa/inet.h>
21 #include <errno.h>
22 #include <linux/if_tun.h>
23 #include <linux/ioctl.h>
24 #include <net/if.h>
25 #include <netinet/in.h>
26 #include <spawn.h>
27 #include <sys/types.h>
28 #include <sys/wait.h>
29 #include <unistd.h>
30 
31 #define LOG_TAG "ClatdController"
32 #include <log/log.h>
33 
34 #include "ClatdController.h"
35 #include "InterfaceController.h"
36 
37 #include "android-base/properties.h"
38 #include "android-base/scopeguard.h"
39 #include "android-base/stringprintf.h"
40 #include "android-base/unique_fd.h"
41 #include "bpf/BpfMap.h"
42 #include "netdbpf/bpf_shared.h"
43 #include "netdutils/DumpWriter.h"
44 
45 extern "C" {
46 #include "netutils/checksum.h"
47 }
48 
49 #include "Fwmark.h"
50 #include "NetdConstants.h"
51 #include "NetworkController.h"
52 #include "OffloadUtils.h"
53 #include "netid_client.h"
54 
// Absolute path of the clatd binary that startClatd() spawns.
static const char* const kClatdPath = "/system/bin/clatd";

// For historical reasons, start with 192.0.0.4, and after that, use all subsequent addresses in
// 192.0.0.0/29 (RFC 7335).
static const char* const kV4AddrString = "192.0.0.4";
// kV4AddrString converted by inet_addr(), i.e. in network byte order.
static const in_addr kV4Addr = {inet_addr(kV4AddrString)};
// Prefix length of the pool the clat IPv4 address is selected from.
static const int kV4AddrLen = 29;
62 
63 using android::base::Result;
64 using android::base::StringPrintf;
65 using android::base::unique_fd;
66 using android::bpf::BpfMap;
67 using android::netdutils::DumpWriter;
68 using android::netdutils::ScopedIndent;
69 
70 namespace android {
71 namespace net {
72 
init(void)73 void ClatdController::init(void) {
74     std::lock_guard guard(mutex);
75 
76     // TODO: should refactor into separate function for testability
77     if (!bpf::isBpfSupported()) {
78         ALOGI("Pre-4.9 kernel or pre-P api shipping level - disabling clat ebpf.");
79         mClatEbpfMode = ClatEbpfDisabled;
80         return;
81     }
82 
83     // We know the device initially shipped with at least P...,
84     // but did it ship with at least Q?
85 
86     uint64_t api_level = base::GetUintProperty<uint64_t>("ro.product.first_api_level", 0);
87     if (api_level == 0) {
88         ALOGE("Cannot determine initial API level of the device.");
89         api_level = base::GetUintProperty<uint64_t>("ro.build.version.sdk", 0);
90     }
91 
92     // Note: MINIMUM_API_REQUIRED is for eBPF as a whole and is thus P
93     if (api_level > bpf::MINIMUM_API_REQUIRED) {
94         ALOGI("4.9+ kernel and device shipped with Q+ - clat ebpf should work.");
95         mClatEbpfMode = ClatEbpfEnabled;
96     } else {
97         // We cannot guarantee that 4.9-P kernels will include NET_CLS_BPF support.
98         ALOGI("4.9+ kernel and device shipped with P - clat ebpf might work.");
99         mClatEbpfMode = ClatEbpfMaybe;
100     }
101 
102     int rv = getClatEgressMapFd();
103     if (rv < 0) {
104         ALOGE("getClatEgressMapFd() failure: %s", strerror(-rv));
105         mClatEbpfMode = ClatEbpfDisabled;
106         return;
107     }
108     mClatEgressMap.reset(rv);
109 
110     rv = getClatIngressMapFd();
111     if (rv < 0) {
112         ALOGE("getClatIngressMapFd() failure: %s", strerror(-rv));
113         mClatEbpfMode = ClatEbpfDisabled;
114         mClatEgressMap.reset(-1);
115         return;
116     }
117     mClatIngressMap.reset(rv);
118 
119     mClatEgressMap.clear();
120     mClatIngressMap.clear();
121 }
122 
isIpv4AddressFree(in_addr_t addr)123 bool ClatdController::isIpv4AddressFree(in_addr_t addr) {
124     int s = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
125     if (s == -1) {
126         return 0;
127     }
128 
129     // Attempt to connect to the address. If the connection succeeds and getsockname returns the
130     // same then the address is already assigned to the system and we can't use it.
131     struct sockaddr_in sin = {
132             .sin_family = AF_INET,
133             .sin_port = htons(53),
134             .sin_addr = {addr},
135     };
136     socklen_t len = sizeof(sin);
137     bool inuse = connect(s, (struct sockaddr*)&sin, sizeof(sin)) == 0 &&
138                  getsockname(s, (struct sockaddr*)&sin, &len) == 0 && (size_t)len >= sizeof(sin) &&
139                  sin.sin_addr.s_addr == addr;
140 
141     close(s);
142     return !inuse;
143 }
144 
145 // Picks a free IPv4 address, starting from ip and trying all addresses in the prefix in order.
146 //   ip        - the IP address from the configuration file
147 //   prefixlen - the length of the prefix from which addresses may be selected.
148 //   returns: the IPv4 address, or INADDR_NONE if no addresses were available
selectIpv4Address(const in_addr ip,int16_t prefixlen)149 in_addr_t ClatdController::selectIpv4Address(const in_addr ip, int16_t prefixlen) {
150     // Don't accept prefixes that are too large because we scan addresses one by one.
151     if (prefixlen < 16 || prefixlen > 32) {
152         return INADDR_NONE;
153     }
154 
155     // All these are in host byte order.
156     in_addr_t mask = 0xffffffff >> (32 - prefixlen) << (32 - prefixlen);
157     in_addr_t ipv4 = ntohl(ip.s_addr);
158     in_addr_t first_ipv4 = ipv4;
159     in_addr_t prefix = ipv4 & mask;
160 
161     // Pick the first IPv4 address in the pool, wrapping around if necessary.
162     // So, for example, 192.0.0.4 -> 192.0.0.5 -> 192.0.0.6 -> 192.0.0.7 -> 192.0.0.0.
163     do {
164         if (isIpv4AddressFreeFunc(htonl(ipv4))) {
165             return htonl(ipv4);
166         }
167         ipv4 = prefix | ((ipv4 + 1) & ~mask);
168     } while (ipv4 != first_ipv4);
169 
170     return INADDR_NONE;
171 }
172 
173 // Alters the bits in the IPv6 address to make them checksum neutral with v4 and nat64Prefix.
makeChecksumNeutral(in6_addr * v6,const in_addr v4,const in6_addr & nat64Prefix)174 void ClatdController::makeChecksumNeutral(in6_addr* v6, const in_addr v4,
175                                           const in6_addr& nat64Prefix) {
176     // Fill last 8 bytes of IPv6 address with random bits.
177     arc4random_buf(&v6->s6_addr[8], 8);
178 
179     // Make the IID checksum-neutral. That is, make it so that:
180     //   checksum(Local IPv4 | Remote IPv4) = checksum(Local IPv6 | Remote IPv6)
181     // in other words (because remote IPv6 = NAT64 prefix | Remote IPv4):
182     //   checksum(Local IPv4) = checksum(Local IPv6 | NAT64 prefix)
183     // Do this by adjusting the two bytes in the middle of the IID.
184 
185     uint16_t middlebytes = (v6->s6_addr[11] << 8) + v6->s6_addr[12];
186 
187     uint32_t c1 = ip_checksum_add(0, &v4, sizeof(v4));
188     uint32_t c2 = ip_checksum_add(0, &nat64Prefix, sizeof(nat64Prefix)) +
189                   ip_checksum_add(0, v6, sizeof(*v6));
190 
191     uint16_t delta = ip_checksum_adjust(middlebytes, c1, c2);
192     v6->s6_addr[11] = delta >> 8;
193     v6->s6_addr[12] = delta & 0xff;
194 }
195 
196 // Picks a random interface ID that is checksum neutral with the IPv4 address and the NAT64 prefix.
generateIpv6Address(const char * iface,const in_addr v4,const in6_addr & nat64Prefix,in6_addr * v6)197 int ClatdController::generateIpv6Address(const char* iface, const in_addr v4,
198                                          const in6_addr& nat64Prefix, in6_addr* v6) {
199     unique_fd s(socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0));
200     if (s == -1) return -errno;
201 
202     if (setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE, iface, strlen(iface) + 1) == -1) {
203         return -errno;
204     }
205 
206     sockaddr_in6 sin6 = {.sin6_family = AF_INET6, .sin6_addr = nat64Prefix};
207     if (connect(s, reinterpret_cast<struct sockaddr*>(&sin6), sizeof(sin6)) == -1) {
208         return -errno;
209     }
210 
211     socklen_t len = sizeof(sin6);
212     if (getsockname(s, reinterpret_cast<struct sockaddr*>(&sin6), &len) == -1) {
213         return -errno;
214     }
215 
216     *v6 = sin6.sin6_addr;
217 
218     if (IN6_IS_ADDR_UNSPECIFIED(v6) || IN6_IS_ADDR_LOOPBACK(v6) || IN6_IS_ADDR_LINKLOCAL(v6) ||
219         IN6_IS_ADDR_SITELOCAL(v6) || IN6_IS_ADDR_ULA(v6)) {
220         return -ENETUNREACH;
221     }
222 
223     makeChecksumNeutral(v6, v4, nat64Prefix);
224 
225     return 0;
226 }
227 
maybeStartBpf(const ClatdTracker & tracker)228 void ClatdController::maybeStartBpf(const ClatdTracker& tracker) {
229     if (mClatEbpfMode == ClatEbpfDisabled) return;
230 
231     auto isEthernet = android::net::isEthernet(tracker.iface);
232     if (!isEthernet.ok()) {
233         ALOGE("isEthernet(%s[%d]) failure: %s", tracker.iface, tracker.ifIndex,
234               isEthernet.error().message().c_str());
235         return;
236     }
237 
238     // This program will be attached to the v4-* interface which is a TUN and thus always rawip.
239     int rv = getClatEgressProgFd(RAWIP);
240     if (rv < 0) {
241         ALOGE("getClatEgressProgFd(RAWIP) failure: %s", strerror(-rv));
242         return;
243     }
244     unique_fd txRawIpProgFd(rv);
245 
246     rv = getClatIngressProgFd(isEthernet.value());
247     if (rv < 0) {
248         ALOGE("getClatIngressProgFd(%d) failure: %s", isEthernet.value(), strerror(-rv));
249         return;
250     }
251     unique_fd rxProgFd(rv);
252 
253     ClatEgressKey txKey = {
254             .iif = tracker.v4ifIndex,
255             .local4 = tracker.v4,
256     };
257     ClatEgressValue txValue = {
258             .oif = tracker.ifIndex,
259             .local6 = tracker.v6,
260             .pfx96 = tracker.pfx96,
261             .oifIsEthernet = isEthernet.value(),
262     };
263 
264     auto ret = mClatEgressMap.writeValue(txKey, txValue, BPF_ANY);
265     if (!ret.ok()) {
266         ALOGE("mClatEgressMap.writeValue failure: %s", strerror(ret.error().code()));
267         return;
268     }
269 
270     ClatIngressKey rxKey = {
271             .iif = tracker.ifIndex,
272             .pfx96 = tracker.pfx96,
273             .local6 = tracker.v6,
274     };
275     ClatIngressValue rxValue = {
276             // TODO: move all the clat code to eBPF and remove the tun interface entirely.
277             .oif = tracker.v4ifIndex,
278             .local4 = tracker.v4,
279     };
280 
281     ret = mClatIngressMap.writeValue(rxKey, rxValue, BPF_ANY);
282     if (!ret.ok()) {
283         ALOGE("mClatIngressMap.writeValue failure: %s", strerror(ret.error().code()));
284         ret = mClatEgressMap.deleteValue(txKey);
285         if (!ret.ok())
286             ALOGE("mClatEgressMap.deleteValue failure: %s", strerror(ret.error().code()));
287         return;
288     }
289 
290     // We do tc setup *after* populating the maps, so scanning through them
291     // can always be used to tell us what needs cleanup.
292 
293     // Usually the clsact will be added in RouteController::addInterfaceToPhysicalNetwork.
294     // But clat is started before the v4- interface is added to the network. The clat startup have
295     // to add clsact of v4- tun interface first for adding bpf filter in maybeStartBpf.
296     // TODO: move "qdisc add clsact" of v4- tun interface out from ClatdController.
297     rv = tcQdiscAddDevClsact(tracker.v4ifIndex);
298     if (rv) {
299         ALOGE("tcQdiscAddDevClsact(%d[%s]) failure: %s", tracker.v4ifIndex, tracker.v4iface,
300               strerror(-rv));
301         ret = mClatEgressMap.deleteValue(txKey);
302         if (!ret.ok())
303             ALOGE("mClatEgressMap.deleteValue failure: %s", strerror(ret.error().code()));
304         ret = mClatIngressMap.deleteValue(rxKey);
305         if (!ret.ok())
306             ALOGE("mClatIngressMap.deleteValue failure: %s", strerror(ret.error().code()));
307         return;
308     }
309 
310     rv = tcFilterAddDevEgressClatIpv4(tracker.v4ifIndex, txRawIpProgFd, RAWIP);
311     if (rv) {
312         if ((rv == -ENOENT) && (mClatEbpfMode == ClatEbpfMaybe)) {
313             ALOGI("tcFilterAddDevEgressClatIpv4(%d[%s], RAWIP): %s", tracker.v4ifIndex,
314                   tracker.v4iface, strerror(-rv));
315         } else {
316             ALOGE("tcFilterAddDevEgressClatIpv4(%d[%s], RAWIP) failure: %s", tracker.v4ifIndex,
317                   tracker.v4iface, strerror(-rv));
318         }
319 
320         // The v4- interface clsact is not deleted for unwinding error because once it is created
321         // with interface addition, the lifetime is till interface deletion. Moreover, the clsact
322         // has no clat filter now. It should not break anything.
323 
324         ret = mClatEgressMap.deleteValue(txKey);
325         if (!ret.ok())
326             ALOGE("mClatEgressMap.deleteValue failure: %s", strerror(ret.error().code()));
327         ret = mClatIngressMap.deleteValue(rxKey);
328         if (!ret.ok())
329             ALOGE("mClatIngressMap.deleteValue failure: %s", strerror(ret.error().code()));
330         return;
331     }
332 
333     rv = tcFilterAddDevIngressClatIpv6(tracker.ifIndex, rxProgFd, isEthernet.value());
334     if (rv) {
335         if ((rv == -ENOENT) && (mClatEbpfMode == ClatEbpfMaybe)) {
336             ALOGI("tcFilterAddDevIngressClatIpv6(%d[%s], %d): %s", tracker.ifIndex, tracker.iface,
337                   isEthernet.value(), strerror(-rv));
338         } else {
339             ALOGE("tcFilterAddDevIngressClatIpv6(%d[%s], %d) failure: %s", tracker.ifIndex,
340                   tracker.iface, isEthernet.value(), strerror(-rv));
341         }
342         rv = tcFilterDelDevEgressClatIpv4(tracker.v4ifIndex);
343         if (rv) {
344             ALOGE("tcFilterDelDevEgressClatIpv4(%d[%s]) failure: %s", tracker.v4ifIndex,
345                   tracker.v4iface, strerror(-rv));
346         }
347 
348         // The v4- interface clsact is not deleted. See the reason in the error unwinding code of
349         // the egress filter attaching of v4- tun interface.
350 
351         ret = mClatEgressMap.deleteValue(txKey);
352         if (!ret.ok())
353             ALOGE("mClatEgressMap.deleteValue failure: %s", strerror(ret.error().code()));
354         ret = mClatIngressMap.deleteValue(rxKey);
355         if (!ret.ok())
356             ALOGE("mClatIngressMap.deleteValue failure: %s", strerror(ret.error().code()));
357         return;
358     }
359 
360     // success
361 }
362 
setIptablesDropRule(bool add,const char * iface,const char * pfx96Str,const char * v6Str)363 void ClatdController::setIptablesDropRule(bool add, const char* iface, const char* pfx96Str,
364                                           const char* v6Str) {
365     std::string cmd = StringPrintf(
366             "*raw\n"
367             "%s %s -i %s -s %s/96 -d %s -j DROP\n"
368             "COMMIT\n",
369             (add ? "-A" : "-D"), LOCAL_RAW_PREROUTING, iface, pfx96Str, v6Str);
370 
371     iptablesRestoreFunction(V6, cmd);
372 }
373 
maybeStopBpf(const ClatdTracker & tracker)374 void ClatdController::maybeStopBpf(const ClatdTracker& tracker) {
375     if (mClatEbpfMode == ClatEbpfDisabled) return;
376 
377     int rv = tcFilterDelDevIngressClatIpv6(tracker.ifIndex);
378     if (rv < 0) {
379         ALOGE("tcFilterDelDevIngressClatIpv6(%d[%s]) failure: %s", tracker.ifIndex, tracker.iface,
380               strerror(-rv));
381     }
382 
383     rv = tcFilterDelDevEgressClatIpv4(tracker.v4ifIndex);
384     if (rv < 0) {
385         ALOGE("tcFilterDelDevEgressClatIpv4(%d[%s]) failure: %s", tracker.v4ifIndex,
386               tracker.v4iface, strerror(-rv));
387     }
388 
389     // We cleanup the maps last, so scanning through them can be used to
390     // determine what still needs cleanup.
391 
392     ClatEgressKey txKey = {
393             .iif = tracker.v4ifIndex,
394             .local4 = tracker.v4,
395     };
396 
397     auto ret = mClatEgressMap.deleteValue(txKey);
398     if (!ret.ok()) ALOGE("mClatEgressMap.deleteValue failure: %s", strerror(ret.error().code()));
399 
400     ClatIngressKey rxKey = {
401             .iif = tracker.ifIndex,
402             .pfx96 = tracker.pfx96,
403             .local6 = tracker.v6,
404     };
405 
406     ret = mClatIngressMap.deleteValue(rxKey);
407     if (!ret.ok()) ALOGE("mClatIngressMap.deleteValue failure: %s", strerror(ret.error().code()));
408 }
409 
410 // Finds the tracker of the clatd running on interface |interface|, or nullptr if clatd has not been
411 // started  on |interface|.
getClatdTracker(const std::string & interface)412 ClatdController::ClatdTracker* ClatdController::getClatdTracker(const std::string& interface) {
413     auto it = mClatdTrackers.find(interface);
414     return (it == mClatdTrackers.end() ? nullptr : &it->second);
415 }
416 
417 // Initializes a ClatdTracker for the specified interface.
init(unsigned networkId,const std::string & interface,const std::string & v4interface,const std::string & nat64Prefix)418 int ClatdController::ClatdTracker::init(unsigned networkId, const std::string& interface,
419                                         const std::string& v4interface,
420                                         const std::string& nat64Prefix) {
421     fwmark.netId = networkId;
422     fwmark.explicitlySelected = true;
423     fwmark.protectedFromVpn = true;
424     fwmark.permission = PERMISSION_SYSTEM;
425 
426     snprintf(fwmarkString, sizeof(fwmarkString), "0x%x", fwmark.intValue);
427     strlcpy(iface, interface.c_str(), sizeof(iface));
428     ifIndex = if_nametoindex(iface);
429     strlcpy(v4iface, v4interface.c_str(), sizeof(v4iface));
430     v4ifIndex = if_nametoindex(v4iface);
431 
432     // Pass in everything that clatd needs: interface, a fwmark for outgoing packets, the NAT64
433     // prefix, and the IPv4 and IPv6 addresses.
434     // Validate the prefix and strip off the prefix length.
435     uint8_t family;
436     uint8_t prefixLen;
437     int res = parsePrefix(nat64Prefix.c_str(), &family, &pfx96, sizeof(pfx96), &prefixLen);
438     // clatd only supports /96 prefixes.
439     if (res != sizeof(pfx96)) return res;
440     if (family != AF_INET6) return -EAFNOSUPPORT;
441     if (prefixLen != 96) return -EINVAL;
442     if (!inet_ntop(AF_INET6, &pfx96, pfx96String, sizeof(pfx96String))) return -errno;
443 
444     // Pick an IPv4 address.
445     // TODO: this picks the address based on other addresses that are assigned to interfaces, but
446     // the address is only actually assigned to an interface once clatd starts up. So we could end
447     // up with two clatd instances with the same IPv4 address.
448     // Stop doing this and instead pick a free one from the kV4Addr pool.
449     v4 = {selectIpv4Address(kV4Addr, kV4AddrLen)};
450     if (v4.s_addr == INADDR_NONE) {
451         ALOGE("No free IPv4 address in %s/%d", kV4AddrString, kV4AddrLen);
452         return -EADDRNOTAVAIL;
453     }
454     if (!inet_ntop(AF_INET, &v4, v4Str, sizeof(v4Str))) return -errno;
455 
456     // Generate a checksum-neutral IID.
457     if (generateIpv6Address(iface, v4, pfx96, &v6)) {
458         ALOGE("Unable to find global source address on %s for %s", iface, pfx96String);
459         return -EADDRNOTAVAIL;
460     }
461     if (!inet_ntop(AF_INET6, &v6, v6Str, sizeof(v6Str))) return -errno;
462 
463     ALOGD("starting clatd on %s v4=%s v6=%s pfx96=%s", iface, v4Str, v6Str, pfx96String);
464     return 0;
465 }
466 
startClatd(const std::string & interface,const std::string & nat64Prefix,std::string * v6Str)467 int ClatdController::startClatd(const std::string& interface, const std::string& nat64Prefix,
468                                 std::string* v6Str) {
469     std::lock_guard guard(mutex);
470 
471     // 1. fail if pre-existing tracker already exists
472     ClatdTracker* existing = getClatdTracker(interface);
473     if (existing != nullptr) {
474         ALOGE("clatd pid=%d already started on %s", existing->pid, interface.c_str());
475         return -EBUSY;
476     }
477 
478     // 2. get network id associated with this external interface
479     unsigned networkId = mNetCtrl->getNetworkForInterface(interface.c_str());
480     if (networkId == NETID_UNSET) {
481         ALOGE("Interface %s not assigned to any netId", interface.c_str());
482         return -ENODEV;
483     }
484 
485     // 3. open the tun device in non blocking mode as required by clatd
486     int res = open("/dev/net/tun", O_RDWR | O_NONBLOCK | O_CLOEXEC);
487     if (res == -1) {
488         res = errno;
489         ALOGE("open of tun device failed (%s)", strerror(res));
490         return -res;
491     }
492     unique_fd tmpTunFd(res);
493 
494     // 4. create the v4-... tun interface
495     std::string v4interface("v4-");
496     v4interface += interface;
497 
498     struct ifreq ifr = {
499             .ifr_flags = IFF_TUN,
500     };
501     strlcpy(ifr.ifr_name, v4interface.c_str(), sizeof(ifr.ifr_name));
502 
503     res = ioctl(tmpTunFd, TUNSETIFF, &ifr, sizeof(ifr));
504     if (res == -1) {
505         res = errno;
506         ALOGE("ioctl(TUNSETIFF) failed (%s)", strerror(res));
507         return -res;
508     }
509 
510     // disable IPv6 on it - failing to do so is not a critical error
511     res = InterfaceController::setEnableIPv6(v4interface.c_str(), 0);
512     if (res) ALOGE("setEnableIPv6 %s failed (%s)", v4interface.c_str(), strerror(res));
513 
514     // 5. initialize tracker object
515     ClatdTracker tracker;
516     int ret = tracker.init(networkId, interface, v4interface, nat64Prefix);
517     if (ret) return ret;
518 
519     // 6. create a throwaway socket to reserve a file descriptor number
520     res = socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0);
521     if (res == -1) {
522         res = errno;
523         ALOGE("socket(ipv6/udp) failed (%s)", strerror(res));
524         return -res;
525     }
526     unique_fd passedTunFd(res);
527 
528     // 7. this is the FD we'll pass to clatd on the cli, so need it as a string
529     char passedTunFdStr[INT32_STRLEN];
530     snprintf(passedTunFdStr, sizeof(passedTunFdStr), "%d", passedTunFd.get());
531 
532     // 8. we're going to use this as argv[0] to clatd to make ps output more useful
533     std::string progname("clatd-");
534     progname += tracker.iface;
535 
536     // clang-format off
537     const char* args[] = {progname.c_str(),
538                           "-i", tracker.iface,
539                           "-m", tracker.fwmarkString,
540                           "-p", tracker.pfx96String,
541                           "-4", tracker.v4Str,
542                           "-6", tracker.v6Str,
543                           "-t", passedTunFdStr,
544                           nullptr};
545     // clang-format on
546 
547     // 9. register vfork requirement
548     posix_spawnattr_t attr;
549     res = posix_spawnattr_init(&attr);
550     if (res) {
551         ALOGE("posix_spawnattr_init failed (%s)", strerror(res));
552         return -res;
553     }
554     const android::base::ScopeGuard attrGuard = [&] { posix_spawnattr_destroy(&attr); };
555     res = posix_spawnattr_setflags(&attr, POSIX_SPAWN_USEVFORK);
556     if (res) {
557         ALOGE("posix_spawnattr_setflags failed (%s)", strerror(res));
558         return -res;
559     }
560 
561     // 10. register dup2() action: this is what 'clears' the CLOEXEC flag
562     // on the tun fd that we want the child clatd process to inherit
563     // (this will happen after the vfork, and before the execve)
564     posix_spawn_file_actions_t fa;
565     res = posix_spawn_file_actions_init(&fa);
566     if (res) {
567         ALOGE("posix_spawn_file_actions_init failed (%s)", strerror(res));
568         return -res;
569     }
570     const android::base::ScopeGuard faGuard = [&] { posix_spawn_file_actions_destroy(&fa); };
571     res = posix_spawn_file_actions_adddup2(&fa, tmpTunFd, passedTunFd);
572     if (res) {
573         ALOGE("posix_spawn_file_actions_adddup2 failed (%s)", strerror(res));
574         return -res;
575     }
576 
577     // 11. add the drop rule for iptables.
578     setIptablesDropRule(true, tracker.iface, tracker.pfx96String, tracker.v6Str);
579 
580     // 12. actually perform vfork/dup2/execve
581     res = posix_spawn(&tracker.pid, kClatdPath, &fa, &attr, (char* const*)args, nullptr);
582     if (res) {
583         ALOGE("posix_spawn failed (%s)", strerror(res));
584         return -res;
585     }
586 
587     // 13. configure eBPF offload - if possible
588     maybeStartBpf(tracker);
589 
590     mClatdTrackers[interface] = tracker;
591     ALOGD("clatd started on %s", interface.c_str());
592 
593     *v6Str = tracker.v6Str;
594     return 0;
595 }
596 
stopClatd(const std::string & interface)597 int ClatdController::stopClatd(const std::string& interface) {
598     std::lock_guard guard(mutex);
599     ClatdTracker* tracker = getClatdTracker(interface);
600 
601     if (tracker == nullptr) {
602         ALOGE("clatd already stopped");
603         return -ENODEV;
604     }
605 
606     ALOGD("Stopping clatd pid=%d on %s", tracker->pid, interface.c_str());
607 
608     maybeStopBpf(*tracker);
609 
610     kill(tracker->pid, SIGTERM);
611     waitpid(tracker->pid, nullptr, 0);
612 
613     setIptablesDropRule(false, tracker->iface, tracker->pfx96String, tracker->v6Str);
614     mClatdTrackers.erase(interface);
615 
616     ALOGD("clatd on %s stopped", interface.c_str());
617 
618     return 0;
619 }
620 
dumpEgress(DumpWriter & dw)621 void ClatdController::dumpEgress(DumpWriter& dw) {
622     if (!mClatEgressMap.isValid()) return;  // if unsupported just don't dump anything
623 
624     ScopedIndent bpfIndent(dw);
625     dw.println("BPF egress map: iif(iface) v4Addr -> v6Addr nat64Prefix oif(iface)");
626 
627     ScopedIndent bpfDetailIndent(dw);
628     const auto printClatMap = [&dw](const ClatEgressKey& key, const ClatEgressValue& value,
629                                     const BpfMap<ClatEgressKey, ClatEgressValue>&) {
630         char iifStr[IFNAMSIZ] = "?";
631         char local4Str[INET_ADDRSTRLEN] = "?";
632         char local6Str[INET6_ADDRSTRLEN] = "?";
633         char pfx96Str[INET6_ADDRSTRLEN] = "?";
634         char oifStr[IFNAMSIZ] = "?";
635 
636         if_indextoname(key.iif, iifStr);
637         inet_ntop(AF_INET, &key.local4, local4Str, sizeof(local4Str));
638         inet_ntop(AF_INET6, &value.local6, local6Str, sizeof(local6Str));
639         inet_ntop(AF_INET6, &value.pfx96, pfx96Str, sizeof(pfx96Str));
640         if_indextoname(value.oif, oifStr);
641 
642         dw.println("%u(%s) %s -> %s %s/96 %u(%s) %s", key.iif, iifStr, local4Str, local6Str,
643                    pfx96Str, value.oif, oifStr, value.oifIsEthernet ? "ether" : "rawip");
644         return Result<void>();
645     };
646     auto res = mClatEgressMap.iterateWithValue(printClatMap);
647     if (!res.ok()) {
648         dw.println("Error printing BPF map: %s", res.error().message().c_str());
649     }
650 }
651 
dumpIngress(DumpWriter & dw)652 void ClatdController::dumpIngress(DumpWriter& dw) {
653     if (!mClatIngressMap.isValid()) return;  // if unsupported just don't dump anything
654 
655     ScopedIndent bpfIndent(dw);
656     dw.println("BPF ingress map: iif(iface) nat64Prefix v6Addr -> v4Addr oif(iface)");
657 
658     ScopedIndent bpfDetailIndent(dw);
659     const auto printClatMap = [&dw](const ClatIngressKey& key, const ClatIngressValue& value,
660                                     const BpfMap<ClatIngressKey, ClatIngressValue>&) {
661         char iifStr[IFNAMSIZ] = "?";
662         char pfx96Str[INET6_ADDRSTRLEN] = "?";
663         char local6Str[INET6_ADDRSTRLEN] = "?";
664         char local4Str[INET_ADDRSTRLEN] = "?";
665         char oifStr[IFNAMSIZ] = "?";
666 
667         if_indextoname(key.iif, iifStr);
668         inet_ntop(AF_INET6, &key.pfx96, pfx96Str, sizeof(pfx96Str));
669         inet_ntop(AF_INET6, &key.local6, local6Str, sizeof(local6Str));
670         inet_ntop(AF_INET, &value.local4, local4Str, sizeof(local4Str));
671         if_indextoname(value.oif, oifStr);
672 
673         dw.println("%u(%s) %s/96 %s -> %s %u(%s)", key.iif, iifStr, pfx96Str, local6Str, local4Str,
674                    value.oif, oifStr);
675         return Result<void>();
676     };
677     auto res = mClatIngressMap.iterateWithValue(printClatMap);
678     if (!res.ok()) {
679         dw.println("Error printing BPF map: %s", res.error().message().c_str());
680     }
681 }
682 
dumpTrackers(DumpWriter & dw)683 void ClatdController::dumpTrackers(DumpWriter& dw) {
684     ScopedIndent trackerIndent(dw);
685     dw.println("Trackers: iif[iface] nat64Prefix v6Addr -> v4Addr v4iif[v4iface] [fwmark]");
686 
687     ScopedIndent trackerDetailIndent(dw);
688     for (const auto& pair : mClatdTrackers) {
689         const ClatdTracker& tracker = pair.second;
690         dw.println("%u[%s] %s/96 %s -> %s %u[%s] [%s]", tracker.ifIndex, tracker.iface,
691                    tracker.pfx96String, tracker.v6Str, tracker.v4Str, tracker.v4ifIndex,
692                    tracker.v4iface, tracker.fwmarkString);
693     }
694 }
695 
dump(DumpWriter & dw)696 void ClatdController::dump(DumpWriter& dw) {
697     std::lock_guard guard(mutex);
698 
699     ScopedIndent clatdIndent(dw);
700     dw.println("ClatdController");
701 
702     dumpTrackers(dw);
703     dumpIngress(dw);
704     dumpEgress(dw);
705 }
706 
// Definitions of the static members through which this file reaches two helpers:
//   isIpv4AddressFreeFunc   - called by selectIpv4Address() to probe a candidate address;
//                             initialized to isIpv4AddressFree.
//   iptablesRestoreFunction - called by setIptablesDropRule() to apply a ruleset;
//                             initialized to execIptablesRestore.
// NOTE(review): presumably these are kept as replaceable members so tests can substitute them -
// confirm against the declarations in ClatdController.h.
auto ClatdController::isIpv4AddressFreeFunc = isIpv4AddressFree;
auto ClatdController::iptablesRestoreFunction = execIptablesRestore;
709 
710 }  // namespace net
711 }  // namespace android
712