1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "OffloadUtils.h"
18 
19 #include <arpa/inet.h>
20 #include <linux/if.h>
21 #include <linux/if_arp.h>
22 #include <linux/netlink.h>
23 #include <linux/pkt_cls.h>
24 #include <linux/pkt_sched.h>
25 #include <sys/ioctl.h>
26 #include <sys/socket.h>
27 #include <sys/types.h>
28 #include <unistd.h>
29 
30 #define LOG_TAG "OffloadUtils"
31 #include <log/log.h>
32 
33 #include "NetlinkCommands.h"
34 #include "android-base/unique_fd.h"
35 
36 namespace android {
37 namespace net {
38 
39 using std::max;
40 
hardwareAddressType(const std::string & interface)41 int hardwareAddressType(const std::string& interface) {
42     base::unique_fd ufd(socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0));
43 
44     if (ufd < 0) {
45         const int err = errno;
46         ALOGE("socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0)");
47         return -err;
48     };
49 
50     struct ifreq ifr = {};
51     // We use strncpy() instead of strlcpy() since kernel has to be able
52     // to handle non-zero terminated junk passed in by userspace anyway,
53     // and this way too long interface names (more than IFNAMSIZ-1 = 15
54     // characters plus terminating NULL) will not get truncated to 15
55     // characters and zero-terminated and thus potentially erroneously
56     // match a truncated interface if one were to exist.
57     strncpy(ifr.ifr_name, interface.c_str(), sizeof(ifr.ifr_name));
58 
59     if (ioctl(ufd, SIOCGIFHWADDR, &ifr, sizeof(ifr))) return -errno;
60 
61     return ifr.ifr_hwaddr.sa_family;
62 }
63 
isEthernet(const std::string & interface)64 base::Result<bool> isEthernet(const std::string& interface) {
65     int rv = hardwareAddressType(interface);
66     if (rv < 0) {
67         errno = -rv;
68         return ErrnoErrorf("Get hardware address type of interface {} failed", interface);
69     }
70 
71     switch (rv) {
72         case ARPHRD_ETHER:
73             return true;
74         case ARPHRD_NONE:
75         case ARPHRD_RAWIP:  // in Linux 4.14+ rmnet support was upstreamed and this is 519
76         case 530:           // this is ARPHRD_RAWIP on some Android 4.9 kernels with rmnet
77             return false;
78         default:
79             errno = EAFNOSUPPORT;  // Address family not supported
80             return ErrnoErrorf("Unknown hardware address type {} on interface {}", rv, interface);
81     }
82 }
83 
84 // TODO: use //system/netd/server/NetlinkCommands.cpp:openNetlinkSocket(protocol)
85 // and //system/netd/server/SockDiag.cpp:checkError(fd)
sendAndProcessNetlinkResponse(const void * req,int len)86 static int sendAndProcessNetlinkResponse(const void* req, int len) {
87     base::unique_fd fd(socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE));
88     if (fd == -1) {
89         const int err = errno;
90         ALOGE("socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE)");
91         return -err;
92     }
93 
94     static constexpr int on = 1;
95     int rv = setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &on, sizeof(on));
96     if (rv) ALOGE("setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, %d)", on);
97 
98     // this is needed to get sane strace netlink parsing, it allocates the pid
99     rv = bind(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
100     if (rv) {
101         const int err = errno;
102         ALOGE("bind(fd, {AF_NETLINK, 0, 0})");
103         return -err;
104     }
105 
106     // we do not want to receive messages from anyone besides the kernel
107     rv = connect(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
108     if (rv) {
109         const int err = errno;
110         ALOGE("connect(fd, {AF_NETLINK, 0, 0})");
111         return -err;
112     }
113 
114     rv = send(fd, req, len, 0);
115     if (rv == -1) return -errno;
116     if (rv != len) return -EMSGSIZE;
117 
118     struct {
119         nlmsghdr h;
120         nlmsgerr e;
121         char buf[256];
122     } resp = {};
123 
124     rv = recv(fd, &resp, sizeof(resp), MSG_TRUNC);
125 
126     if (rv == -1) {
127         const int err = errno;
128         ALOGE("recv() failed");
129         return -err;
130     }
131 
132     if (rv < (int)NLMSG_SPACE(sizeof(struct nlmsgerr))) {
133         ALOGE("recv() returned short packet: %d", rv);
134         return -EMSGSIZE;
135     }
136 
137     if (resp.h.nlmsg_len != (unsigned)rv) {
138         ALOGE("recv() returned invalid header length: %d != %d", resp.h.nlmsg_len, rv);
139         return -EBADMSG;
140     }
141 
142     if (resp.h.nlmsg_type != NLMSG_ERROR) {
143         ALOGE("recv() did not return NLMSG_ERROR message: %d", resp.h.nlmsg_type);
144         return -EBADMSG;
145     }
146 
147     return resp.e.error;  // returns 0 on success
148 }
149 
150 // ADD:     nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_EXCL|NLM_F_CREATE
151 // REPLACE: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_CREATE|NLM_F_REPLACE
152 // DEL:     nlMsgType=RTM_DELQDISC nlMsgFlags=0
doTcQdiscClsact(int ifIndex,uint16_t nlMsgType,uint16_t nlMsgFlags)153 int doTcQdiscClsact(int ifIndex, uint16_t nlMsgType, uint16_t nlMsgFlags) {
154     // This is the name of the qdisc we are attaching.
155     // Some hoop jumping to make this compile time constant with known size,
156     // so that the structure declaration is well defined at compile time.
157 #define CLSACT "clsact"
158     // sizeof() includes the terminating NULL
159     static constexpr size_t ASCIIZ_LEN_CLSACT = sizeof(CLSACT);
160 
161     const struct {
162         nlmsghdr n;
163         tcmsg t;
164         struct {
165             nlattr attr;
166             char str[NLMSG_ALIGN(ASCIIZ_LEN_CLSACT)];
167         } kind;
168     } req = {
169             .n =
170                     {
171                             .nlmsg_len = sizeof(req),
172                             .nlmsg_type = nlMsgType,
173                             .nlmsg_flags = static_cast<__u16>(NETLINK_REQUEST_FLAGS | nlMsgFlags),
174                     },
175             .t =
176                     {
177                             .tcm_family = AF_UNSPEC,
178                             .tcm_ifindex = ifIndex,
179                             .tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0),
180                             .tcm_parent = TC_H_CLSACT,
181                     },
182             .kind =
183                     {
184                             .attr =
185                                     {
186                                             .nla_len = NLA_HDRLEN + ASCIIZ_LEN_CLSACT,
187                                             .nla_type = TCA_KIND,
188                                     },
189                             .str = CLSACT,
190                     },
191     };
192 #undef CLSACT
193 
194     return sendAndProcessNetlinkResponse(&req, sizeof(req));
195 }
196 
197 // tc filter add dev .. in/egress prio 1 protocol ipv6/ip bpf object-pinned /sys/fs/bpf/...
198 // direct-action
tcFilterAddDevBpf(int ifIndex,bool ingress,uint16_t prio,uint16_t proto,int bpfFd,bool ethernet)199 int tcFilterAddDevBpf(int ifIndex, bool ingress, uint16_t prio, uint16_t proto, int bpfFd,
200                       bool ethernet) {
201     // This is the name of the filter we're attaching (ie. this is the 'bpf'
202     // packet classifier enabled by kernel config option CONFIG_NET_CLS_BPF.
203     //
204     // We go through some hoops in order to make this compile time constants
205     // so that we can define the struct further down the function with the
206     // field for this sized correctly already during the build.
207 #define BPF "bpf"
208     // sizeof() includes the terminating NULL
209     static constexpr size_t ASCIIZ_LEN_BPF = sizeof(BPF);
210 
211     // This is to replicate program name suffix used by 'tc' Linux cli
212     // when it attaches programs.
213 #define FSOBJ_SUFFIX ":[*fsobj]"
214 
215     // This macro expands (from header files) to:
216     //   prog_clatd_schedcls_ingress_clat_rawip:[*fsobj]
217     // and is the name of the pinned ingress ebpf program for ARPHRD_RAWIP interfaces.
218     // (also compatible with anything that has 0 size L2 header)
219     static constexpr char name_clat_rx_rawip[] = CLAT_INGRESS_PROG_RAWIP_NAME FSOBJ_SUFFIX;
220 
221     // This macro expands (from header files) to:
222     //   prog_clatd_schedcls_ingress_clat_ether:[*fsobj]
223     // and is the name of the pinned ingress ebpf program for ARPHRD_ETHER interfaces.
224     // (also compatible with anything that has standard ethernet header)
225     static constexpr char name_clat_rx_ether[] = CLAT_INGRESS_PROG_ETHER_NAME FSOBJ_SUFFIX;
226 
227     // This macro expands (from header files) to:
228     //   prog_clatd_schedcls_egress_clat_rawip:[*fsobj]
229     // and is the name of the pinned egress ebpf program for ARPHRD_RAWIP interfaces.
230     // (also compatible with anything that has 0 size L2 header)
231     static constexpr char name_clat_tx_rawip[] = CLAT_EGRESS_PROG_RAWIP_NAME FSOBJ_SUFFIX;
232 
233     // This macro expands (from header files) to:
234     //   prog_clatd_schedcls_egress_clat_ether:[*fsobj]
235     // and is the name of the pinned egress ebpf program for ARPHRD_ETHER interfaces.
236     // (also compatible with anything that has standard ethernet header)
237     static constexpr char name_clat_tx_ether[] = CLAT_EGRESS_PROG_ETHER_NAME FSOBJ_SUFFIX;
238 
239     // This macro expands (from header files) to:
240     //   prog_offload_schedcls_ingress_tether_rawip:[*fsobj]
241     // and is the name of the pinned ingress ebpf program for ARPHRD_RAWIP interfaces.
242     // (also compatible with anything that has 0 size L2 header)
243     static constexpr char name_tether_rawip[] = TETHER_INGRESS_PROG_RAWIP_NAME FSOBJ_SUFFIX;
244 
245     // This macro expands (from header files) to:
246     //   prog_offload_schedcls_ingress_tether_ether:[*fsobj]
247     // and is the name of the pinned ingress ebpf program for ARPHRD_ETHER interfaces.
248     // (also compatible with anything that has standard ethernet header)
249     static constexpr char name_tether_ether[] = TETHER_INGRESS_PROG_ETHER_NAME FSOBJ_SUFFIX;
250 
251 #undef FSOBJ_SUFFIX
252 
253     // The actual name we'll use is determined at run time via 'ethernet' and 'ingress'
254     // booleans.  We need to compile time allocate enough space in the struct
255     // hence this macro magic to make sure we have enough space for either
256     // possibility.  In practice some of these are actually the same size.
257     static constexpr size_t ASCIIZ_MAXLEN_NAME = max({
258             sizeof(name_clat_rx_rawip),
259             sizeof(name_clat_rx_ether),
260             sizeof(name_clat_tx_rawip),
261             sizeof(name_clat_tx_ether),
262             sizeof(name_tether_rawip),
263             sizeof(name_tether_ether),
264     });
265 
266     // These are not compile time constants: 'name' is used in strncpy below
267     const char* const name_clat_rx = ethernet ? name_clat_rx_ether : name_clat_rx_rawip;
268     const char* const name_clat_tx = ethernet ? name_clat_tx_ether : name_clat_tx_rawip;
269     const char* const name_clat = ingress ? name_clat_rx : name_clat_tx;
270     const char* const name_tether = ethernet ? name_tether_ether : name_tether_rawip;
271     const char* const name = (prio == PRIO_TETHER) ? name_tether : name_clat;
272 
273     struct {
274         nlmsghdr n;
275         tcmsg t;
276         struct {
277             nlattr attr;
278             char str[NLMSG_ALIGN(ASCIIZ_LEN_BPF)];
279         } kind;
280         struct {
281             nlattr attr;
282             struct {
283                 nlattr attr;
284                 __u32 u32;
285             } fd;
286             struct {
287                 nlattr attr;
288                 char str[NLMSG_ALIGN(ASCIIZ_MAXLEN_NAME)];
289             } name;
290             struct {
291                 nlattr attr;
292                 __u32 u32;
293             } flags;
294         } options;
295     } req = {
296             .n =
297                     {
298                             .nlmsg_len = sizeof(req),
299                             .nlmsg_type = RTM_NEWTFILTER,
300                             .nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
301                     },
302             .t =
303                     {
304                             .tcm_family = AF_UNSPEC,
305                             .tcm_ifindex = ifIndex,
306                             .tcm_handle = TC_H_UNSPEC,
307                             .tcm_parent = TC_H_MAKE(TC_H_CLSACT,
308                                                     ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
309                             .tcm_info = static_cast<__u32>((prio << 16) | htons(proto)),
310                     },
311             .kind =
312                     {
313                             .attr =
314                                     {
315                                             .nla_len = sizeof(req.kind),
316                                             .nla_type = TCA_KIND,
317                                     },
318                             .str = BPF,
319                     },
320             .options =
321                     {
322                             .attr =
323                                     {
324                                             .nla_len = sizeof(req.options),
325                                             .nla_type = TCA_OPTIONS,
326                                     },
327                             .fd =
328                                     {
329                                             .attr =
330                                                     {
331                                                             .nla_len = sizeof(req.options.fd),
332                                                             .nla_type = TCA_BPF_FD,
333                                                     },
334                                             .u32 = static_cast<__u32>(bpfFd),
335                                     },
336                             .name =
337                                     {
338                                             .attr =
339                                                     {
340                                                             .nla_len = sizeof(req.options.name),
341                                                             .nla_type = TCA_BPF_NAME,
342                                                     },
343                                             // Visible via 'tc filter show', but
344                                             // is overwritten by strncpy below
345                                             .str = "placeholder",
346                                     },
347                             .flags =
348                                     {
349                                             .attr =
350                                                     {
351                                                             .nla_len = sizeof(req.options.flags),
352                                                             .nla_type = TCA_BPF_FLAGS,
353                                                     },
354                                             .u32 = TCA_BPF_FLAG_ACT_DIRECT,
355                                     },
356                     },
357     };
358 #undef BPF
359 
360     strncpy(req.options.name.str, name, sizeof(req.options.name.str));
361 
362     return sendAndProcessNetlinkResponse(&req, sizeof(req));
363 }
364 
365 // tc filter del dev .. in/egress prio .. protocol ..
tcFilterDelDev(int ifIndex,bool ingress,uint16_t prio,uint16_t proto)366 int tcFilterDelDev(int ifIndex, bool ingress, uint16_t prio, uint16_t proto) {
367     const struct {
368         nlmsghdr n;
369         tcmsg t;
370     } req = {
371             .n =
372                     {
373                             .nlmsg_len = sizeof(req),
374                             .nlmsg_type = RTM_DELTFILTER,
375                             .nlmsg_flags = NETLINK_REQUEST_FLAGS,
376                     },
377             .t =
378                     {
379                             .tcm_family = AF_UNSPEC,
380                             .tcm_ifindex = ifIndex,
381                             .tcm_handle = TC_H_UNSPEC,
382                             .tcm_parent = TC_H_MAKE(TC_H_CLSACT,
383                                                     ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
384                             .tcm_info = static_cast<__u32>((prio << 16) | htons(proto)),
385                     },
386     };
387 
388     return sendAndProcessNetlinkResponse(&req, sizeof(req));
389 }
390 
391 }  // namespace net
392 }  // namespace android
393