1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "OffloadUtils.h"
18
19 #include <arpa/inet.h>
20 #include <linux/if.h>
21 #include <linux/if_arp.h>
22 #include <linux/netlink.h>
23 #include <linux/pkt_cls.h>
24 #include <linux/pkt_sched.h>
25 #include <sys/ioctl.h>
26 #include <sys/socket.h>
27 #include <sys/types.h>
28 #include <unistd.h>
29
30 #define LOG_TAG "OffloadUtils"
31 #include <log/log.h>
32
33 #include "NetlinkCommands.h"
34 #include "android-base/unique_fd.h"
35
36 namespace android {
37 namespace net {
38
39 using std::max;
40
hardwareAddressType(const std::string & interface)41 int hardwareAddressType(const std::string& interface) {
42 base::unique_fd ufd(socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0));
43
44 if (ufd < 0) {
45 const int err = errno;
46 ALOGE("socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0)");
47 return -err;
48 };
49
50 struct ifreq ifr = {};
51 // We use strncpy() instead of strlcpy() since kernel has to be able
52 // to handle non-zero terminated junk passed in by userspace anyway,
53 // and this way too long interface names (more than IFNAMSIZ-1 = 15
54 // characters plus terminating NULL) will not get truncated to 15
55 // characters and zero-terminated and thus potentially erroneously
56 // match a truncated interface if one were to exist.
57 strncpy(ifr.ifr_name, interface.c_str(), sizeof(ifr.ifr_name));
58
59 if (ioctl(ufd, SIOCGIFHWADDR, &ifr, sizeof(ifr))) return -errno;
60
61 return ifr.ifr_hwaddr.sa_family;
62 }
63
isEthernet(const std::string & interface)64 base::Result<bool> isEthernet(const std::string& interface) {
65 int rv = hardwareAddressType(interface);
66 if (rv < 0) {
67 errno = -rv;
68 return ErrnoErrorf("Get hardware address type of interface {} failed", interface);
69 }
70
71 switch (rv) {
72 case ARPHRD_ETHER:
73 return true;
74 case ARPHRD_NONE:
75 case ARPHRD_RAWIP: // in Linux 4.14+ rmnet support was upstreamed and this is 519
76 case 530: // this is ARPHRD_RAWIP on some Android 4.9 kernels with rmnet
77 return false;
78 default:
79 errno = EAFNOSUPPORT; // Address family not supported
80 return ErrnoErrorf("Unknown hardware address type {} on interface {}", rv, interface);
81 }
82 }
83
84 // TODO: use //system/netd/server/NetlinkCommands.cpp:openNetlinkSocket(protocol)
85 // and //system/netd/server/SockDiag.cpp:checkError(fd)
sendAndProcessNetlinkResponse(const void * req,int len)86 static int sendAndProcessNetlinkResponse(const void* req, int len) {
87 base::unique_fd fd(socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE));
88 if (fd == -1) {
89 const int err = errno;
90 ALOGE("socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE)");
91 return -err;
92 }
93
94 static constexpr int on = 1;
95 int rv = setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &on, sizeof(on));
96 if (rv) ALOGE("setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, %d)", on);
97
98 // this is needed to get sane strace netlink parsing, it allocates the pid
99 rv = bind(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
100 if (rv) {
101 const int err = errno;
102 ALOGE("bind(fd, {AF_NETLINK, 0, 0})");
103 return -err;
104 }
105
106 // we do not want to receive messages from anyone besides the kernel
107 rv = connect(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
108 if (rv) {
109 const int err = errno;
110 ALOGE("connect(fd, {AF_NETLINK, 0, 0})");
111 return -err;
112 }
113
114 rv = send(fd, req, len, 0);
115 if (rv == -1) return -errno;
116 if (rv != len) return -EMSGSIZE;
117
118 struct {
119 nlmsghdr h;
120 nlmsgerr e;
121 char buf[256];
122 } resp = {};
123
124 rv = recv(fd, &resp, sizeof(resp), MSG_TRUNC);
125
126 if (rv == -1) {
127 const int err = errno;
128 ALOGE("recv() failed");
129 return -err;
130 }
131
132 if (rv < (int)NLMSG_SPACE(sizeof(struct nlmsgerr))) {
133 ALOGE("recv() returned short packet: %d", rv);
134 return -EMSGSIZE;
135 }
136
137 if (resp.h.nlmsg_len != (unsigned)rv) {
138 ALOGE("recv() returned invalid header length: %d != %d", resp.h.nlmsg_len, rv);
139 return -EBADMSG;
140 }
141
142 if (resp.h.nlmsg_type != NLMSG_ERROR) {
143 ALOGE("recv() did not return NLMSG_ERROR message: %d", resp.h.nlmsg_type);
144 return -EBADMSG;
145 }
146
147 return resp.e.error; // returns 0 on success
148 }
149
150 // ADD: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_EXCL|NLM_F_CREATE
151 // REPLACE: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_CREATE|NLM_F_REPLACE
152 // DEL: nlMsgType=RTM_DELQDISC nlMsgFlags=0
doTcQdiscClsact(int ifIndex,uint16_t nlMsgType,uint16_t nlMsgFlags)153 int doTcQdiscClsact(int ifIndex, uint16_t nlMsgType, uint16_t nlMsgFlags) {
154 // This is the name of the qdisc we are attaching.
155 // Some hoop jumping to make this compile time constant with known size,
156 // so that the structure declaration is well defined at compile time.
157 #define CLSACT "clsact"
158 // sizeof() includes the terminating NULL
159 static constexpr size_t ASCIIZ_LEN_CLSACT = sizeof(CLSACT);
160
161 const struct {
162 nlmsghdr n;
163 tcmsg t;
164 struct {
165 nlattr attr;
166 char str[NLMSG_ALIGN(ASCIIZ_LEN_CLSACT)];
167 } kind;
168 } req = {
169 .n =
170 {
171 .nlmsg_len = sizeof(req),
172 .nlmsg_type = nlMsgType,
173 .nlmsg_flags = static_cast<__u16>(NETLINK_REQUEST_FLAGS | nlMsgFlags),
174 },
175 .t =
176 {
177 .tcm_family = AF_UNSPEC,
178 .tcm_ifindex = ifIndex,
179 .tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0),
180 .tcm_parent = TC_H_CLSACT,
181 },
182 .kind =
183 {
184 .attr =
185 {
186 .nla_len = NLA_HDRLEN + ASCIIZ_LEN_CLSACT,
187 .nla_type = TCA_KIND,
188 },
189 .str = CLSACT,
190 },
191 };
192 #undef CLSACT
193
194 return sendAndProcessNetlinkResponse(&req, sizeof(req));
195 }
196
197 // tc filter add dev .. in/egress prio 1 protocol ipv6/ip bpf object-pinned /sys/fs/bpf/...
198 // direct-action
tcFilterAddDevBpf(int ifIndex,bool ingress,uint16_t prio,uint16_t proto,int bpfFd,bool ethernet)199 int tcFilterAddDevBpf(int ifIndex, bool ingress, uint16_t prio, uint16_t proto, int bpfFd,
200 bool ethernet) {
201 // This is the name of the filter we're attaching (ie. this is the 'bpf'
202 // packet classifier enabled by kernel config option CONFIG_NET_CLS_BPF.
203 //
204 // We go through some hoops in order to make this compile time constants
205 // so that we can define the struct further down the function with the
206 // field for this sized correctly already during the build.
207 #define BPF "bpf"
208 // sizeof() includes the terminating NULL
209 static constexpr size_t ASCIIZ_LEN_BPF = sizeof(BPF);
210
211 // This is to replicate program name suffix used by 'tc' Linux cli
212 // when it attaches programs.
213 #define FSOBJ_SUFFIX ":[*fsobj]"
214
215 // This macro expands (from header files) to:
216 // prog_clatd_schedcls_ingress_clat_rawip:[*fsobj]
217 // and is the name of the pinned ingress ebpf program for ARPHRD_RAWIP interfaces.
218 // (also compatible with anything that has 0 size L2 header)
219 static constexpr char name_clat_rx_rawip[] = CLAT_INGRESS_PROG_RAWIP_NAME FSOBJ_SUFFIX;
220
221 // This macro expands (from header files) to:
222 // prog_clatd_schedcls_ingress_clat_ether:[*fsobj]
223 // and is the name of the pinned ingress ebpf program for ARPHRD_ETHER interfaces.
224 // (also compatible with anything that has standard ethernet header)
225 static constexpr char name_clat_rx_ether[] = CLAT_INGRESS_PROG_ETHER_NAME FSOBJ_SUFFIX;
226
227 // This macro expands (from header files) to:
228 // prog_clatd_schedcls_egress_clat_rawip:[*fsobj]
229 // and is the name of the pinned egress ebpf program for ARPHRD_RAWIP interfaces.
230 // (also compatible with anything that has 0 size L2 header)
231 static constexpr char name_clat_tx_rawip[] = CLAT_EGRESS_PROG_RAWIP_NAME FSOBJ_SUFFIX;
232
233 // This macro expands (from header files) to:
234 // prog_clatd_schedcls_egress_clat_ether:[*fsobj]
235 // and is the name of the pinned egress ebpf program for ARPHRD_ETHER interfaces.
236 // (also compatible with anything that has standard ethernet header)
237 static constexpr char name_clat_tx_ether[] = CLAT_EGRESS_PROG_ETHER_NAME FSOBJ_SUFFIX;
238
239 // This macro expands (from header files) to:
240 // prog_offload_schedcls_ingress_tether_rawip:[*fsobj]
241 // and is the name of the pinned ingress ebpf program for ARPHRD_RAWIP interfaces.
242 // (also compatible with anything that has 0 size L2 header)
243 static constexpr char name_tether_rawip[] = TETHER_INGRESS_PROG_RAWIP_NAME FSOBJ_SUFFIX;
244
245 // This macro expands (from header files) to:
246 // prog_offload_schedcls_ingress_tether_ether:[*fsobj]
247 // and is the name of the pinned ingress ebpf program for ARPHRD_ETHER interfaces.
248 // (also compatible with anything that has standard ethernet header)
249 static constexpr char name_tether_ether[] = TETHER_INGRESS_PROG_ETHER_NAME FSOBJ_SUFFIX;
250
251 #undef FSOBJ_SUFFIX
252
253 // The actual name we'll use is determined at run time via 'ethernet' and 'ingress'
254 // booleans. We need to compile time allocate enough space in the struct
255 // hence this macro magic to make sure we have enough space for either
256 // possibility. In practice some of these are actually the same size.
257 static constexpr size_t ASCIIZ_MAXLEN_NAME = max({
258 sizeof(name_clat_rx_rawip),
259 sizeof(name_clat_rx_ether),
260 sizeof(name_clat_tx_rawip),
261 sizeof(name_clat_tx_ether),
262 sizeof(name_tether_rawip),
263 sizeof(name_tether_ether),
264 });
265
266 // These are not compile time constants: 'name' is used in strncpy below
267 const char* const name_clat_rx = ethernet ? name_clat_rx_ether : name_clat_rx_rawip;
268 const char* const name_clat_tx = ethernet ? name_clat_tx_ether : name_clat_tx_rawip;
269 const char* const name_clat = ingress ? name_clat_rx : name_clat_tx;
270 const char* const name_tether = ethernet ? name_tether_ether : name_tether_rawip;
271 const char* const name = (prio == PRIO_TETHER) ? name_tether : name_clat;
272
273 struct {
274 nlmsghdr n;
275 tcmsg t;
276 struct {
277 nlattr attr;
278 char str[NLMSG_ALIGN(ASCIIZ_LEN_BPF)];
279 } kind;
280 struct {
281 nlattr attr;
282 struct {
283 nlattr attr;
284 __u32 u32;
285 } fd;
286 struct {
287 nlattr attr;
288 char str[NLMSG_ALIGN(ASCIIZ_MAXLEN_NAME)];
289 } name;
290 struct {
291 nlattr attr;
292 __u32 u32;
293 } flags;
294 } options;
295 } req = {
296 .n =
297 {
298 .nlmsg_len = sizeof(req),
299 .nlmsg_type = RTM_NEWTFILTER,
300 .nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
301 },
302 .t =
303 {
304 .tcm_family = AF_UNSPEC,
305 .tcm_ifindex = ifIndex,
306 .tcm_handle = TC_H_UNSPEC,
307 .tcm_parent = TC_H_MAKE(TC_H_CLSACT,
308 ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
309 .tcm_info = static_cast<__u32>((prio << 16) | htons(proto)),
310 },
311 .kind =
312 {
313 .attr =
314 {
315 .nla_len = sizeof(req.kind),
316 .nla_type = TCA_KIND,
317 },
318 .str = BPF,
319 },
320 .options =
321 {
322 .attr =
323 {
324 .nla_len = sizeof(req.options),
325 .nla_type = TCA_OPTIONS,
326 },
327 .fd =
328 {
329 .attr =
330 {
331 .nla_len = sizeof(req.options.fd),
332 .nla_type = TCA_BPF_FD,
333 },
334 .u32 = static_cast<__u32>(bpfFd),
335 },
336 .name =
337 {
338 .attr =
339 {
340 .nla_len = sizeof(req.options.name),
341 .nla_type = TCA_BPF_NAME,
342 },
343 // Visible via 'tc filter show', but
344 // is overwritten by strncpy below
345 .str = "placeholder",
346 },
347 .flags =
348 {
349 .attr =
350 {
351 .nla_len = sizeof(req.options.flags),
352 .nla_type = TCA_BPF_FLAGS,
353 },
354 .u32 = TCA_BPF_FLAG_ACT_DIRECT,
355 },
356 },
357 };
358 #undef BPF
359
360 strncpy(req.options.name.str, name, sizeof(req.options.name.str));
361
362 return sendAndProcessNetlinkResponse(&req, sizeof(req));
363 }
364
365 // tc filter del dev .. in/egress prio .. protocol ..
tcFilterDelDev(int ifIndex,bool ingress,uint16_t prio,uint16_t proto)366 int tcFilterDelDev(int ifIndex, bool ingress, uint16_t prio, uint16_t proto) {
367 const struct {
368 nlmsghdr n;
369 tcmsg t;
370 } req = {
371 .n =
372 {
373 .nlmsg_len = sizeof(req),
374 .nlmsg_type = RTM_DELTFILTER,
375 .nlmsg_flags = NETLINK_REQUEST_FLAGS,
376 },
377 .t =
378 {
379 .tcm_family = AF_UNSPEC,
380 .tcm_ifindex = ifIndex,
381 .tcm_handle = TC_H_UNSPEC,
382 .tcm_parent = TC_H_MAKE(TC_H_CLSACT,
383 ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
384 .tcm_info = static_cast<__u32>((prio << 16) | htons(proto)),
385 },
386 };
387
388 return sendAndProcessNetlinkResponse(&req, sizeof(req));
389 }
390
391 } // namespace net
392 } // namespace android
393