|
| 1 | +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause |
| 2 | + |
| 3 | +/* |
| 4 | + * Test suite for lwt BPF programs that reroute packets. |
| 5 | + * The tests check not only that these programs work as expected normally, |
| 6 | + * but also that they handle abnormal situations gracefully. This test |
| 7 | + * suite currently only covers the lwt_xmit hook. lwt_in tests have not been |
| 8 | + * implemented. |
| 9 | + * |
| 10 | + * WARNING |
| 11 | + * ------- |
| 12 | + * This test suite can crash the kernel, thus should be run in a VM. |
| 13 | + * |
| 14 | + * Setup: |
| 15 | + * --------- |
| 16 | + * All tests are performed in a single netns. A lwt encap route is set up for |
| 17 | + * each subtest: |
| 18 | + * |
| 19 | + * ip route add 10.0.0.0/24 dev link_err encap bpf xmit obj <obj> sec lwt_xmit |
| 20 | + * |
| 21 | + * Here <obj> is statically defined to test_lwt_reroute.bpf.o, and it contains |
| 22 | + * a single test program entry. This program sets packet mark by last byte of |
| 23 | + * the IPv4 daddr. For example, a packet going to 1.2.3.4 will receive a skb |
| 24 | + * mark 4. A packet will only be marked once, and IP x.x.x.0 will be skipped |
| 25 | + * to avoid a routing loop. We don't use the generated BPF skeleton since |
| 26 | + * attaching lwt programs is not supported by libbpf yet. |
| 27 | + * |
| 28 | + * The test program will bring up a tun device, and sets up the following |
| 29 | + * routes: |
| 30 | + * |
| 31 | + * ip rule add pref 100 from all fwmark <tun_index> lookup 100 |
| 32 | + * ip route add table 100 default dev tun0 |
| 33 | + * |
| 34 | + * For normal testing, a ping command is running in the test netns: |
| 35 | + * |
| 36 | + * ping 10.0.0.<tun_index> -c1 -W1 -s <ICMP_PAYLOAD_SIZE> |
| 37 | + * |
| 38 | + * For abnormal testing, fq is used as the qdisc of the tun device. Then a UDP |
| 39 | + * socket will try to overflow the fq queue and trigger qdisc drop error. |
| 40 | + * |
| 41 | + * Scenarios: |
| 42 | + * -------------------------------- |
| 43 | + * 1. Reroute to a running tun device |
| 44 | + * 2. Reroute to a device whose qdisc drops packets |
| 45 | + * |
| 46 | + * For case 1, ping packets should be received by the tun device. |
| 47 | + * |
| 48 | + * For case 2, force UDP packets to overflow fq limit. As long as kernel |
| 49 | + * is not crashed, it is considered successful. |
| 50 | + */ |
| 51 | +#include "lwt_helpers.h" |
| 52 | +#include "network_helpers.h" |
| 53 | +#include <linux/net_tstamp.h> |
| 54 | + |
/* BPF object file named in the lwt encap route installed by setup() */
#define BPF_OBJECT	"test_lwt_reroute.bpf.o"
/* Address added to the loopback device by setup() */
#define LOCAL_SRC	"10.0.0.1"
/* Destination prefix covered by the lwt encap route */
#define TEST_CIDR	"10.0.0.0/24"
/* lwt hook keyword and ELF section name of the program under test */
#define XMIT_HOOK	"xmit"
#define XMIT_SECTION	"lwt_xmit"
/* Nanoseconds per second, used to build SCM_TXTIME timestamps */
#define NSEC_PER_SEC	1000000000ULL
| 61 | + |
| 62 | +/* send a ping to be rerouted to the target device */ |
| 63 | +static void ping_once(const char *ip) |
| 64 | +{ |
| 65 | + /* We won't get a reply. Don't fail here */ |
| 66 | + SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1", |
| 67 | + ip, ICMP_PAYLOAD_SIZE); |
| 68 | +} |
| 69 | + |
| 70 | +/* Send snd_target UDP packets to overflow the fq queue and trigger qdisc drop |
| 71 | + * error. This is done via TX tstamp to force buffering delayed packets. |
| 72 | + */ |
| 73 | +static int overflow_fq(int snd_target, const char *target_ip) |
| 74 | +{ |
| 75 | + struct sockaddr_in addr = { |
| 76 | + .sin_family = AF_INET, |
| 77 | + .sin_port = htons(1234), |
| 78 | + }; |
| 79 | + |
| 80 | + char data_buf[8]; /* only #pkts matter, so use a random small buffer */ |
| 81 | + char control_buf[CMSG_SPACE(sizeof(uint64_t))]; |
| 82 | + struct iovec iov = { |
| 83 | + .iov_base = data_buf, |
| 84 | + .iov_len = sizeof(data_buf), |
| 85 | + }; |
| 86 | + int err = -1; |
| 87 | + int s = -1; |
| 88 | + struct sock_txtime txtime_on = { |
| 89 | + .clockid = CLOCK_MONOTONIC, |
| 90 | + .flags = 0, |
| 91 | + }; |
| 92 | + struct msghdr msg = { |
| 93 | + .msg_name = &addr, |
| 94 | + .msg_namelen = sizeof(addr), |
| 95 | + .msg_control = control_buf, |
| 96 | + .msg_controllen = sizeof(control_buf), |
| 97 | + .msg_iovlen = 1, |
| 98 | + .msg_iov = &iov, |
| 99 | + }; |
| 100 | + struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); |
| 101 | + |
| 102 | + memset(data_buf, 0, sizeof(data_buf)); |
| 103 | + |
| 104 | + s = socket(AF_INET, SOCK_DGRAM, 0); |
| 105 | + if (!ASSERT_GE(s, 0, "socket")) |
| 106 | + goto out; |
| 107 | + |
| 108 | + err = setsockopt(s, SOL_SOCKET, SO_TXTIME, &txtime_on, sizeof(txtime_on)); |
| 109 | + if (!ASSERT_OK(err, "setsockopt(SO_TXTIME)")) |
| 110 | + goto out; |
| 111 | + |
| 112 | + err = inet_pton(AF_INET, target_ip, &addr.sin_addr); |
| 113 | + if (!ASSERT_EQ(err, 1, "inet_pton")) |
| 114 | + goto out; |
| 115 | + |
| 116 | + while (snd_target > 0) { |
| 117 | + struct timespec now; |
| 118 | + |
| 119 | + memset(control_buf, 0, sizeof(control_buf)); |
| 120 | + cmsg->cmsg_type = SCM_TXTIME; |
| 121 | + cmsg->cmsg_level = SOL_SOCKET; |
| 122 | + cmsg->cmsg_len = CMSG_LEN(sizeof(uint64_t)); |
| 123 | + |
| 124 | + err = clock_gettime(CLOCK_MONOTONIC, &now); |
| 125 | + if (!ASSERT_OK(err, "clock_gettime(CLOCK_MONOTONIC)")) { |
| 126 | + err = -1; |
| 127 | + goto out; |
| 128 | + } |
| 129 | + |
| 130 | + *(uint64_t *)CMSG_DATA(cmsg) = (now.tv_nsec + 1) * NSEC_PER_SEC + |
| 131 | + now.tv_nsec; |
| 132 | + |
| 133 | + /* we will intentionally send more than fq limit, so ignore |
| 134 | + * the error here. |
| 135 | + */ |
| 136 | + sendmsg(s, &msg, MSG_NOSIGNAL); |
| 137 | + snd_target--; |
| 138 | + } |
| 139 | + |
| 140 | + /* no kernel crash so far is considered success */ |
| 141 | + err = 0; |
| 142 | + |
| 143 | +out: |
| 144 | + if (s >= 0) |
| 145 | + close(s); |
| 146 | + |
| 147 | + return err; |
| 148 | +} |
| 149 | + |
| 150 | +static int setup(const char *tun_dev) |
| 151 | +{ |
| 152 | + int target_index = -1; |
| 153 | + int tap_fd = -1; |
| 154 | + |
| 155 | + tap_fd = open_tuntap(tun_dev, false); |
| 156 | + if (!ASSERT_GE(tap_fd, 0, "open_tun")) |
| 157 | + return -1; |
| 158 | + |
| 159 | + target_index = if_nametoindex(tun_dev); |
| 160 | + if (!ASSERT_GE(target_index, 0, "if_nametoindex")) |
| 161 | + return -1; |
| 162 | + |
| 163 | + SYS(fail, "ip link add link_err type dummy"); |
| 164 | + SYS(fail, "ip link set lo up"); |
| 165 | + SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32"); |
| 166 | + SYS(fail, "ip link set link_err up"); |
| 167 | + SYS(fail, "ip link set %s up", tun_dev); |
| 168 | + |
| 169 | + SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec lwt_xmit", |
| 170 | + TEST_CIDR, BPF_OBJECT); |
| 171 | + |
| 172 | + SYS(fail, "ip rule add pref 100 from all fwmark %d lookup 100", |
| 173 | + target_index); |
| 174 | + SYS(fail, "ip route add t 100 default dev %s", tun_dev); |
| 175 | + |
| 176 | + return tap_fd; |
| 177 | + |
| 178 | +fail: |
| 179 | + if (tap_fd >= 0) |
| 180 | + close(tap_fd); |
| 181 | + return -1; |
| 182 | +} |
| 183 | + |
| 184 | +static void test_lwt_reroute_normal_xmit(void) |
| 185 | +{ |
| 186 | + const char *tun_dev = "tun0"; |
| 187 | + int tun_fd = -1; |
| 188 | + int ifindex = -1; |
| 189 | + char ip[256]; |
| 190 | + struct timeval timeo = { |
| 191 | + .tv_sec = 0, |
| 192 | + .tv_usec = 250000, |
| 193 | + }; |
| 194 | + |
| 195 | + tun_fd = setup(tun_dev); |
| 196 | + if (!ASSERT_GE(tun_fd, 0, "setup_reroute")) |
| 197 | + return; |
| 198 | + |
| 199 | + ifindex = if_nametoindex(tun_dev); |
| 200 | + if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) |
| 201 | + return; |
| 202 | + |
| 203 | + snprintf(ip, 256, "10.0.0.%d", ifindex); |
| 204 | + |
| 205 | + /* ping packets should be received by the tun device */ |
| 206 | + ping_once(ip); |
| 207 | + |
| 208 | + if (!ASSERT_EQ(wait_for_packet(tun_fd, __expect_icmp_ipv4, &timeo), 1, |
| 209 | + "wait_for_packet")) |
| 210 | + log_err("%s xmit", __func__); |
| 211 | +} |
| 212 | + |
| 213 | +/* |
| 214 | + * Test the failure case when the skb is dropped at the qdisc. This is a |
| 215 | + * regression prevention at the xmit hook only. |
| 216 | + */ |
| 217 | +static void test_lwt_reroute_qdisc_dropped(void) |
| 218 | +{ |
| 219 | + const char *tun_dev = "tun0"; |
| 220 | + int tun_fd = -1; |
| 221 | + int ifindex = -1; |
| 222 | + char ip[256]; |
| 223 | + |
| 224 | + tun_fd = setup(tun_dev); |
| 225 | + if (!ASSERT_GE(tun_fd, 0, "setup_reroute")) |
| 226 | + goto fail; |
| 227 | + |
| 228 | + SYS(fail, "tc qdisc replace dev %s root fq limit 5 flow_limit 5", tun_dev); |
| 229 | + |
| 230 | + ifindex = if_nametoindex(tun_dev); |
| 231 | + if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) |
| 232 | + return; |
| 233 | + |
| 234 | + snprintf(ip, 256, "10.0.0.%d", ifindex); |
| 235 | + ASSERT_EQ(overflow_fq(10, ip), 0, "overflow_fq"); |
| 236 | + |
| 237 | +fail: |
| 238 | + if (tun_fd >= 0) |
| 239 | + close(tun_fd); |
| 240 | +} |
| 241 | + |
| 242 | +static void *test_lwt_reroute_run(void *arg) |
| 243 | +{ |
| 244 | + netns_delete(); |
| 245 | + RUN_TEST(lwt_reroute_normal_xmit); |
| 246 | + RUN_TEST(lwt_reroute_qdisc_dropped); |
| 247 | + return NULL; |
| 248 | +} |
| 249 | + |
| 250 | +void test_lwt_reroute(void) |
| 251 | +{ |
| 252 | + pthread_t test_thread; |
| 253 | + int err; |
| 254 | + |
| 255 | + /* Run the tests in their own thread to isolate the namespace changes |
| 256 | + * so they do not affect the environment of other tests. |
| 257 | + * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) |
| 258 | + */ |
| 259 | + err = pthread_create(&test_thread, NULL, &test_lwt_reroute_run, NULL); |
| 260 | + if (ASSERT_OK(err, "pthread_create")) |
| 261 | + ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); |
| 262 | +} |
0 commit comments