Skip to content

Commit b1216f1

Browse files
committed
Merge branch 'icmp-better-deal-with-ddos'
Eric Dumazet says:

====================
icmp: better deal with DDOS

When dealing with death of big UDP servers, admins might want to increase net.ipv4.icmp_msgs_per_sec and net.ipv4.icmp_msgs_burst to big values (2,000,000 or more).

They also might need to tune the per-host ratelimit to 1ms or 0ms in favor of the global rate limit.

This series fixes bugs showing up in all these needs.
====================

Link: https://patch.msgid.link/20260216142832.3834174-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents 570e454 + 9395b1b commit b1216f1

5 files changed

Lines changed: 31 additions & 19 deletions

File tree

Documentation/networking/ip-sysctl.rst

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3234,12 +3234,13 @@ enhanced_dad - BOOLEAN
32343234
===========
32353235

32363236
ratelimit - INTEGER
3237-
Limit the maximal rates for sending ICMPv6 messages.
3237+
Limit the maximal rates for sending ICMPv6 messages to a particular
3238+
peer.
32383239

32393240
0 to disable any limiting,
3240-
otherwise the minimal space between responses in milliseconds.
3241+
otherwise the space between responses in milliseconds.
32413242

3242-
Default: 1000
3243+
Default: 100
32433244

32443245
ratemask - list of comma separated ranges
32453246
For ICMPv6 message types matching the ranges in the ratemask, limit

include/net/netns/ipv4.h

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,12 @@ struct netns_ipv4 {
8888
int sysctl_tcp_rcvbuf_low_rtt;
8989
__cacheline_group_end(netns_ipv4_read_rx);
9090

91+
/* ICMP rate limiter hot cache line. */
92+
__cacheline_group_begin_aligned(icmp);
93+
atomic_t icmp_global_credit;
94+
u32 icmp_global_stamp;
95+
__cacheline_group_end_aligned(icmp);
96+
9197
struct inet_timewait_death_row tcp_death_row;
9298
struct udp_table *udp_table;
9399

@@ -141,8 +147,7 @@ struct netns_ipv4 {
141147
int sysctl_icmp_ratemask;
142148
int sysctl_icmp_msgs_per_sec;
143149
int sysctl_icmp_msgs_burst;
144-
atomic_t icmp_global_credit;
145-
u32 icmp_global_stamp;
150+
146151
u32 ip_rt_min_pmtu;
147152
int ip_rt_mtu_expires;
148153
int ip_rt_min_advmss;

net/ipv4/icmp.c

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -250,7 +250,8 @@ bool icmp_global_allow(struct net *net)
250250
if (delta < HZ / 50)
251251
return false;
252252

253-
incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec) * delta / HZ;
253+
incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec);
254+
incr = div_u64((u64)incr * delta, HZ);
254255
if (!incr)
255256
return false;
256257

@@ -315,23 +316,29 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
315316
struct dst_entry *dst = &rt->dst;
316317
struct inet_peer *peer;
317318
struct net_device *dev;
319+
int peer_timeout;
318320
bool rc = true;
319321

320322
if (!apply_ratelimit)
321323
return true;
322324

325+
peer_timeout = READ_ONCE(net->ipv4.sysctl_icmp_ratelimit);
326+
if (!peer_timeout)
327+
goto out;
328+
323329
/* No rate limit on loopback */
324330
rcu_read_lock();
325331
dev = dst_dev_rcu(dst);
326332
if (dev && (dev->flags & IFF_LOOPBACK))
327-
goto out;
333+
goto out_unlock;
328334

329335
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr,
330336
l3mdev_master_ifindex_rcu(dev));
331-
rc = inet_peer_xrlim_allow(peer,
332-
READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
333-
out:
337+
rc = inet_peer_xrlim_allow(peer, peer_timeout);
338+
339+
out_unlock:
334340
rcu_read_unlock();
341+
out:
335342
if (!rc)
336343
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
337344
else

net/ipv6/af_inet6.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -952,7 +952,7 @@ static int __net_init inet6_net_init(struct net *net)
952952
int err = 0;
953953

954954
net->ipv6.sysctl.bindv6only = 0;
955-
net->ipv6.sysctl.icmpv6_time = 1*HZ;
955+
net->ipv6.sysctl.icmpv6_time = HZ / 10;
956956
net->ipv6.sysctl.icmpv6_echo_ignore_all = 0;
957957
net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0;
958958
net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0;

net/ipv6/icmp.c

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -217,16 +217,15 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
217217
} else if (dev && (dev->flags & IFF_LOOPBACK)) {
218218
res = true;
219219
} else {
220-
struct rt6_info *rt = dst_rt6_info(dst);
221-
int tmo = net->ipv6.sysctl.icmpv6_time;
220+
int tmo = READ_ONCE(net->ipv6.sysctl.icmpv6_time);
222221
struct inet_peer *peer;
223222

224-
/* Give more bandwidth to wider prefixes. */
225-
if (rt->rt6i_dst.plen < 128)
226-
tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
227-
228-
peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
229-
res = inet_peer_xrlim_allow(peer, tmo);
223+
if (!tmo) {
224+
res = true;
225+
} else {
226+
peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
227+
res = inet_peer_xrlim_allow(peer, tmo);
228+
}
230229
}
231230
rcu_read_unlock();
232231
if (!res)

0 commit comments

Comments
 (0)