Skip to content

Commit be102eb

Browse files
ffmancera authored and ummakynes committed
netfilter: nf_conncount: rework API to use sk_buff directly
When using nf_conncount infrastructure for non-confirmed connections a duplicated track is possible due to an optimization introduced since commit d265929 ("netfilter: nf_conncount: reduce unnecessary GC"). In order to fix this introduce a new conncount API that receives directly an sk_buff struct. It fetches the tuple and zone and the corresponding ct from it. It comes with both existing conncount variants nf_conncount_count_skb() and nf_conncount_add_skb(). In addition remove the old API and adjust all the users to use the new one. This way, for each sk_buff struct it is possible to check if there is a ct present and already confirmed. If so, skip the add operation. Fixes: d265929 ("netfilter: nf_conncount: reduce unnecessary GC") Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
1 parent fe83133 commit be102eb

5 files changed

Lines changed: 142 additions & 103 deletions

File tree

include/net/netfilter/nf_conntrack_count.h

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,14 @@ struct nf_conncount_list {
1818
struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen);
1919
void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data);
2020

21-
unsigned int nf_conncount_count(struct net *net,
22-
struct nf_conncount_data *data,
23-
const u32 *key,
24-
const struct nf_conntrack_tuple *tuple,
25-
const struct nf_conntrack_zone *zone);
26-
27-
int nf_conncount_add(struct net *net, struct nf_conncount_list *list,
28-
const struct nf_conntrack_tuple *tuple,
29-
const struct nf_conntrack_zone *zone);
21+
unsigned int nf_conncount_count_skb(struct net *net,
22+
const struct sk_buff *skb,
23+
u16 l3num,
24+
struct nf_conncount_data *data,
25+
const u32 *key);
26+
27+
int nf_conncount_add_skb(struct net *net, const struct sk_buff *skb,
28+
u16 l3num, struct nf_conncount_list *list);
3029

3130
void nf_conncount_list_init(struct nf_conncount_list *list);
3231

net/netfilter/nf_conncount.c

Lines changed: 120 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -122,15 +122,65 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
122122
return ERR_PTR(-EAGAIN);
123123
}
124124

125+
static bool get_ct_or_tuple_from_skb(struct net *net,
126+
const struct sk_buff *skb,
127+
u16 l3num,
128+
struct nf_conn **ct,
129+
struct nf_conntrack_tuple *tuple,
130+
const struct nf_conntrack_zone **zone,
131+
bool *refcounted)
132+
{
133+
const struct nf_conntrack_tuple_hash *h;
134+
enum ip_conntrack_info ctinfo;
135+
struct nf_conn *found_ct;
136+
137+
found_ct = nf_ct_get(skb, &ctinfo);
138+
if (found_ct && !nf_ct_is_template(found_ct)) {
139+
*tuple = found_ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
140+
*zone = nf_ct_zone(found_ct);
141+
*ct = found_ct;
142+
return true;
143+
}
144+
145+
if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), l3num, net, tuple))
146+
return false;
147+
148+
if (found_ct)
149+
*zone = nf_ct_zone(found_ct);
150+
151+
h = nf_conntrack_find_get(net, *zone, tuple);
152+
if (!h)
153+
return true;
154+
155+
found_ct = nf_ct_tuplehash_to_ctrack(h);
156+
*refcounted = true;
157+
*ct = found_ct;
158+
159+
return true;
160+
}
161+
125162
static int __nf_conncount_add(struct net *net,
126-
struct nf_conncount_list *list,
127-
const struct nf_conntrack_tuple *tuple,
128-
const struct nf_conntrack_zone *zone)
163+
const struct sk_buff *skb,
164+
u16 l3num,
165+
struct nf_conncount_list *list)
129166
{
167+
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
130168
const struct nf_conntrack_tuple_hash *found;
131169
struct nf_conncount_tuple *conn, *conn_n;
170+
struct nf_conntrack_tuple tuple;
171+
struct nf_conn *ct = NULL;
132172
struct nf_conn *found_ct;
133173
unsigned int collect = 0;
174+
bool refcounted = false;
175+
176+
if (!get_ct_or_tuple_from_skb(net, skb, l3num, &ct, &tuple, &zone, &refcounted))
177+
return -ENOENT;
178+
179+
if (ct && nf_ct_is_confirmed(ct)) {
180+
if (refcounted)
181+
nf_ct_put(ct);
182+
return 0;
183+
}
134184

135185
if ((u32)jiffies == list->last_gc)
136186
goto add_new_node;
@@ -144,10 +194,10 @@ static int __nf_conncount_add(struct net *net,
144194
if (IS_ERR(found)) {
145195
/* Not found, but might be about to be confirmed */
146196
if (PTR_ERR(found) == -EAGAIN) {
147-
if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
197+
if (nf_ct_tuple_equal(&conn->tuple, &tuple) &&
148198
nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
149199
nf_ct_zone_id(zone, zone->dir))
150-
return 0; /* already exists */
200+
goto out_put; /* already exists */
151201
} else {
152202
collect++;
153203
}
@@ -156,7 +206,7 @@ static int __nf_conncount_add(struct net *net,
156206

157207
found_ct = nf_ct_tuplehash_to_ctrack(found);
158208

159-
if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
209+
if (nf_ct_tuple_equal(&conn->tuple, &tuple) &&
160210
nf_ct_zone_equal(found_ct, zone, zone->dir)) {
161211
/*
162212
* We should not see tuples twice unless someone hooks
@@ -165,7 +215,7 @@ static int __nf_conncount_add(struct net *net,
165215
* Attempt to avoid a re-add in this case.
166216
*/
167217
nf_ct_put(found_ct);
168-
return 0;
218+
goto out_put;
169219
} else if (already_closed(found_ct)) {
170220
/*
171221
* we do not care about connections which are
@@ -188,31 +238,35 @@ static int __nf_conncount_add(struct net *net,
188238
if (conn == NULL)
189239
return -ENOMEM;
190240

191-
conn->tuple = *tuple;
241+
conn->tuple = tuple;
192242
conn->zone = *zone;
193243
conn->cpu = raw_smp_processor_id();
194244
conn->jiffies32 = (u32)jiffies;
195245
list_add_tail(&conn->node, &list->head);
196246
list->count++;
197247
list->last_gc = (u32)jiffies;
248+
249+
out_put:
250+
if (refcounted)
251+
nf_ct_put(ct);
198252
return 0;
199253
}
200254

201-
int nf_conncount_add(struct net *net,
202-
struct nf_conncount_list *list,
203-
const struct nf_conntrack_tuple *tuple,
204-
const struct nf_conntrack_zone *zone)
255+
int nf_conncount_add_skb(struct net *net,
256+
const struct sk_buff *skb,
257+
u16 l3num,
258+
struct nf_conncount_list *list)
205259
{
206260
int ret;
207261

208262
/* check the saved connections */
209263
spin_lock_bh(&list->list_lock);
210-
ret = __nf_conncount_add(net, list, tuple, zone);
264+
ret = __nf_conncount_add(net, skb, l3num, list);
211265
spin_unlock_bh(&list->list_lock);
212266

213267
return ret;
214268
}
215-
EXPORT_SYMBOL_GPL(nf_conncount_add);
269+
EXPORT_SYMBOL_GPL(nf_conncount_add_skb);
216270

217271
void nf_conncount_list_init(struct nf_conncount_list *list)
218272
{
@@ -309,19 +363,22 @@ static void schedule_gc_worker(struct nf_conncount_data *data, int tree)
309363

310364
static unsigned int
311365
insert_tree(struct net *net,
366+
const struct sk_buff *skb,
367+
u16 l3num,
312368
struct nf_conncount_data *data,
313369
struct rb_root *root,
314370
unsigned int hash,
315-
const u32 *key,
316-
const struct nf_conntrack_tuple *tuple,
317-
const struct nf_conntrack_zone *zone)
371+
const u32 *key)
318372
{
319373
struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
374+
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
375+
bool do_gc = true, refcounted = false;
376+
unsigned int count = 0, gc_count = 0;
320377
struct rb_node **rbnode, *parent;
321-
struct nf_conncount_rb *rbconn;
378+
struct nf_conntrack_tuple tuple;
322379
struct nf_conncount_tuple *conn;
323-
unsigned int count = 0, gc_count = 0;
324-
bool do_gc = true;
380+
struct nf_conncount_rb *rbconn;
381+
struct nf_conn *ct = NULL;
325382

326383
spin_lock_bh(&nf_conncount_locks[hash]);
327384
restart:
@@ -340,7 +397,7 @@ insert_tree(struct net *net,
340397
} else {
341398
int ret;
342399

343-
ret = nf_conncount_add(net, &rbconn->list, tuple, zone);
400+
ret = nf_conncount_add_skb(net, skb, l3num, &rbconn->list);
344401
if (ret)
345402
count = 0; /* hotdrop */
346403
else
@@ -364,41 +421,46 @@ insert_tree(struct net *net,
364421
goto restart;
365422
}
366423

367-
/* expected case: match, insert new node */
368-
rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
369-
if (rbconn == NULL)
370-
goto out_unlock;
424+
if (get_ct_or_tuple_from_skb(net, skb, l3num, &ct, &tuple, &zone, &refcounted)) {
425+
/* expected case: match, insert new node */
426+
rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
427+
if (rbconn == NULL)
428+
goto out_unlock;
371429

372-
conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
373-
if (conn == NULL) {
374-
kmem_cache_free(conncount_rb_cachep, rbconn);
375-
goto out_unlock;
376-
}
430+
conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
431+
if (conn == NULL) {
432+
kmem_cache_free(conncount_rb_cachep, rbconn);
433+
goto out_unlock;
434+
}
377435

378-
conn->tuple = *tuple;
379-
conn->zone = *zone;
380-
conn->cpu = raw_smp_processor_id();
381-
conn->jiffies32 = (u32)jiffies;
382-
memcpy(rbconn->key, key, sizeof(u32) * data->keylen);
436+
conn->tuple = tuple;
437+
conn->zone = *zone;
438+
conn->cpu = raw_smp_processor_id();
439+
conn->jiffies32 = (u32)jiffies;
440+
memcpy(rbconn->key, key, sizeof(u32) * data->keylen);
441+
442+
nf_conncount_list_init(&rbconn->list);
443+
list_add(&conn->node, &rbconn->list.head);
444+
count = 1;
445+
rbconn->list.count = count;
383446

384-
nf_conncount_list_init(&rbconn->list);
385-
list_add(&conn->node, &rbconn->list.head);
386-
count = 1;
387-
rbconn->list.count = count;
447+
rb_link_node_rcu(&rbconn->node, parent, rbnode);
448+
rb_insert_color(&rbconn->node, root);
388449

389-
rb_link_node_rcu(&rbconn->node, parent, rbnode);
390-
rb_insert_color(&rbconn->node, root);
450+
if (refcounted)
451+
nf_ct_put(ct);
452+
}
391453
out_unlock:
392454
spin_unlock_bh(&nf_conncount_locks[hash]);
393455
return count;
394456
}
395457

396458
static unsigned int
397459
count_tree(struct net *net,
460+
const struct sk_buff *skb,
461+
u16 l3num,
398462
struct nf_conncount_data *data,
399-
const u32 *key,
400-
const struct nf_conntrack_tuple *tuple,
401-
const struct nf_conntrack_zone *zone)
463+
const u32 *key)
402464
{
403465
struct rb_root *root;
404466
struct rb_node *parent;
@@ -422,7 +484,7 @@ count_tree(struct net *net,
422484
} else {
423485
int ret;
424486

425-
if (!tuple) {
487+
if (!skb) {
426488
nf_conncount_gc_list(net, &rbconn->list);
427489
return rbconn->list.count;
428490
}
@@ -437,7 +499,7 @@ count_tree(struct net *net,
437499
}
438500

439501
/* same source network -> be counted! */
440-
ret = __nf_conncount_add(net, &rbconn->list, tuple, zone);
502+
ret = __nf_conncount_add(net, skb, l3num, &rbconn->list);
441503
spin_unlock_bh(&rbconn->list.list_lock);
442504
if (ret)
443505
return 0; /* hotdrop */
@@ -446,10 +508,10 @@ count_tree(struct net *net,
446508
}
447509
}
448510

449-
if (!tuple)
511+
if (!skb)
450512
return 0;
451513

452-
return insert_tree(net, data, root, hash, key, tuple, zone);
514+
return insert_tree(net, skb, l3num, data, root, hash, key);
453515
}
454516

455517
static void tree_gc_worker(struct work_struct *work)
@@ -511,18 +573,19 @@ static void tree_gc_worker(struct work_struct *work)
511573
}
512574

513575
/* Count and return number of conntrack entries in 'net' with particular 'key'.
514-
* If 'tuple' is not null, insert it into the accounting data structure.
515-
* Call with RCU read lock.
576+
* If 'skb' is not null, insert the corresponding tuple into the accounting
577+
* data structure. Call with RCU read lock.
516578
*/
517-
unsigned int nf_conncount_count(struct net *net,
518-
struct nf_conncount_data *data,
519-
const u32 *key,
520-
const struct nf_conntrack_tuple *tuple,
521-
const struct nf_conntrack_zone *zone)
579+
unsigned int nf_conncount_count_skb(struct net *net,
580+
const struct sk_buff *skb,
581+
u16 l3num,
582+
struct nf_conncount_data *data,
583+
const u32 *key)
522584
{
523-
return count_tree(net, data, key, tuple, zone);
585+
return count_tree(net, skb, l3num, data, key);
586+
524587
}
525-
EXPORT_SYMBOL_GPL(nf_conncount_count);
588+
EXPORT_SYMBOL_GPL(nf_conncount_count_skb);
526589

527590
struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen)
528591
{

net/netfilter/nft_connlimit.c

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,26 +24,11 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
2424
const struct nft_pktinfo *pkt,
2525
const struct nft_set_ext *ext)
2626
{
27-
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
28-
const struct nf_conntrack_tuple *tuple_ptr;
29-
struct nf_conntrack_tuple tuple;
30-
enum ip_conntrack_info ctinfo;
31-
const struct nf_conn *ct;
3227
unsigned int count;
28+
int err;
3329

34-
tuple_ptr = &tuple;
35-
36-
ct = nf_ct_get(pkt->skb, &ctinfo);
37-
if (ct != NULL) {
38-
tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
39-
zone = nf_ct_zone(ct);
40-
} else if (!nf_ct_get_tuplepr(pkt->skb, skb_network_offset(pkt->skb),
41-
nft_pf(pkt), nft_net(pkt), &tuple)) {
42-
regs->verdict.code = NF_DROP;
43-
return;
44-
}
45-
46-
if (nf_conncount_add(nft_net(pkt), priv->list, tuple_ptr, zone)) {
30+
err = nf_conncount_add_skb(nft_net(pkt), pkt->skb, nft_pf(pkt), priv->list);
31+
if (err) {
4732
regs->verdict.code = NF_DROP;
4833
return;
4934
}

0 commit comments

Comments (0)