Skip to content

Commit aff1366

Browse files
Florian WestphalSasha Levin
authored andcommitted
netfilter: nft_set_pipapo: split gc into unlink and reclaim phase
[ Upstream commit 9df9578 ] Yiming Qian reports Use-after-free in the pipapo set type: Under a large number of expired elements, commit-time GC can run for a very long time in a non-preemptible context, triggering soft lockup warnings and RCU stall reports (local denial of service). We must split GC in an unlink and a reclaim phase. We cannot queue elements for freeing until pointers have been swapped. Expired elements are still exposed to both the packet path and userspace dumpers via the live copy of the data structure. call_rcu() does not protect us: dump operations or element lookups starting after call_rcu has fired can still observe the free'd element, unless the commit phase has made enough progress to swap the clone and live pointers before any new reader has picked up the old version. This a similar approach as done recently for the rbtree backend in commit 35f83a7 ("netfilter: nft_set_rbtree: don't gc elements on insert"). Fixes: 3c4287f ("nf_tables: Add set type for arbitrary concatenation of ranges") Reported-by: Yiming Qian <yimingqian591@gmail.com> Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent b7f6728 commit aff1366

4 files changed

Lines changed: 50 additions & 13 deletions

File tree

include/net/netfilter/nf_tables.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1860,6 +1860,11 @@ struct nft_trans_gc {
18601860
struct rcu_head rcu;
18611861
};
18621862

1863+
static inline int nft_trans_gc_space(const struct nft_trans_gc *trans)
1864+
{
1865+
return NFT_TRANS_GC_BATCHCOUNT - trans->count;
1866+
}
1867+
18631868
static inline void nft_ctx_update(struct nft_ctx *ctx,
18641869
const struct nft_trans *trans)
18651870
{

net/netfilter/nf_tables_api.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10480,11 +10480,6 @@ static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
1048010480
schedule_work(&trans_gc_work);
1048110481
}
1048210482

10483-
static int nft_trans_gc_space(struct nft_trans_gc *trans)
10484-
{
10485-
return NFT_TRANS_GC_BATCHCOUNT - trans->count;
10486-
}
10487-
1048810483
struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
1048910484
unsigned int gc_seq, gfp_t gfp)
1049010485
{

net/netfilter/nft_set_pipapo.c

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1681,11 +1681,11 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
16811681
}
16821682

16831683
/**
1684-
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
1684+
* pipapo_gc_scan() - Drop expired entries from set and link them to gc list
16851685
* @set: nftables API set representation
16861686
* @m: Matching data
16871687
*/
1688-
static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
1688+
static void pipapo_gc_scan(struct nft_set *set, struct nft_pipapo_match *m)
16891689
{
16901690
struct nft_pipapo *priv = nft_set_priv(set);
16911691
struct net *net = read_pnet(&set->net);
@@ -1698,6 +1698,8 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
16981698
if (!gc)
16991699
return;
17001700

1701+
list_add(&gc->list, &priv->gc_head);
1702+
17011703
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
17021704
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
17031705
const struct nft_pipapo_field *f;
@@ -1725,9 +1727,13 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
17251727
* NFT_SET_ELEM_DEAD_BIT.
17261728
*/
17271729
if (__nft_set_elem_expired(&e->ext, tstamp)) {
1728-
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
1729-
if (!gc)
1730-
return;
1730+
if (!nft_trans_gc_space(gc)) {
1731+
gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
1732+
if (!gc)
1733+
return;
1734+
1735+
list_add(&gc->list, &priv->gc_head);
1736+
}
17311737

17321738
nft_pipapo_gc_deactivate(net, set, e);
17331739
pipapo_drop(m, rulemap);
@@ -1741,10 +1747,30 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
17411747
}
17421748
}
17431749

1744-
gc = nft_trans_gc_catchall_sync(gc);
1750+
priv->last_gc = jiffies;
1751+
}
1752+
1753+
/**
1754+
* pipapo_gc_queue() - Free expired elements
1755+
* @set: nftables API set representation
1756+
*/
1757+
static void pipapo_gc_queue(struct nft_set *set)
1758+
{
1759+
struct nft_pipapo *priv = nft_set_priv(set);
1760+
struct nft_trans_gc *gc, *next;
1761+
1762+
/* always do a catchall cycle: */
1763+
gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
17451764
if (gc) {
1765+
gc = nft_trans_gc_catchall_sync(gc);
1766+
if (gc)
1767+
nft_trans_gc_queue_sync_done(gc);
1768+
}
1769+
1770+
/* always purge queued gc elements. */
1771+
list_for_each_entry_safe(gc, next, &priv->gc_head, list) {
1772+
list_del(&gc->list);
17461773
nft_trans_gc_queue_sync_done(gc);
1747-
priv->last_gc = jiffies;
17481774
}
17491775
}
17501776

@@ -1798,6 +1824,10 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
17981824
*
17991825
* We also need to create a new working copy for subsequent insertions and
18001826
* deletions.
1827+
*
1828+
* After the live copy has been replaced by the clone, we can safely queue
1829+
* expired elements that have been collected by pipapo_gc_scan() for
1830+
* memory reclaim.
18011831
*/
18021832
static void nft_pipapo_commit(struct nft_set *set)
18031833
{
@@ -1808,14 +1838,16 @@ static void nft_pipapo_commit(struct nft_set *set)
18081838
return;
18091839

18101840
if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
1811-
pipapo_gc(set, priv->clone);
1841+
pipapo_gc_scan(set, priv->clone);
18121842

18131843
old = rcu_replace_pointer(priv->match, priv->clone,
18141844
nft_pipapo_transaction_mutex_held(set));
18151845
priv->clone = NULL;
18161846

18171847
if (old)
18181848
call_rcu(&old->rcu, pipapo_reclaim_match);
1849+
1850+
pipapo_gc_queue(set);
18191851
}
18201852

18211853
static void nft_pipapo_abort(const struct nft_set *set)
@@ -2280,6 +2312,7 @@ static int nft_pipapo_init(const struct nft_set *set,
22802312
f->mt = NULL;
22812313
}
22822314

2315+
INIT_LIST_HEAD(&priv->gc_head);
22832316
rcu_assign_pointer(priv->match, m);
22842317

22852318
return 0;
@@ -2329,6 +2362,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
23292362
struct nft_pipapo *priv = nft_set_priv(set);
23302363
struct nft_pipapo_match *m;
23312364

2365+
WARN_ON_ONCE(!list_empty(&priv->gc_head));
2366+
23322367
m = rcu_dereference_protected(priv->match, true);
23332368

23342369
if (priv->clone) {

net/netfilter/nft_set_pipapo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,12 +156,14 @@ struct nft_pipapo_match {
156156
* @clone: Copy where pending insertions and deletions are kept
157157
* @width: Total bytes to be matched for one packet, including padding
158158
* @last_gc: Timestamp of last garbage collection run, jiffies
159+
* @gc_head: list of nft_trans_gc to queue up for mem reclaim
159160
*/
160161
struct nft_pipapo {
161162
struct nft_pipapo_match __rcu *match;
162163
struct nft_pipapo_match *clone;
163164
int width;
164165
unsigned long last_gc;
166+
struct list_head gc_head;
165167
};
166168

167169
struct nft_pipapo_elem;

0 commit comments

Comments
 (0)