Skip to content

Commit 782f268

Browse files
ummakynesFlorian Westphal
authored and committed
netfilter: nft_set_rbtree: validate element belonging to interval
The existing partial overlap detection does not check if the elements belong to the interval, e.g. add element inet x y { 1.1.1.1-2.2.2.2, 4.4.4.4-5.5.5.5 } add element inet x y { 1.1.1.1-5.5.5.5 } => this should fail: ENOENT Similar situation occurs with deletions: add element inet x y { 1.1.1.1-2.2.2.2, 4.4.4.4-5.5.5.5} delete element inet x y { 1.1.1.1-5.5.5.5 } => this should fail: ENOENT This currently works via mitigation by nft in userspace, which is performing the overlap detection before sending the elements to the kernel. This requires a previous netlink dump of the set content which slows down incremental updates on interval sets, because a netlink set content dump is needed. This patch extends the existing overlap detection to track the most recent start element that already exists. The pointer to the existing start element is stored as a cookie (no pointer dereference is ever possible). If the end element is added and it already exists, then check that the existing end element is adjacent to the already existing start element. Similar logic applies to element deactivation. This patch also annotates the timestamp to identify if start cookie comes from an older batch, in such case reset it. Otherwise, a failing create element command leaves the start cookie in place, resulting in bogus error reporting. There are still a few more corner cases of overlap detection related to the open interval that are addressed in follow up patches. This addresses an early design mistake where an interval is expressed as two elements, using the NFT_SET_ELEM_INTERVAL_END flag, instead of the more recent NFTA_SET_ELEM_KEY_END attribute that pipapo already uses. Fixes: 7c84d41 ("netfilter: nft_set_rbtree: Detect partial overlaps on insertion") Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: Florian Westphal <fw@strlen.de>
1 parent 4780ec1 commit 782f268

1 file changed

Lines changed: 143 additions & 4 deletions

File tree

net/netfilter/nft_set_rbtree.c

Lines changed: 143 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,10 @@ struct nft_rbtree {
3333
rwlock_t lock;
3434
struct nft_array __rcu *array;
3535
struct nft_array *array_next;
36+
unsigned long start_rbe_cookie;
3637
unsigned long last_gc;
3738
struct list_head expired;
39+
u64 last_tstamp;
3840
};
3941

4042
struct nft_rbtree_elem {
@@ -263,16 +265,85 @@ static struct nft_rbtree_elem *nft_rbtree_prev_active(struct nft_rbtree_elem *rb
263265
return rb_entry(node, struct nft_rbtree_elem, node);
264266
}
265267

268+
static struct nft_rbtree_elem *
269+
__nft_rbtree_next_active(struct rb_node *node, u8 genmask)
270+
{
271+
struct nft_rbtree_elem *next_rbe;
272+
273+
while (node) {
274+
next_rbe = rb_entry(node, struct nft_rbtree_elem, node);
275+
if (!nft_set_elem_active(&next_rbe->ext, genmask)) {
276+
node = rb_next(node);
277+
continue;
278+
}
279+
280+
return next_rbe;
281+
}
282+
283+
return NULL;
284+
}
285+
286+
static struct nft_rbtree_elem *
287+
nft_rbtree_next_active(struct nft_rbtree_elem *rbe, u8 genmask)
288+
{
289+
return __nft_rbtree_next_active(rb_next(&rbe->node), genmask);
290+
}
291+
292+
static void nft_rbtree_maybe_reset_start_cookie(struct nft_rbtree *priv,
293+
u64 tstamp)
294+
{
295+
if (priv->last_tstamp != tstamp) {
296+
priv->start_rbe_cookie = 0;
297+
priv->last_tstamp = tstamp;
298+
}
299+
}
300+
301+
static void nft_rbtree_set_start_cookie(struct nft_rbtree *priv,
302+
const struct nft_rbtree_elem *rbe)
303+
{
304+
priv->start_rbe_cookie = (unsigned long)rbe;
305+
}
306+
307+
static bool nft_rbtree_cmp_start_cookie(struct nft_rbtree *priv,
308+
const struct nft_rbtree_elem *rbe)
309+
{
310+
return priv->start_rbe_cookie == (unsigned long)rbe;
311+
}
312+
313+
static bool nft_rbtree_insert_same_interval(const struct net *net,
314+
struct nft_rbtree *priv,
315+
struct nft_rbtree_elem *rbe)
316+
{
317+
u8 genmask = nft_genmask_next(net);
318+
struct nft_rbtree_elem *next_rbe;
319+
320+
if (!priv->start_rbe_cookie)
321+
return true;
322+
323+
next_rbe = nft_rbtree_next_active(rbe, genmask);
324+
if (next_rbe) {
325+
/* Closest start element differs from last element added. */
326+
if (nft_rbtree_interval_start(next_rbe) &&
327+
nft_rbtree_cmp_start_cookie(priv, next_rbe)) {
328+
priv->start_rbe_cookie = 0;
329+
return true;
330+
}
331+
}
332+
333+
priv->start_rbe_cookie = 0;
334+
335+
return false;
336+
}
337+
266338
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
267339
struct nft_rbtree_elem *new,
268-
struct nft_elem_priv **elem_priv)
340+
struct nft_elem_priv **elem_priv, u64 tstamp)
269341
{
270342
struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL, *rbe_prev;
271343
struct rb_node *node, *next, *parent, **p, *first = NULL;
272344
struct nft_rbtree *priv = nft_set_priv(set);
273345
u8 cur_genmask = nft_genmask_cur(net);
274346
u8 genmask = nft_genmask_next(net);
275-
u64 tstamp = nft_net_tstamp(net);
276347
int d;
277348

278349
/* Descend the tree to search for an existing element greater than the
@@ -378,12 +449,18 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
378449
}
379450
}
380451

452+
if (nft_rbtree_interval_null(set, new))
453+
priv->start_rbe_cookie = 0;
454+
else if (nft_rbtree_interval_start(new) && priv->start_rbe_cookie)
455+
priv->start_rbe_cookie = 0;
456+
381457
/* - new start element matching existing start element: full overlap
382458
* reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
383459
*/
384460
if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) &&
385461
nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) {
386462
*elem_priv = &rbe_ge->priv;
463+
nft_rbtree_set_start_cookie(priv, rbe_ge);
387464
return -EEXIST;
388465
}
389466

@@ -399,6 +476,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
399476
return -ECANCELED;
400477

401478
*elem_priv = &rbe_le->priv;
479+
480+
/* - start and end element belong to the same interval. */
481+
if (!nft_rbtree_insert_same_interval(net, priv, rbe_le))
482+
return -ENOTEMPTY;
483+
402484
return -EEXIST;
403485
}
404486

@@ -543,8 +625,11 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
543625
{
544626
struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv);
545627
struct nft_rbtree *priv = nft_set_priv(set);
628+
u64 tstamp = nft_net_tstamp(net);
546629
int err;
547630

631+
nft_rbtree_maybe_reset_start_cookie(priv, tstamp);
632+
548633
if (nft_array_may_resize(set) < 0)
549634
return -ENOMEM;
550635

@@ -555,7 +640,7 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
555640
cond_resched();
556641

557642
write_lock_bh(&priv->lock);
558-
err = __nft_rbtree_insert(net, set, rbe, elem_priv);
643+
err = __nft_rbtree_insert(net, set, rbe, elem_priv, tstamp);
559644
write_unlock_bh(&priv->lock);
560645
} while (err == -EAGAIN);
561646

@@ -588,6 +673,48 @@ static void nft_rbtree_activate(const struct net *net,
588673
nft_clear(net, &rbe->ext);
589674
}
590675

676+
static struct nft_rbtree_elem *
677+
nft_rbtree_next_inactive(struct nft_rbtree_elem *rbe, u8 genmask)
678+
{
679+
struct nft_rbtree_elem *next_rbe;
680+
struct rb_node *node;
681+
682+
node = rb_next(&rbe->node);
683+
if (node) {
684+
next_rbe = rb_entry(node, struct nft_rbtree_elem, node);
685+
if (nft_rbtree_interval_start(next_rbe) &&
686+
!nft_set_elem_active(&next_rbe->ext, genmask))
687+
return next_rbe;
688+
}
689+
690+
return NULL;
691+
}
692+
693+
static bool nft_rbtree_deactivate_same_interval(const struct net *net,
694+
struct nft_rbtree *priv,
695+
struct nft_rbtree_elem *rbe)
696+
{
697+
u8 genmask = nft_genmask_next(net);
698+
struct nft_rbtree_elem *next_rbe;
699+
700+
if (!priv->start_rbe_cookie)
701+
return true;
702+
703+
next_rbe = nft_rbtree_next_inactive(rbe, genmask);
704+
if (next_rbe) {
705+
/* Closest start element differs from last element added. */
706+
if (nft_rbtree_interval_start(next_rbe) &&
707+
nft_rbtree_cmp_start_cookie(priv, next_rbe)) {
708+
priv->start_rbe_cookie = 0;
709+
return true;
710+
}
711+
}
712+
713+
priv->start_rbe_cookie = 0;
714+
715+
return false;
716+
}
717+
591718
static void nft_rbtree_flush(const struct net *net,
592719
const struct nft_set *set,
593720
struct nft_elem_priv *elem_priv)
@@ -602,12 +729,18 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
602729
const struct nft_set_elem *elem)
603730
{
604731
struct nft_rbtree_elem *rbe, *this = nft_elem_priv_cast(elem->priv);
605-
const struct nft_rbtree *priv = nft_set_priv(set);
732+
struct nft_rbtree *priv = nft_set_priv(set);
606733
const struct rb_node *parent = priv->root.rb_node;
607734
u8 genmask = nft_genmask_next(net);
608735
u64 tstamp = nft_net_tstamp(net);
609736
int d;
610737

738+
nft_rbtree_maybe_reset_start_cookie(priv, tstamp);
739+
740+
if (nft_rbtree_interval_start(this) ||
741+
nft_rbtree_interval_null(set, this))
742+
priv->start_rbe_cookie = 0;
743+
611744
if (nft_array_may_resize(set) < 0)
612745
return NULL;
613746

@@ -635,6 +768,12 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
635768
parent = parent->rb_left;
636769
continue;
637770
}
771+
772+
if (nft_rbtree_interval_start(rbe))
773+
nft_rbtree_set_start_cookie(priv, rbe);
774+
else if (!nft_rbtree_deactivate_same_interval(net, priv, rbe))
775+
return NULL;
776+
638777
nft_rbtree_flush(net, set, &rbe->priv);
639778
return &rbe->priv;
640779
}

0 commit comments

Comments
 (0)