Skip to content

Commit e1f4a52

Browse files
mark-bloch authored and rleon committed
RDMA/mlx5: Create an indirect flow table for steering anchor
A misbehaved user can create a steering anchor that points to a kernel flow table and then destroy the anchor without freeing the associated STC. This creates a problem as the kernel can't destroy the flow table since there is still a reference to it. As a result, this can exhaust all available flow table resources, preventing other users from using the RDMA device. To prevent this problem, a solution is implemented where a special flow table with two steering rules is created when a user creates a steering anchor for the first time. The rules include one that drops all traffic and another that points to the kernel flow table. If the steering anchor is destroyed, only the rule pointing to the kernel's flow table is removed. Any traffic reaching the special flow table after that is dropped. Since the special flow table is not destroyed when the steering anchor is destroyed, any issues are prevented from occurring. The remaining resources are only destroyed when the RDMA device is destroyed, which happens after all DEVX objects are freed, including the STCs, thus mitigating the issue. Fixes: 0c6ab0c ("RDMA/mlx5: Expose steering anchor to userspace") Signed-off-by: Mark Bloch <mbloch@nvidia.com> Reviewed-by: Maor Gottlieb <maorg@nvidia.com> Link: https://lore.kernel.org/r/b4a88a871d651fa4e8f98d552553c1cfe9ba2cd6.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org>
1 parent ee4d269 commit e1f4a52

3 files changed

Lines changed: 296 additions & 7 deletions

File tree

drivers/infiniband/hw/mlx5/fs.c

Lines changed: 269 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -695,8 +695,6 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
695695
struct mlx5_flow_table_attr ft_attr = {};
696696
struct mlx5_flow_table *ft;
697697

698-
if (mlx5_ib_shared_ft_allowed(&dev->ib_dev))
699-
ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
700698
ft_attr.prio = priority;
701699
ft_attr.max_fte = num_entries;
702700
ft_attr.flags = flags;
@@ -2025,6 +2023,237 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject,
20252023
return 0;
20262024
}
20272025

2026+
static int steering_anchor_create_ft(struct mlx5_ib_dev *dev,
2027+
struct mlx5_ib_flow_prio *ft_prio,
2028+
enum mlx5_flow_namespace_type ns_type)
2029+
{
2030+
struct mlx5_flow_table_attr ft_attr = {};
2031+
struct mlx5_flow_namespace *ns;
2032+
struct mlx5_flow_table *ft;
2033+
2034+
if (ft_prio->anchor.ft)
2035+
return 0;
2036+
2037+
ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
2038+
if (!ns)
2039+
return -EOPNOTSUPP;
2040+
2041+
ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
2042+
ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
2043+
ft_attr.prio = 0;
2044+
ft_attr.max_fte = 2;
2045+
ft_attr.level = 1;
2046+
2047+
ft = mlx5_create_flow_table(ns, &ft_attr);
2048+
if (IS_ERR(ft))
2049+
return PTR_ERR(ft);
2050+
2051+
ft_prio->anchor.ft = ft;
2052+
2053+
return 0;
2054+
}
2055+
2056+
static void steering_anchor_destroy_ft(struct mlx5_ib_flow_prio *ft_prio)
2057+
{
2058+
if (ft_prio->anchor.ft) {
2059+
mlx5_destroy_flow_table(ft_prio->anchor.ft);
2060+
ft_prio->anchor.ft = NULL;
2061+
}
2062+
}
2063+
2064+
static int
2065+
steering_anchor_create_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
2066+
{
2067+
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
2068+
struct mlx5_flow_group *fg;
2069+
void *flow_group_in;
2070+
int err = 0;
2071+
2072+
if (ft_prio->anchor.fg_drop)
2073+
return 0;
2074+
2075+
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
2076+
if (!flow_group_in)
2077+
return -ENOMEM;
2078+
2079+
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
2080+
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
2081+
2082+
fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
2083+
if (IS_ERR(fg)) {
2084+
err = PTR_ERR(fg);
2085+
goto out;
2086+
}
2087+
2088+
ft_prio->anchor.fg_drop = fg;
2089+
2090+
out:
2091+
kvfree(flow_group_in);
2092+
2093+
return err;
2094+
}
2095+
2096+
static void
2097+
steering_anchor_destroy_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
2098+
{
2099+
if (ft_prio->anchor.fg_drop) {
2100+
mlx5_destroy_flow_group(ft_prio->anchor.fg_drop);
2101+
ft_prio->anchor.fg_drop = NULL;
2102+
}
2103+
}
2104+
2105+
static int
2106+
steering_anchor_create_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2107+
{
2108+
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
2109+
struct mlx5_flow_group *fg;
2110+
void *flow_group_in;
2111+
int err = 0;
2112+
2113+
if (ft_prio->anchor.fg_goto_table)
2114+
return 0;
2115+
2116+
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
2117+
if (!flow_group_in)
2118+
return -ENOMEM;
2119+
2120+
fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
2121+
if (IS_ERR(fg)) {
2122+
err = PTR_ERR(fg);
2123+
goto out;
2124+
}
2125+
ft_prio->anchor.fg_goto_table = fg;
2126+
2127+
out:
2128+
kvfree(flow_group_in);
2129+
2130+
return err;
2131+
}
2132+
2133+
static void
2134+
steering_anchor_destroy_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2135+
{
2136+
if (ft_prio->anchor.fg_goto_table) {
2137+
mlx5_destroy_flow_group(ft_prio->anchor.fg_goto_table);
2138+
ft_prio->anchor.fg_goto_table = NULL;
2139+
}
2140+
}
2141+
2142+
static int
2143+
steering_anchor_create_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
2144+
{
2145+
struct mlx5_flow_act flow_act = {};
2146+
struct mlx5_flow_handle *handle;
2147+
2148+
if (ft_prio->anchor.rule_drop)
2149+
return 0;
2150+
2151+
flow_act.fg = ft_prio->anchor.fg_drop;
2152+
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
2153+
2154+
handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
2155+
NULL, 0);
2156+
if (IS_ERR(handle))
2157+
return PTR_ERR(handle);
2158+
2159+
ft_prio->anchor.rule_drop = handle;
2160+
2161+
return 0;
2162+
}
2163+
2164+
static void steering_anchor_destroy_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
2165+
{
2166+
if (ft_prio->anchor.rule_drop) {
2167+
mlx5_del_flow_rules(ft_prio->anchor.rule_drop);
2168+
ft_prio->anchor.rule_drop = NULL;
2169+
}
2170+
}
2171+
2172+
static int
2173+
steering_anchor_create_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2174+
{
2175+
struct mlx5_flow_destination dest = {};
2176+
struct mlx5_flow_act flow_act = {};
2177+
struct mlx5_flow_handle *handle;
2178+
2179+
if (ft_prio->anchor.rule_goto_table)
2180+
return 0;
2181+
2182+
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2183+
flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
2184+
flow_act.fg = ft_prio->anchor.fg_goto_table;
2185+
2186+
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
2187+
dest.ft = ft_prio->flow_table;
2188+
2189+
handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
2190+
&dest, 1);
2191+
if (IS_ERR(handle))
2192+
return PTR_ERR(handle);
2193+
2194+
ft_prio->anchor.rule_goto_table = handle;
2195+
2196+
return 0;
2197+
}
2198+
2199+
static void
2200+
steering_anchor_destroy_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
2201+
{
2202+
if (ft_prio->anchor.rule_goto_table) {
2203+
mlx5_del_flow_rules(ft_prio->anchor.rule_goto_table);
2204+
ft_prio->anchor.rule_goto_table = NULL;
2205+
}
2206+
}
2207+
2208+
static int steering_anchor_create_res(struct mlx5_ib_dev *dev,
2209+
struct mlx5_ib_flow_prio *ft_prio,
2210+
enum mlx5_flow_namespace_type ns_type)
2211+
{
2212+
int err;
2213+
2214+
err = steering_anchor_create_ft(dev, ft_prio, ns_type);
2215+
if (err)
2216+
return err;
2217+
2218+
err = steering_anchor_create_fg_drop(ft_prio);
2219+
if (err)
2220+
goto destroy_ft;
2221+
2222+
err = steering_anchor_create_fg_goto_table(ft_prio);
2223+
if (err)
2224+
goto destroy_fg_drop;
2225+
2226+
err = steering_anchor_create_rule_drop(ft_prio);
2227+
if (err)
2228+
goto destroy_fg_goto_table;
2229+
2230+
err = steering_anchor_create_rule_goto_table(ft_prio);
2231+
if (err)
2232+
goto destroy_rule_drop;
2233+
2234+
return 0;
2235+
2236+
destroy_rule_drop:
2237+
steering_anchor_destroy_rule_drop(ft_prio);
2238+
destroy_fg_goto_table:
2239+
steering_anchor_destroy_fg_goto_table(ft_prio);
2240+
destroy_fg_drop:
2241+
steering_anchor_destroy_fg_drop(ft_prio);
2242+
destroy_ft:
2243+
steering_anchor_destroy_ft(ft_prio);
2244+
2245+
return err;
2246+
}
2247+
2248+
static void mlx5_steering_anchor_destroy_res(struct mlx5_ib_flow_prio *ft_prio)
2249+
{
2250+
steering_anchor_destroy_rule_goto_table(ft_prio);
2251+
steering_anchor_destroy_rule_drop(ft_prio);
2252+
steering_anchor_destroy_fg_goto_table(ft_prio);
2253+
steering_anchor_destroy_fg_drop(ft_prio);
2254+
steering_anchor_destroy_ft(ft_prio);
2255+
}
2256+
20282257
static int steering_anchor_cleanup(struct ib_uobject *uobject,
20292258
enum rdma_remove_reason why,
20302259
struct uverbs_attr_bundle *attrs)
@@ -2035,13 +2264,34 @@ static int steering_anchor_cleanup(struct ib_uobject *uobject,
20352264
return -EBUSY;
20362265

20372266
mutex_lock(&obj->dev->flow_db->lock);
2267+
if (!--obj->ft_prio->anchor.rule_goto_table_ref)
2268+
steering_anchor_destroy_rule_goto_table(obj->ft_prio);
2269+
20382270
put_flow_table(obj->dev, obj->ft_prio, true);
20392271
mutex_unlock(&obj->dev->flow_db->lock);
20402272

20412273
kfree(obj);
20422274
return 0;
20432275
}
20442276

2277+
static void fs_cleanup_anchor(struct mlx5_ib_flow_prio *prio,
2278+
int count)
2279+
{
2280+
while (count--)
2281+
mlx5_steering_anchor_destroy_res(&prio[count]);
2282+
}
2283+
2284+
void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev)
2285+
{
2286+
fs_cleanup_anchor(dev->flow_db->prios, MLX5_IB_NUM_FLOW_FT);
2287+
fs_cleanup_anchor(dev->flow_db->egress_prios, MLX5_IB_NUM_FLOW_FT);
2288+
fs_cleanup_anchor(dev->flow_db->sniffer, MLX5_IB_NUM_SNIFFER_FTS);
2289+
fs_cleanup_anchor(dev->flow_db->egress, MLX5_IB_NUM_EGRESS_FTS);
2290+
fs_cleanup_anchor(dev->flow_db->fdb, MLX5_IB_NUM_FDB_FTS);
2291+
fs_cleanup_anchor(dev->flow_db->rdma_rx, MLX5_IB_NUM_FLOW_FT);
2292+
fs_cleanup_anchor(dev->flow_db->rdma_tx, MLX5_IB_NUM_FLOW_FT);
2293+
}
2294+
20452295
static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
20462296
struct mlx5_ib_flow_matcher *obj)
20472297
{
@@ -2182,21 +2432,31 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
21822432
return -ENOMEM;
21832433

21842434
mutex_lock(&dev->flow_db->lock);
2435+
21852436
ft_prio = _get_flow_table(dev, priority, ns_type, 0);
21862437
if (IS_ERR(ft_prio)) {
2187-
mutex_unlock(&dev->flow_db->lock);
21882438
err = PTR_ERR(ft_prio);
21892439
goto free_obj;
21902440
}
21912441

21922442
ft_prio->refcount++;
2193-
ft_id = mlx5_flow_table_id(ft_prio->flow_table);
2194-
mutex_unlock(&dev->flow_db->lock);
2443+
2444+
if (!ft_prio->anchor.rule_goto_table_ref) {
2445+
err = steering_anchor_create_res(dev, ft_prio, ns_type);
2446+
if (err)
2447+
goto put_flow_table;
2448+
}
2449+
2450+
ft_prio->anchor.rule_goto_table_ref++;
2451+
2452+
ft_id = mlx5_flow_table_id(ft_prio->anchor.ft);
21952453

21962454
err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
21972455
&ft_id, sizeof(ft_id));
21982456
if (err)
2199-
goto put_flow_table;
2457+
goto destroy_res;
2458+
2459+
mutex_unlock(&dev->flow_db->lock);
22002460

22012461
uobj->object = obj;
22022462
obj->dev = dev;
@@ -2205,8 +2465,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
22052465

22062466
return 0;
22072467

2468+
destroy_res:
2469+
--ft_prio->anchor.rule_goto_table_ref;
2470+
mlx5_steering_anchor_destroy_res(ft_prio);
22082471
put_flow_table:
2209-
mutex_lock(&dev->flow_db->lock);
22102472
put_flow_table(dev, ft_prio, true);
22112473
mutex_unlock(&dev->flow_db->lock);
22122474
free_obj:

drivers/infiniband/hw/mlx5/fs.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
1212
int mlx5_ib_fs_init(struct mlx5_ib_dev *dev);
13+
void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev);
1314
#else
1415
static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
1516
{
@@ -21,9 +22,24 @@ static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
2122
mutex_init(&dev->flow_db->lock);
2223
return 0;
2324
}
25+
26+
inline void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev) {}
2427
#endif
28+
2529
static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
2630
{
31+
/* When a steering anchor is created, a special flow table is also
32+
* created for the user to reference. Since the user can reference it,
33+
* the kernel cannot trust that when the user destroys the steering
34+
* anchor, they no longer reference the flow table.
35+
*
36+
* To address this issue, when a user destroys a steering anchor, only
37+
* the flow steering rule in the table is destroyed, but the table
38+
* itself is kept to deal with the above scenario. The remaining
39+
* resources are only removed when the RDMA device is destroyed, which
40+
* is a safe assumption that all references are gone.
41+
*/
42+
mlx5_ib_fs_cleanup_anchor(dev);
2743
kfree(dev->flow_db);
2844
}
2945
#endif /* _MLX5_IB_FS_H */

drivers/infiniband/hw/mlx5/mlx5_ib.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,8 +237,19 @@ enum {
237237
#define MLX5_IB_NUM_SNIFFER_FTS 2
238238
#define MLX5_IB_NUM_EGRESS_FTS 1
239239
#define MLX5_IB_NUM_FDB_FTS MLX5_BY_PASS_NUM_REGULAR_PRIOS
240+
241+
struct mlx5_ib_anchor {
242+
struct mlx5_flow_table *ft;
243+
struct mlx5_flow_group *fg_goto_table;
244+
struct mlx5_flow_group *fg_drop;
245+
struct mlx5_flow_handle *rule_goto_table;
246+
struct mlx5_flow_handle *rule_drop;
247+
unsigned int rule_goto_table_ref;
248+
};
249+
240250
struct mlx5_ib_flow_prio {
241251
struct mlx5_flow_table *flow_table;
252+
struct mlx5_ib_anchor anchor;
242253
unsigned int refcount;
243254
};
244255

0 commit comments

Comments
 (0)