Skip to content

Commit 84ec985

Browse files
ming4li authored and davejiang committed
cxl/mem: Fix no cxl_nvd during pmem region auto-assembling
When CXL subsystem is auto-assembling a pmem region during cxl endpoint port probing, always hit below calltrace.

 BUG: kernel NULL pointer dereference, address: 0000000000000078
 #PF: supervisor read access in kernel mode
 #PF: error_code(0x0000) - not-present page
 RIP: 0010:cxl_pmem_region_probe+0x22e/0x360 [cxl_pmem]
 Call Trace:
  <TASK>
  ? __die+0x24/0x70
  ? page_fault_oops+0x82/0x160
  ? do_user_addr_fault+0x65/0x6b0
  ? exc_page_fault+0x7d/0x170
  ? asm_exc_page_fault+0x26/0x30
  ? cxl_pmem_region_probe+0x22e/0x360 [cxl_pmem]
  ? cxl_pmem_region_probe+0x1ac/0x360 [cxl_pmem]
  cxl_bus_probe+0x1b/0x60 [cxl_core]
  really_probe+0x173/0x410
  ? __pfx___device_attach_driver+0x10/0x10
  __driver_probe_device+0x80/0x170
  driver_probe_device+0x1e/0x90
  __device_attach_driver+0x90/0x120
  bus_for_each_drv+0x84/0xe0
  __device_attach+0xbc/0x1f0
  bus_probe_device+0x90/0xa0
  device_add+0x51c/0x710
  devm_cxl_add_pmem_region+0x1b5/0x380 [cxl_core]
  cxl_bus_probe+0x1b/0x60 [cxl_core]

The cxl_nvd of the memdev needs to be available during the pmem region probe. Currently the cxl_nvd is registered after the endpoint port probe. The endpoint probe, in the case of autoassembly of regions, can cause a pmem region probe requiring the not yet available cxl_nvd. Adjust the sequence so this dependency is met.

This requires adding a port parameter to cxl_find_nvdimm_bridge() that can be used to query the ancestor root port. The endpoint port is not yet available, but will share a common ancestor with its parent, so start the query from there instead.
Fixes: f17b558 ("cxl/pmem: Refactor nvdimm device registration, delete the workqueue")
Co-developed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Li Ming <ming4.li@intel.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Link: https://patch.msgid.link/20240612064423.2567625-1-ming4.li@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
1 parent 6ba59ff commit 84ec985

4 files changed

Lines changed: 23 additions & 16 deletions

File tree

drivers/cxl/core/pmem.c

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,14 @@ static int match_nvdimm_bridge(struct device *dev, void *data)
6262
return is_cxl_nvdimm_bridge(dev);
6363
}
6464

65-
struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd)
65+
/**
66+
* cxl_find_nvdimm_bridge() - find a bridge device relative to a port
67+
* @port: any descendant port of an nvdimm-bridge associated
68+
* root-cxl-port
69+
*/
70+
struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port)
6671
{
67-
struct cxl_root *cxl_root __free(put_cxl_root) =
68-
find_cxl_root(cxlmd->endpoint);
72+
struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
6973
struct device *dev;
7074

7175
if (!cxl_root)
@@ -242,18 +246,20 @@ static void cxlmd_release_nvdimm(void *_cxlmd)
242246

243247
/**
244248
* devm_cxl_add_nvdimm() - add a bridge between a cxl_memdev and an nvdimm
249+
* @parent_port: parent port for the (to be added) @cxlmd endpoint port
245250
* @cxlmd: cxl_memdev instance that will perform LIBNVDIMM operations
246251
*
247252
* Return: 0 on success negative error code on failure.
248253
*/
249-
int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd)
254+
int devm_cxl_add_nvdimm(struct cxl_port *parent_port,
255+
struct cxl_memdev *cxlmd)
250256
{
251257
struct cxl_nvdimm_bridge *cxl_nvb;
252258
struct cxl_nvdimm *cxl_nvd;
253259
struct device *dev;
254260
int rc;
255261

256-
cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
262+
cxl_nvb = cxl_find_nvdimm_bridge(parent_port);
257263
if (!cxl_nvb)
258264
return -ENODEV;
259265

drivers/cxl/core/region.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2847,7 +2847,7 @@ static int cxl_pmem_region_alloc(struct cxl_region *cxlr)
28472847
* bridge for one device is the same for all.
28482848
*/
28492849
if (i == 0) {
2850-
cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
2850+
cxl_nvb = cxl_find_nvdimm_bridge(cxlmd->endpoint);
28512851
if (!cxl_nvb)
28522852
return -ENODEV;
28532853
cxlr->cxl_nvb = cxl_nvb;

drivers/cxl/cxl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -855,8 +855,8 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
855855
struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
856856
bool is_cxl_nvdimm(struct device *dev);
857857
bool is_cxl_nvdimm_bridge(struct device *dev);
858-
int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd);
859-
struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd);
858+
int devm_cxl_add_nvdimm(struct cxl_port *parent_port, struct cxl_memdev *cxlmd);
859+
struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port);
860860

861861
#ifdef CONFIG_CXL_REGION
862862
bool is_cxl_pmem_region(struct device *dev);

drivers/cxl/mem.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,15 @@ static int cxl_mem_probe(struct device *dev)
152152
return -ENXIO;
153153
}
154154

155+
if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) {
156+
rc = devm_cxl_add_nvdimm(parent_port, cxlmd);
157+
if (rc) {
158+
if (rc == -ENODEV)
159+
dev_info(dev, "PMEM disabled by platform\n");
160+
return rc;
161+
}
162+
}
163+
155164
if (dport->rch)
156165
endpoint_parent = parent_port->uport_dev;
157166
else
@@ -174,14 +183,6 @@ static int cxl_mem_probe(struct device *dev)
174183
if (rc)
175184
return rc;
176185

177-
if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) {
178-
rc = devm_cxl_add_nvdimm(cxlmd);
179-
if (rc == -ENODEV)
180-
dev_info(dev, "PMEM disabled by platform\n");
181-
else
182-
return rc;
183-
}
184-
185186
/*
186187
* The kernel may be operating out of CXL memory on this device,
187188
* there is no spec defined way to determine whether this device

0 commit comments

Comments
 (0)