Skip to content

Commit 43bc0aa

Browse files
rpptweiny2
authored and committed
nvdimm: allow exposing RAM carveouts as NVDIMM DIMM devices
There are use cases, for example virtual machine hosts, that create "persistent" memory regions using the memmap= option on x86 or dummy pmem-region device tree nodes on DT based systems. Both these options are inflexible because they create static regions and the layout of the "persistent" memory cannot be adjusted without a reboot, and sometimes they even require a firmware update. Add a ramdax driver that allows creation of DIMM devices on top of E820_TYPE_PRAM regions and devicetree pmem-region nodes. The DIMMs support label space management on the "device" and provide a flexible way to access RAM using fsdax and devdax. Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org> Reviewed-by: Dan Williams <dan.j.williams@intel.com> Link: https://patch.msgid.link/20251026153841.752061-2-rppt@kernel.org Signed-off-by: Ira Weiny <ira.weiny@intel.com>
1 parent 6146a0f commit 43bc0aa

3 files changed

Lines changed: 302 additions & 0 deletions

File tree

drivers/nvdimm/Kconfig

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,25 @@ config OF_PMEM
9797

9898
Select Y if unsure.
9999

100+
config RAMDAX
101+
tristate "Support persistent memory interfaces on RAM carveouts"
102+
depends on X86_PMEM_LEGACY || OF || COMPILE_TEST
103+
default LIBNVDIMM
104+
help
105+
Allows creation of DAX devices on RAM carveouts.
106+
107+
Memory ranges that are manually specified by the
108+
'memmap=nn[KMG]!ss[KMG]' kernel command line or defined by dummy
109+
pmem-region device tree nodes would be managed by this driver as DIMM
110+
devices with support for dynamic layout of namespaces.
111+
The driver steals 128K at the end of the memmap range for the
112+
namespace management. This allows supporting up to 509 namespaces
113+
(see 'ndctl create-namespace --help').
114+
The driver should be force bound to e820_pmem or pmem-region platform
115+
devices using 'driver_override' device attribute.
116+
117+
Select N if unsure.
118+
100119
config NVDIMM_KEYS
101120
def_bool y
102121
depends on ENCRYPTED_KEYS

drivers/nvdimm/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o
55
obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
66
obj-$(CONFIG_OF_PMEM) += of_pmem.o
77
obj-$(CONFIG_VIRTIO_PMEM) += virtio_pmem.o nd_virtio.o
8+
obj-$(CONFIG_RAMDAX) += ramdax.o
89

910
nd_pmem-y := pmem.o
1011

drivers/nvdimm/ramdax.c

Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/*
3+
* Copyright (c) 2025, Mike Rapoport, Microsoft
4+
*
5+
* Based on e820 pmem driver:
6+
* Copyright (c) 2015, Christoph Hellwig.
7+
* Copyright (c) 2015, Intel Corporation.
8+
*/
9+
#include <linux/platform_device.h>
10+
#include <linux/memory_hotplug.h>
11+
#include <linux/libnvdimm.h>
12+
#include <linux/module.h>
13+
#include <linux/numa.h>
14+
#include <linux/slab.h>
15+
#include <linux/io.h>
16+
#include <linux/of.h>
17+
18+
#include <uapi/linux/ndctl.h>
19+
20+
/* Bytes at the tail of each memory resource that are used as DIMM label storage */
#define LABEL_AREA_SIZE SZ_128K
21+
22+
/*
 * Per-DIMM state, handed to nvdimm_create() as provider data and read back
 * with nvdimm_provider_data() in the config data handlers.
 */
struct ramdax_dimm {
	/* DIMM device returned by nvdimm_create() */
	struct nvdimm *nvdimm;
	/* memremap()ed view of the LABEL_AREA_SIZE bytes holding the labels */
	void *label_area;
};
26+
27+
/*
 * Platform driver remove callback: unregister the nvdimm bus that
 * ramdax_probe() saved as the platform device's driver data.
 */
static void ramdax_remove(struct platform_device *pdev)
{
	nvdimm_bus_unregister(platform_get_drvdata(pdev));
}
33+
34+
static int ramdax_register_region(struct resource *res,
35+
struct nvdimm *nvdimm,
36+
struct nvdimm_bus *nvdimm_bus)
37+
{
38+
struct nd_mapping_desc mapping;
39+
struct nd_region_desc ndr_desc;
40+
struct nd_interleave_set *nd_set;
41+
int nid = phys_to_target_node(res->start);
42+
43+
nd_set = kzalloc(sizeof(*nd_set), GFP_KERNEL);
44+
if (!nd_set)
45+
return -ENOMEM;
46+
47+
nd_set->cookie1 = 0xcafebeefcafebeef;
48+
nd_set->cookie2 = nd_set->cookie1;
49+
nd_set->altcookie = nd_set->cookie1;
50+
51+
memset(&mapping, 0, sizeof(mapping));
52+
mapping.nvdimm = nvdimm;
53+
mapping.start = 0;
54+
mapping.size = resource_size(res) - LABEL_AREA_SIZE;
55+
56+
memset(&ndr_desc, 0, sizeof(ndr_desc));
57+
ndr_desc.res = res;
58+
ndr_desc.numa_node = numa_map_to_online_node(nid);
59+
ndr_desc.target_node = nid;
60+
ndr_desc.num_mappings = 1;
61+
ndr_desc.mapping = &mapping;
62+
ndr_desc.nd_set = nd_set;
63+
64+
if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
65+
goto err_free_nd_set;
66+
67+
return 0;
68+
69+
err_free_nd_set:
70+
kfree(nd_set);
71+
return -ENXIO;
72+
}
73+
74+
static int ramdax_register_dimm(struct resource *res, void *data)
75+
{
76+
resource_size_t start = res->start;
77+
resource_size_t size = resource_size(res);
78+
unsigned long flags = 0, cmd_mask = 0;
79+
struct nvdimm_bus *nvdimm_bus = data;
80+
struct ramdax_dimm *dimm;
81+
int err;
82+
83+
dimm = kzalloc(sizeof(*dimm), GFP_KERNEL);
84+
if (!dimm)
85+
return -ENOMEM;
86+
87+
dimm->label_area = memremap(start + size - LABEL_AREA_SIZE,
88+
LABEL_AREA_SIZE, MEMREMAP_WB);
89+
if (!dimm->label_area) {
90+
err = -ENOMEM;
91+
goto err_free_dimm;
92+
}
93+
94+
set_bit(NDD_LABELING, &flags);
95+
set_bit(NDD_REGISTER_SYNC, &flags);
96+
set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
97+
set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
98+
set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
99+
dimm->nvdimm = nvdimm_create(nvdimm_bus, dimm,
100+
/* dimm_attribute_groups */ NULL,
101+
flags, cmd_mask, 0, NULL);
102+
if (!dimm->nvdimm) {
103+
err = -ENOMEM;
104+
goto err_unmap_label;
105+
}
106+
107+
err = ramdax_register_region(res, dimm->nvdimm, nvdimm_bus);
108+
if (err)
109+
goto err_remove_nvdimm;
110+
111+
return 0;
112+
113+
err_remove_nvdimm:
114+
nvdimm_delete(dimm->nvdimm);
115+
err_unmap_label:
116+
memunmap(dimm->label_area);
117+
err_free_dimm:
118+
kfree(dimm);
119+
return err;
120+
}
121+
122+
static int ramdax_get_config_size(struct nvdimm *nvdimm, int buf_len,
123+
struct nd_cmd_get_config_size *cmd)
124+
{
125+
if (sizeof(*cmd) > buf_len)
126+
return -EINVAL;
127+
128+
*cmd = (struct nd_cmd_get_config_size){
129+
.status = 0,
130+
.config_size = LABEL_AREA_SIZE,
131+
.max_xfer = 8,
132+
};
133+
134+
return 0;
135+
}
136+
137+
static int ramdax_get_config_data(struct nvdimm *nvdimm, int buf_len,
138+
struct nd_cmd_get_config_data_hdr *cmd)
139+
{
140+
struct ramdax_dimm *dimm = nvdimm_provider_data(nvdimm);
141+
142+
if (sizeof(*cmd) > buf_len)
143+
return -EINVAL;
144+
if (struct_size(cmd, out_buf, cmd->in_length) > buf_len)
145+
return -EINVAL;
146+
if (cmd->in_offset + cmd->in_length > LABEL_AREA_SIZE)
147+
return -EINVAL;
148+
149+
memcpy(cmd->out_buf, dimm->label_area + cmd->in_offset, cmd->in_length);
150+
151+
return 0;
152+
}
153+
154+
static int ramdax_set_config_data(struct nvdimm *nvdimm, int buf_len,
155+
struct nd_cmd_set_config_hdr *cmd)
156+
{
157+
struct ramdax_dimm *dimm = nvdimm_provider_data(nvdimm);
158+
159+
if (sizeof(*cmd) > buf_len)
160+
return -EINVAL;
161+
if (struct_size(cmd, in_buf, cmd->in_length) > buf_len)
162+
return -EINVAL;
163+
if (cmd->in_offset + cmd->in_length > LABEL_AREA_SIZE)
164+
return -EINVAL;
165+
166+
memcpy(dimm->label_area + cmd->in_offset, cmd->in_buf, cmd->in_length);
167+
168+
return 0;
169+
}
170+
171+
static int ramdax_nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd,
172+
void *buf, unsigned int buf_len)
173+
{
174+
unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm);
175+
176+
if (!test_bit(cmd, &cmd_mask))
177+
return -ENOTTY;
178+
179+
switch (cmd) {
180+
case ND_CMD_GET_CONFIG_SIZE:
181+
return ramdax_get_config_size(nvdimm, buf_len, buf);
182+
case ND_CMD_GET_CONFIG_DATA:
183+
return ramdax_get_config_data(nvdimm, buf_len, buf);
184+
case ND_CMD_SET_CONFIG_DATA:
185+
return ramdax_set_config_data(nvdimm, buf_len, buf);
186+
default:
187+
return -ENOTTY;
188+
}
189+
}
190+
191+
static int ramdax_ctl(struct nvdimm_bus_descriptor *nd_desc,
192+
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
193+
unsigned int buf_len, int *cmd_rc)
194+
{
195+
/*
196+
* No firmware response to translate, let the transport error
197+
* code take precedence.
198+
*/
199+
*cmd_rc = 0;
200+
201+
if (!nvdimm)
202+
return -ENOTTY;
203+
return ramdax_nvdimm_ctl(nvdimm, cmd, buf, buf_len);
204+
}
205+
206+
#ifdef CONFIG_OF
/*
 * Device tree nodes accepted by ramdax_probe_of().
 *
 * NOTE(review): ramdax_probe_of() references this table outside the #ifdef;
 * presumably of_match_node() drops the reference when CONFIG_OF is off -
 * confirm.
 */
static const struct of_device_id ramdax_of_matches[] = {
	{ .compatible = "pmem-region" },
	{ /* sentinel */ },
};
#endif
212+
213+
static int ramdax_probe_of(struct platform_device *pdev,
214+
struct nvdimm_bus *bus, struct device_node *np)
215+
{
216+
int err;
217+
218+
if (!of_match_node(ramdax_of_matches, np))
219+
return -ENODEV;
220+
221+
for (int i = 0; i < pdev->num_resources; i++) {
222+
err = ramdax_register_dimm(&pdev->resource[i], bus);
223+
if (err)
224+
goto err_unregister;
225+
}
226+
227+
return 0;
228+
229+
err_unregister:
230+
/*
231+
* FIXME: should we unregister the dimms that were registered
232+
* successfully
233+
*/
234+
return err;
235+
}
236+
237+
static int ramdax_probe(struct platform_device *pdev)
238+
{
239+
static struct nvdimm_bus_descriptor nd_desc;
240+
struct device *dev = &pdev->dev;
241+
struct nvdimm_bus *nvdimm_bus;
242+
struct device_node *np;
243+
int rc = -ENXIO;
244+
245+
nd_desc.provider_name = "ramdax";
246+
nd_desc.module = THIS_MODULE;
247+
nd_desc.ndctl = ramdax_ctl;
248+
nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
249+
if (!nvdimm_bus)
250+
goto err;
251+
252+
np = dev_of_node(&pdev->dev);
253+
if (np)
254+
rc = ramdax_probe_of(pdev, nvdimm_bus, np);
255+
else
256+
rc = walk_iomem_res_desc(IORES_DESC_PERSISTENT_MEMORY_LEGACY,
257+
IORESOURCE_MEM, 0, -1, nvdimm_bus,
258+
ramdax_register_dimm);
259+
if (rc)
260+
goto err;
261+
262+
platform_set_drvdata(pdev, nvdimm_bus);
263+
264+
return 0;
265+
err:
266+
nvdimm_bus_unregister(nvdimm_bus);
267+
return rc;
268+
}
269+
270+
static struct platform_driver ramdax_driver = {
271+
.probe = ramdax_probe,
272+
.remove = ramdax_remove,
273+
.driver = {
274+
.name = "ramdax",
275+
},
276+
};
277+
278+
module_platform_driver(ramdax_driver);
279+
280+
MODULE_DESCRIPTION("NVDIMM support for e820 type-12 memory and OF pmem-region");
281+
MODULE_LICENSE("GPL");
282+
MODULE_AUTHOR("Microsoft Corporation");

0 commit comments

Comments
 (0)