Skip to content

Commit 8f012db

Browse files
AlisonSchofieldhansendc
authored andcommitted
x86/numa: Introduce numa_fill_memblks()
numa_fill_memblks() fills in the gaps in numa_meminfo memblks over a physical address range. The ACPI driver will use numa_fill_memblks() to implement a new Linux policy that prescribes extending proximity domains in a portion of a CFMWS window to the entire window. Dan Williams offered this explanation of the policy: A CFMWS is an ACPI data structure that indicates *potential* locations where CXL memory can be placed. It is the playground where the CXL driver has free rein to establish regions. That space can be populated by BIOS created regions, or driver created regions, after hotplug or other reconfiguration. When BIOS creates a region in a CXL Window it additionally describes that subset of the Window range in the other typical ACPI tables SRAT, SLIT, and HMAT. The rationale for BIOS not pre-describing the entire CXL Window in SRAT, SLIT, and HMAT is that it cannot predict the future. I.e. there is nothing stopping higher or lower performance devices being placed in the same Window. Compare that to ACPI memory hotplug that just onlines additional capacity in the proximity domain with little freedom for dynamic performance differentiation. That leaves the OS with a choice, should unpopulated window capacity match the proximity domain of an existing region, or should it allocate a new one? This patch takes the simple position of minimizing proximity domain proliferation by reusing any proximity domain intersection for the entire Window. If the Window has no intersections then allocate a new proximity domain. Note that SRAT, SLIT and HMAT information can be enumerated dynamically in a standard way from device provided data. Think of CXL as the end of ACPI needing to describe memory attributes, CXL offers a standard discovery model for performance attributes, but Linux still needs to interoperate with the old regime.
Reported-by: Derick Marks <derick.w.marks@intel.com> Suggested-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Alison Schofield <alison.schofield@intel.com> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Reviewed-by: Dan Williams <dan.j.williams@intel.com> Tested-by: Derick Marks <derick.w.marks@intel.com> Link: https://lore.kernel.org/all/ef078a6f056ca974e5af85997013c0fda9e3326d.1689018477.git.alison.schofield%40intel.com
1 parent 0bb80ec commit 8f012db

3 files changed

Lines changed: 89 additions & 0 deletions

File tree

arch/x86/include/asm/sparsemem.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ extern int phys_to_target_node(phys_addr_t start);
3737
#define phys_to_target_node phys_to_target_node
3838
extern int memory_add_physaddr_to_nid(u64 start);
3939
#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
40+
extern int numa_fill_memblks(u64 start, u64 end);
41+
#define numa_fill_memblks numa_fill_memblks
4042
#endif
4143
#endif /* __ASSEMBLY__ */
4244

arch/x86/mm/numa.c

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <linux/nodemask.h>
1212
#include <linux/sched.h>
1313
#include <linux/topology.h>
14+
#include <linux/sort.h>
1415

1516
#include <asm/e820/api.h>
1617
#include <asm/proto.h>
@@ -961,4 +962,83 @@ int memory_add_physaddr_to_nid(u64 start)
961962
return nid;
962963
}
963964
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
965+
966+
/*
 * sort() comparator for an array of pointers to struct numa_memblk.
 * Orders the pointed-to memblks by ascending ->start.
 *
 * @a, @b: pointers to array elements, i.e. pointers to
 *         (struct numa_memblk *).
 *
 * Returns a negative value, zero, or a positive value when the first
 * memblk starts below, at, or above the second one.
 *
 * The result is built from two comparisons rather than a subtraction:
 * ->start is a 64-bit address and the return type is int, so returning
 * the difference would truncate it and could report the wrong sign (or
 * zero) for addresses that are far apart.
 */
static int __init cmp_memblk(const void *a, const void *b)
{
	const struct numa_memblk *ma = *(const struct numa_memblk * const *)a;
	const struct numa_memblk *mb = *(const struct numa_memblk * const *)b;

	return (ma->start > mb->start) - (ma->start < mb->start);
}
973+
974+
/*
 * Init-time scratch list of pointers to numa_meminfo memblks; filled and
 * sorted by numa_fill_memblks().
 */
static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
975+
976+
/**
977+
* numa_fill_memblks - Fill gaps in numa_meminfo memblks
978+
* @start: address to begin fill
979+
* @end: address to end fill
980+
*
981+
* Find and extend numa_meminfo memblks to cover the @start-@end
982+
* physical address range, such that the first memblk includes
983+
* @start, the last memblk includes @end, and any gaps in between
984+
* are filled.
985+
*
986+
* RETURNS:
987+
* 0 : Success
988+
* NUMA_NO_MEMBLK : No memblk exists in @start-@end range
989+
*/
990+
991+
int __init numa_fill_memblks(u64 start, u64 end)
992+
{
993+
struct numa_memblk **blk = &numa_memblk_list[0];
994+
struct numa_meminfo *mi = &numa_meminfo;
995+
int count = 0;
996+
u64 prev_end;
997+
998+
/*
999+
* Create a list of pointers to numa_meminfo memblks that
1000+
* overlap start, end. Exclude (start == bi->end) since
1001+
* end addresses in both a CFMWS range and a memblk range
1002+
* are exclusive.
1003+
*
1004+
* This list of pointers is used to make in-place changes
1005+
* that fill out the numa_meminfo memblks.
1006+
*/
1007+
for (int i = 0; i < mi->nr_blks; i++) {
1008+
struct numa_memblk *bi = &mi->blk[i];
1009+
1010+
if (start < bi->end && end >= bi->start) {
1011+
blk[count] = &mi->blk[i];
1012+
count++;
1013+
}
1014+
}
1015+
if (!count)
1016+
return NUMA_NO_MEMBLK;
1017+
1018+
/* Sort the list of pointers in memblk->start order */
1019+
sort(&blk[0], count, sizeof(blk[0]), cmp_memblk, NULL);
1020+
1021+
/* Make sure the first/last memblks include start/end */
1022+
blk[0]->start = min(blk[0]->start, start);
1023+
blk[count - 1]->end = max(blk[count - 1]->end, end);
1024+
1025+
/*
1026+
* Fill any gaps by tracking the previous memblks
1027+
* end address and backfilling to it if needed.
1028+
*/
1029+
prev_end = blk[0]->end;
1030+
for (int i = 1; i < count; i++) {
1031+
struct numa_memblk *curr = blk[i];
1032+
1033+
if (prev_end >= curr->start) {
1034+
if (prev_end < curr->end)
1035+
prev_end = curr->end;
1036+
} else {
1037+
curr->start = prev_end;
1038+
prev_end = curr->end;
1039+
}
1040+
}
1041+
return 0;
1042+
}
1043+
9641044
#endif

include/linux/numa.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#define MAX_NUMNODES (1 << NODES_SHIFT)
1313

1414
#define NUMA_NO_NODE (-1)
15+
#define NUMA_NO_MEMBLK (-1)
1516

1617
/* optionally keep NUMA memory info available post init */
1718
#ifdef CONFIG_NUMA_KEEP_MEMINFO
@@ -43,6 +44,12 @@ static inline int phys_to_target_node(u64 start)
4344
return 0;
4445
}
4546
#endif
47+
#ifndef numa_fill_memblks
48+
/*
 * Fallback for builds where no numa_fill_memblks macro has been defined
 * (see the surrounding #ifndef): no memblks are touched and
 * NUMA_NO_MEMBLK is always returned.
 */
static inline int __init numa_fill_memblks(u64 start, u64 end)
{
	return NUMA_NO_MEMBLK;
}
52+
#endif
4653
#else /* !CONFIG_NUMA */
4754
static inline int numa_map_to_online_node(int node)
4855
{

0 commit comments

Comments
 (0)