Skip to content

Commit 893e2f9

Browse files
committed
Merge tag 'dma-mapping-6.8-2024-01-08' of git://git.infradead.org/users/hch/dma-mapping
Pull dma-mapping updates from Christoph Hellwig: - reduce area lock contention for non-primary IO TLB pools (Petr Tesarik) - don't store redundant offsets in the dma_ranges structures (Robin Murphy) - clear dev->dma_mem when freeing per-device pools (Joakim Zhang) * tag 'dma-mapping-6.8-2024-01-08' of git://git.infradead.org/users/hch/dma-mapping: dma-mapping: clear dev->dma_mem to NULL after freeing it swiotlb: reduce area lock contention for non-primary IO TLB pools dma-mapping: don't store redundant offsets
2 parents 457e4f9 + b07bc23 commit 893e2f9

6 files changed

Lines changed: 70 additions & 46 deletions

File tree

drivers/acpi/scan.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1532,7 +1532,6 @@ int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map)
15321532
r->cpu_start = rentry->res->start;
15331533
r->dma_start = rentry->res->start - rentry->offset;
15341534
r->size = resource_size(rentry->res);
1535-
r->offset = rentry->offset;
15361535
r++;
15371536
}
15381537
}

drivers/of/address.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -955,7 +955,6 @@ int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map)
955955
r->cpu_start = range.cpu_addr;
956956
r->dma_start = range.bus_addr;
957957
r->size = range.size;
958-
r->offset = range.cpu_addr - range.bus_addr;
959958
r++;
960959
}
961960
out:

include/linux/dma-direct.h

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,19 @@ struct bus_dma_region {
2121
phys_addr_t cpu_start;
2222
dma_addr_t dma_start;
2323
u64 size;
24-
u64 offset;
2524
};
2625

2726
static inline dma_addr_t translate_phys_to_dma(struct device *dev,
2827
phys_addr_t paddr)
2928
{
3029
const struct bus_dma_region *m;
3130

32-
for (m = dev->dma_range_map; m->size; m++)
33-
if (paddr >= m->cpu_start && paddr - m->cpu_start < m->size)
34-
return (dma_addr_t)paddr - m->offset;
31+
for (m = dev->dma_range_map; m->size; m++) {
32+
u64 offset = paddr - m->cpu_start;
33+
34+
if (paddr >= m->cpu_start && offset < m->size)
35+
return m->dma_start + offset;
36+
}
3537

3638
/* make sure dma_capable fails when no translation is available */
3739
return DMA_MAPPING_ERROR;
@@ -42,9 +44,12 @@ static inline phys_addr_t translate_dma_to_phys(struct device *dev,
4244
{
4345
const struct bus_dma_region *m;
4446

45-
for (m = dev->dma_range_map; m->size; m++)
46-
if (dma_addr >= m->dma_start && dma_addr - m->dma_start < m->size)
47-
return (phys_addr_t)dma_addr + m->offset;
47+
for (m = dev->dma_range_map; m->size; m++) {
48+
u64 offset = dma_addr - m->dma_start;
49+
50+
if (dma_addr >= m->dma_start && offset < m->size)
51+
return m->cpu_start + offset;
52+
}
4853

4954
return (phys_addr_t)-1;
5055
}

kernel/dma/coherent.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,10 @@ int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
132132

133133
void dma_release_coherent_memory(struct device *dev)
134134
{
135-
if (dev)
135+
if (dev) {
136136
_dma_release_coherent_memory(dev->dma_mem);
137+
dev->dma_mem = NULL;
138+
}
137139
}
138140

139141
static void *__dma_alloc_from_coherent(struct device *dev,

kernel/dma/direct.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -677,7 +677,6 @@ int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start,
677677
return -ENOMEM;
678678
map[0].cpu_start = cpu_start;
679679
map[0].dma_start = dma_start;
680-
map[0].offset = offset;
681680
map[0].size = size;
682681
dev->dma_range_map = map;
683682
return 0;

kernel/dma/swiotlb.c

Lines changed: 55 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -957,7 +957,7 @@ static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
957957
#endif /* CONFIG_DEBUG_FS */
958958

959959
/**
960-
* swiotlb_area_find_slots() - search for slots in one IO TLB memory area
960+
* swiotlb_search_pool_area() - search one memory area in one pool
961961
* @dev: Device which maps the buffer.
962962
* @pool: Memory pool to be searched.
963963
* @area_index: Index of the IO TLB memory area to be searched.
@@ -972,7 +972,7 @@ static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
972972
*
973973
* Return: Index of the first allocated slot, or -1 on error.
974974
*/
975-
static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool,
975+
static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool,
976976
int area_index, phys_addr_t orig_addr, size_t alloc_size,
977977
unsigned int alloc_align_mask)
978978
{
@@ -1066,41 +1066,50 @@ static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool,
10661066
return slot_index;
10671067
}
10681068

1069+
#ifdef CONFIG_SWIOTLB_DYNAMIC
1070+
10691071
/**
1070-
* swiotlb_pool_find_slots() - search for slots in one memory pool
1072+
* swiotlb_search_area() - search one memory area in all pools
10711073
* @dev: Device which maps the buffer.
1072-
* @pool: Memory pool to be searched.
1074+
* @start_cpu: Start CPU number.
1075+
* @cpu_offset: Offset from @start_cpu.
10731076
* @orig_addr: Original (non-bounced) IO buffer address.
10741077
* @alloc_size: Total requested size of the bounce buffer,
10751078
* including initial alignment padding.
10761079
* @alloc_align_mask: Required alignment of the allocated buffer.
1080+
* @retpool: Used memory pool, updated on return.
10771081
*
1078-
* Search through one memory pool to find a sequence of slots that match the
1082+
* Search one memory area in all pools for a sequence of slots that match the
10791083
* allocation constraints.
10801084
*
10811085
* Return: Index of the first allocated slot, or -1 on error.
10821086
*/
1083-
static int swiotlb_pool_find_slots(struct device *dev, struct io_tlb_pool *pool,
1084-
phys_addr_t orig_addr, size_t alloc_size,
1085-
unsigned int alloc_align_mask)
1087+
static int swiotlb_search_area(struct device *dev, int start_cpu,
1088+
int cpu_offset, phys_addr_t orig_addr, size_t alloc_size,
1089+
unsigned int alloc_align_mask, struct io_tlb_pool **retpool)
10861090
{
1087-
int start = raw_smp_processor_id() & (pool->nareas - 1);
1088-
int i = start, index;
1089-
1090-
do {
1091-
index = swiotlb_area_find_slots(dev, pool, i, orig_addr,
1092-
alloc_size, alloc_align_mask);
1093-
if (index >= 0)
1094-
return index;
1095-
if (++i >= pool->nareas)
1096-
i = 0;
1097-
} while (i != start);
1091+
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
1092+
struct io_tlb_pool *pool;
1093+
int area_index;
1094+
int index = -1;
10981095

1099-
return -1;
1096+
rcu_read_lock();
1097+
list_for_each_entry_rcu(pool, &mem->pools, node) {
1098+
if (cpu_offset >= pool->nareas)
1099+
continue;
1100+
area_index = (start_cpu + cpu_offset) & (pool->nareas - 1);
1101+
index = swiotlb_search_pool_area(dev, pool, area_index,
1102+
orig_addr, alloc_size,
1103+
alloc_align_mask);
1104+
if (index >= 0) {
1105+
*retpool = pool;
1106+
break;
1107+
}
1108+
}
1109+
rcu_read_unlock();
1110+
return index;
11001111
}
11011112

1102-
#ifdef CONFIG_SWIOTLB_DYNAMIC
1103-
11041113
/**
11051114
* swiotlb_find_slots() - search for slots in the whole swiotlb
11061115
* @dev: Device which maps the buffer.
@@ -1124,18 +1133,17 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
11241133
unsigned long nslabs;
11251134
unsigned long flags;
11261135
u64 phys_limit;
1136+
int cpu, i;
11271137
int index;
11281138

1129-
rcu_read_lock();
1130-
list_for_each_entry_rcu(pool, &mem->pools, node) {
1131-
index = swiotlb_pool_find_slots(dev, pool, orig_addr,
1132-
alloc_size, alloc_align_mask);
1133-
if (index >= 0) {
1134-
rcu_read_unlock();
1139+
cpu = raw_smp_processor_id();
1140+
for (i = 0; i < default_nareas; ++i) {
1141+
index = swiotlb_search_area(dev, cpu, i, orig_addr, alloc_size,
1142+
alloc_align_mask, &pool);
1143+
if (index >= 0)
11351144
goto found;
1136-
}
11371145
}
1138-
rcu_read_unlock();
1146+
11391147
if (!mem->can_grow)
11401148
return -1;
11411149

@@ -1148,8 +1156,8 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
11481156
if (!pool)
11491157
return -1;
11501158

1151-
index = swiotlb_pool_find_slots(dev, pool, orig_addr,
1152-
alloc_size, alloc_align_mask);
1159+
index = swiotlb_search_pool_area(dev, pool, 0, orig_addr,
1160+
alloc_size, alloc_align_mask);
11531161
if (index < 0) {
11541162
swiotlb_dyn_free(&pool->rcu);
11551163
return -1;
@@ -1192,9 +1200,21 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
11921200
size_t alloc_size, unsigned int alloc_align_mask,
11931201
struct io_tlb_pool **retpool)
11941202
{
1195-
*retpool = &dev->dma_io_tlb_mem->defpool;
1196-
return swiotlb_pool_find_slots(dev, *retpool,
1197-
orig_addr, alloc_size, alloc_align_mask);
1203+
struct io_tlb_pool *pool;
1204+
int start, i;
1205+
int index;
1206+
1207+
*retpool = pool = &dev->dma_io_tlb_mem->defpool;
1208+
i = start = raw_smp_processor_id() & (pool->nareas - 1);
1209+
do {
1210+
index = swiotlb_search_pool_area(dev, pool, i, orig_addr,
1211+
alloc_size, alloc_align_mask);
1212+
if (index >= 0)
1213+
return index;
1214+
if (++i >= pool->nareas)
1215+
i = 0;
1216+
} while (i != start);
1217+
return -1;
11981218
}
11991219

12001220
#endif /* CONFIG_SWIOTLB_DYNAMIC */

0 commit comments

Comments
 (0)