
Commit bb644f6

Tomer Tayar authored and ogabbay committed
accel/habanalabs: fix SG table creation for dma-buf mapping

In some cases the calculated number of required entries for the dma-buf SG table is wrong. For example, if the page size is larger than both the DMA max segment size of the importer device and the exported size, or if the exported size is only a part of a phys_pg_pack that is composed of several pages. In these cases, redundant entries are added to the SG table.

Modify the way the number of entries is calculated and the way the entries are prepared.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
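As an illustration of the reworked first pass, below is a minimal standalone sketch of the entry-count calculation using plain integers instead of the driver state. count_sg_entries(), min3_u64() and the values in main() are hypothetical and are not part of the patch, which operates on hl_device data inside memory.c.

/*
 * Minimal standalone sketch (not the driver code) of the new first pass:
 * counting how many SG entries are needed for a given set of device pages.
 * count_sg_entries(), min3_u64() and the values in main() are hypothetical.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

static u64 min3_u64(u64 a, u64 b, u64 c)
{
        u64 m = a < b ? a : b;

        return m < c ? m : c;
}

/* Mirrors the counting loop added by the patch, on plain arrays/integers */
static unsigned int count_sg_entries(const u64 *pages, u64 page_size,
                                     u64 exported_size, u64 dma_max_seg_size)
{
        u64 curr_page = 0, size;
        u64 left_size_to_export = exported_size;
        u64 left_size_in_page = page_size;
        u64 left_size_in_dma_seg = dma_max_seg_size;
        bool next_sg_entry = false;
        unsigned int nents = 1;

        while (true) {
                size = min3_u64(left_size_to_export, left_size_in_page, left_size_in_dma_seg);
                left_size_to_export -= size;
                left_size_in_page -= size;
                left_size_in_dma_seg -= size;

                if (!left_size_to_export)
                        break;

                if (!left_size_in_page) {
                        /* more data left to export, so another page must follow */
                        if (pages[curr_page] + page_size != pages[curr_page + 1])
                                next_sg_entry = true;

                        ++curr_page;
                        left_size_in_page = page_size;
                }

                if (!left_size_in_dma_seg) {
                        next_sg_entry = true;
                        left_size_in_dma_seg = dma_max_seg_size;
                }

                if (next_sg_entry) {
                        ++nents;
                        next_sg_entry = false;
                }
        }

        return nents;
}

int main(void)
{
        /*
         * One 2 MiB device page, importer limited to 1 MiB DMA segments,
         * but only 1 MiB is actually exported: a single SG entry suffices.
         */
        const u64 MiB = 1024 * 1024;
        const u64 pages[] = { 0x10000000 };

        printf("nents = %u\n", count_sg_entries(pages, 2 * MiB, 1 * MiB, 1 * MiB));

        return 0;
}

For this example the sketch prints nents = 1, whereas the removed DIV_ROUND_UP_SECTOR_T-based formula in the old code would have reserved 2 entries for the same case.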
1 parent ba24b5e commit bb644f6

1 file changed

drivers/accel/habanalabs/common/memory.c

Lines changed: 104 additions & 95 deletions
@@ -1535,21 +1535,17 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
                                                 u64 page_size, u64 exported_size,
                                                 struct device *dev, enum dma_data_direction dir)
 {
-        u64 chunk_size, bar_address, dma_max_seg_size, cur_size_to_export, cur_npages;
-        struct asic_fixed_properties *prop;
-        int rc, i, j, nents, cur_page;
+        u64 dma_max_seg_size, curr_page, size, chunk_size, left_size_to_export, left_size_in_page,
+                left_size_in_dma_seg, device_address, bar_address;
+        struct asic_fixed_properties *prop = &hdev->asic_prop;
         struct scatterlist *sg;
+        unsigned int nents, i;
         struct sg_table *sgt;
+        bool next_sg_entry;
+        int rc;
 
-        prop = &hdev->asic_prop;
-
-        dma_max_seg_size = dma_get_max_seg_size(dev);
-
-        /* We would like to align the max segment size to PAGE_SIZE, so the
-         * SGL will contain aligned addresses that can be easily mapped to
-         * an MMU
-         */
-        dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE);
+        /* Align max segment size to PAGE_SIZE to fit the minimal IOMMU mapping granularity */
+        dma_max_seg_size = ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE);
         if (dma_max_seg_size < PAGE_SIZE) {
                 dev_err_ratelimited(hdev->dev,
                                 "dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n",
@@ -1561,120 +1557,133 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
         if (!sgt)
                 return ERR_PTR(-ENOMEM);
 
-        cur_size_to_export = exported_size;
+        /* Calculate the required number of entries for the SG table */
+        curr_page = 0;
+        nents = 1;
+        left_size_to_export = exported_size;
+        left_size_in_page = page_size;
+        left_size_in_dma_seg = dma_max_seg_size;
+        next_sg_entry = false;
+
+        while (true) {
+                size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg);
+                left_size_to_export -= size;
+                left_size_in_page -= size;
+                left_size_in_dma_seg -= size;
+
+                if (!left_size_to_export)
+                        break;
 
-        /* If the size of each page is larger than the dma max segment size,
-         * then we can't combine pages and the number of entries in the SGL
-         * will just be the
-         * <number of pages> * <chunks of max segment size in each page>
-         */
-        if (page_size > dma_max_seg_size) {
-                /* we should limit number of pages according to the exported size */
-                cur_npages = DIV_ROUND_UP_SECTOR_T(cur_size_to_export, page_size);
-                nents = cur_npages * DIV_ROUND_UP_SECTOR_T(page_size, dma_max_seg_size);
-        } else {
-                cur_npages = npages;
-
-                /* Get number of non-contiguous chunks */
-                for (i = 1, nents = 1, chunk_size = page_size ; i < cur_npages ; i++) {
-                        if (pages[i - 1] + page_size != pages[i] ||
-                                        chunk_size + page_size > dma_max_seg_size) {
-                                nents++;
-                                chunk_size = page_size;
-                                continue;
-                        }
+                if (!left_size_in_page) {
+                        /* left_size_to_export is not zero so there must be another page */
+                        if (pages[curr_page] + page_size != pages[curr_page + 1])
+                                next_sg_entry = true;
+
+                        ++curr_page;
+                        left_size_in_page = page_size;
+                }
 
-                        chunk_size += page_size;
+                if (!left_size_in_dma_seg) {
+                        next_sg_entry = true;
+                        left_size_in_dma_seg = dma_max_seg_size;
+                }
+
+                if (next_sg_entry) {
+                        ++nents;
+                        next_sg_entry = false;
                 }
         }
 
         rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
         if (rc)
-                goto error_free;
-
-        cur_page = 0;
-
-        if (page_size > dma_max_seg_size) {
-                u64 size_left, cur_device_address = 0;
+                goto err_free_sgt;
 
-                size_left = page_size;
+        /* Prepare the SG table entries */
+        curr_page = 0;
+        device_address = pages[curr_page];
+        left_size_to_export = exported_size;
+        left_size_in_page = page_size;
+        left_size_in_dma_seg = dma_max_seg_size;
+        next_sg_entry = false;
 
-                /* Need to split each page into the number of chunks of
-                 * dma_max_seg_size
-                 */
-                for_each_sgtable_dma_sg(sgt, sg, i) {
-                        if (size_left == page_size)
-                                cur_device_address =
-                                        pages[cur_page] - prop->dram_base_address;
-                        else
-                                cur_device_address += dma_max_seg_size;
-
-                        /* make sure not to export over exported size */
-                        chunk_size = min3(size_left, dma_max_seg_size, cur_size_to_export);
-
-                        bar_address = hdev->dram_pci_bar_start + cur_device_address;
-
-                        rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
-                        if (rc)
-                                goto error_unmap;
+        for_each_sgtable_dma_sg(sgt, sg, i) {
+                bar_address = hdev->dram_pci_bar_start + (device_address - prop->dram_base_address);
+                chunk_size = 0;
+
+                for ( ; curr_page < npages ; ++curr_page) {
+                        size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg);
+                        chunk_size += size;
+                        left_size_to_export -= size;
+                        left_size_in_page -= size;
+                        left_size_in_dma_seg -= size;
+
+                        if (!left_size_to_export)
+                                break;
+
+                        if (!left_size_in_page) {
+                                /* left_size_to_export is not zero so there must be another page */
+                                if (pages[curr_page] + page_size != pages[curr_page + 1]) {
+                                        device_address = pages[curr_page + 1];
+                                        next_sg_entry = true;
+                                }
+
+                                left_size_in_page = page_size;
+                        }
 
-                        cur_size_to_export -= chunk_size;
+                        if (!left_size_in_dma_seg) {
+                                /*
+                                 * Skip setting a new device address if already moving to a page
+                                 * which is not contiguous with the current page.
+                                 */
+                                if (!next_sg_entry) {
+                                        device_address += chunk_size;
+                                        next_sg_entry = true;
+                                }
+
+                                left_size_in_dma_seg = dma_max_seg_size;
+                        }
 
-                        if (size_left > dma_max_seg_size) {
-                                size_left -= dma_max_seg_size;
-                        } else {
-                                cur_page++;
-                                size_left = page_size;
+                        if (next_sg_entry) {
+                                next_sg_entry = false;
+                                break;
                         }
                 }
-        } else {
-                /* Merge pages and put them into the scatterlist */
-                for_each_sgtable_dma_sg(sgt, sg, i) {
-                        chunk_size = page_size;
-                        for (j = cur_page + 1 ; j < cur_npages ; j++) {
-                                if (pages[j - 1] + page_size != pages[j] ||
-                                                chunk_size + page_size > dma_max_seg_size)
-                                        break;
-
-                                chunk_size += page_size;
-                        }
-
-                        bar_address = hdev->dram_pci_bar_start +
-                                        (pages[cur_page] - prop->dram_base_address);
 
-                        /* make sure not to export over exported size */
-                        chunk_size = min(chunk_size, cur_size_to_export);
-                        rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
-                        if (rc)
-                                goto error_unmap;
+                rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
+                if (rc)
+                        goto err_unmap;
+        }
 
-                        cur_size_to_export -= chunk_size;
-                        cur_page = j;
-                }
+        /* There should be nothing left to export exactly after looping over all SG elements */
+        if (left_size_to_export) {
+                dev_err(hdev->dev,
+                        "left size to export %#llx after initializing %u SG elements\n",
+                        left_size_to_export, sgt->nents);
+                rc = -ENOMEM;
+                goto err_unmap;
         }
 
-        /* Because we are not going to include a CPU list we want to have some
-         * chance that other users will detect this by setting the orig_nents
-         * to 0 and using only nents (length of DMA list) when going over the
-         * sgl
+        /*
+         * Because we are not going to include a CPU list, we want to have some chance that other
+         * users will detect this when going over SG table, by setting the orig_nents to 0 and using
+         * only nents (length of DMA list).
          */
         sgt->orig_nents = 0;
 
         return sgt;
 
-error_unmap:
+err_unmap:
         for_each_sgtable_dma_sg(sgt, sg, i) {
                 if (!sg_dma_len(sg))
                         continue;
 
-                dma_unmap_resource(dev, sg_dma_address(sg),
-                                sg_dma_len(sg), dir,
+                dma_unmap_resource(dev, sg_dma_address(sg), sg_dma_len(sg), dir,
                                 DMA_ATTR_SKIP_CPU_SYNC);
         }
 
         sg_free_table(sgt);
 
-error_free:
+err_free_sgt:
         kfree(sgt);
         return ERR_PTR(rc);
 }
