@@ -1535,21 +1535,17 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
15351535 u64 page_size , u64 exported_size ,
15361536 struct device * dev , enum dma_data_direction dir )
15371537{
1538- u64 chunk_size , bar_address , dma_max_seg_size , cur_size_to_export , cur_npages ;
1539- struct asic_fixed_properties * prop ;
1540- int rc , i , j , nents , cur_page ;
1538+ u64 dma_max_seg_size , curr_page , size , chunk_size , left_size_to_export , left_size_in_page ,
1539+ left_size_in_dma_seg , device_address , bar_address ;
1540+ struct asic_fixed_properties * prop = & hdev -> asic_prop ;
15411541 struct scatterlist * sg ;
1542+ unsigned int nents , i ;
15421543 struct sg_table * sgt ;
1544+ bool next_sg_entry ;
1545+ int rc ;
15431546
1544- prop = & hdev -> asic_prop ;
1545-
1546- dma_max_seg_size = dma_get_max_seg_size (dev );
1547-
1548- /* We would like to align the max segment size to PAGE_SIZE, so the
1549- * SGL will contain aligned addresses that can be easily mapped to
1550- * an MMU
1551- */
1552- dma_max_seg_size = ALIGN_DOWN (dma_max_seg_size , PAGE_SIZE );
1547+ /* Align max segment size to PAGE_SIZE to fit the minimal IOMMU mapping granularity */
1548+ dma_max_seg_size = ALIGN_DOWN (dma_get_max_seg_size (dev ), PAGE_SIZE );
15531549 if (dma_max_seg_size < PAGE_SIZE ) {
15541550 dev_err_ratelimited (hdev -> dev ,
15551551 "dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n" ,
@@ -1561,120 +1557,133 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
15611557 if (!sgt )
15621558 return ERR_PTR (- ENOMEM );
15631559
1564- cur_size_to_export = exported_size ;
1560+ /* Calculate the required number of entries for the SG table */
1561+ curr_page = 0 ;
1562+ nents = 1 ;
1563+ left_size_to_export = exported_size ;
1564+ left_size_in_page = page_size ;
1565+ left_size_in_dma_seg = dma_max_seg_size ;
1566+ next_sg_entry = false;
1567+
1568+ while (true) {
1569+ size = min3 (left_size_to_export , left_size_in_page , left_size_in_dma_seg );
1570+ left_size_to_export -= size ;
1571+ left_size_in_page -= size ;
1572+ left_size_in_dma_seg -= size ;
1573+
1574+ if (!left_size_to_export )
1575+ break ;
15651576
1566- /* If the size of each page is larger than the dma max segment size,
1567- * then we can't combine pages and the number of entries in the SGL
1568- * will just be the
1569- * <number of pages> * <chunks of max segment size in each page>
1570- */
1571- if (page_size > dma_max_seg_size ) {
1572- /* we should limit number of pages according to the exported size */
1573- cur_npages = DIV_ROUND_UP_SECTOR_T (cur_size_to_export , page_size );
1574- nents = cur_npages * DIV_ROUND_UP_SECTOR_T (page_size , dma_max_seg_size );
1575- } else {
1576- cur_npages = npages ;
1577-
1578- /* Get number of non-contiguous chunks */
1579- for (i = 1 , nents = 1 , chunk_size = page_size ; i < cur_npages ; i ++ ) {
1580- if (pages [i - 1 ] + page_size != pages [i ] ||
1581- chunk_size + page_size > dma_max_seg_size ) {
1582- nents ++ ;
1583- chunk_size = page_size ;
1584- continue ;
1585- }
1577+ if (!left_size_in_page ) {
1578+ /* left_size_to_export is not zero so there must be another page */
1579+ if (pages [curr_page ] + page_size != pages [curr_page + 1 ])
1580+ next_sg_entry = true;
1581+
1582+ ++ curr_page ;
1583+ left_size_in_page = page_size ;
1584+ }
15861585
1587- chunk_size += page_size ;
1586+ if (!left_size_in_dma_seg ) {
1587+ next_sg_entry = true;
1588+ left_size_in_dma_seg = dma_max_seg_size ;
1589+ }
1590+
1591+ if (next_sg_entry ) {
1592+ ++ nents ;
1593+ next_sg_entry = false;
15881594 }
15891595 }
15901596
15911597 rc = sg_alloc_table (sgt , nents , GFP_KERNEL | __GFP_ZERO );
15921598 if (rc )
1593- goto error_free ;
1594-
1595- cur_page = 0 ;
1596-
1597- if (page_size > dma_max_seg_size ) {
1598- u64 size_left , cur_device_address = 0 ;
1599+ goto err_free_sgt ;
15991600
1600- size_left = page_size ;
1601+ /* Prepare the SG table entries */
1602+ curr_page = 0 ;
1603+ device_address = pages [curr_page ];
1604+ left_size_to_export = exported_size ;
1605+ left_size_in_page = page_size ;
1606+ left_size_in_dma_seg = dma_max_seg_size ;
1607+ next_sg_entry = false;
16011608
1602- /* Need to split each page into the number of chunks of
1603- * dma_max_seg_size
1604- */
1605- for_each_sgtable_dma_sg (sgt , sg , i ) {
1606- if (size_left == page_size )
1607- cur_device_address =
1608- pages [cur_page ] - prop -> dram_base_address ;
1609- else
1610- cur_device_address += dma_max_seg_size ;
1611-
1612- /* make sure not to export over exported size */
1613- chunk_size = min3 (size_left , dma_max_seg_size , cur_size_to_export );
1614-
1615- bar_address = hdev -> dram_pci_bar_start + cur_device_address ;
1616-
1617- rc = set_dma_sg (sg , bar_address , chunk_size , dev , dir );
1618- if (rc )
1619- goto error_unmap ;
1609+ for_each_sgtable_dma_sg (sgt , sg , i ) {
1610+ bar_address = hdev -> dram_pci_bar_start + (device_address - prop -> dram_base_address );
1611+ chunk_size = 0 ;
1612+
1613+ for ( ; curr_page < npages ; ++ curr_page ) {
1614+ size = min3 (left_size_to_export , left_size_in_page , left_size_in_dma_seg );
1615+ chunk_size += size ;
1616+ left_size_to_export -= size ;
1617+ left_size_in_page -= size ;
1618+ left_size_in_dma_seg -= size ;
1619+
1620+ if (!left_size_to_export )
1621+ break ;
1622+
1623+ if (!left_size_in_page ) {
1624+ /* left_size_to_export is not zero so there must be another page */
1625+ if (pages [curr_page ] + page_size != pages [curr_page + 1 ]) {
1626+ device_address = pages [curr_page + 1 ];
1627+ next_sg_entry = true;
1628+ }
1629+
1630+ left_size_in_page = page_size ;
1631+ }
16201632
1621- cur_size_to_export -= chunk_size ;
1633+ if (!left_size_in_dma_seg ) {
1634+ /*
1635+ * Skip setting a new device address if already moving to a page
1636+ * which is not contiguous with the current page.
1637+ */
1638+ if (!next_sg_entry ) {
1639+ device_address += chunk_size ;
1640+ next_sg_entry = true;
1641+ }
1642+
1643+ left_size_in_dma_seg = dma_max_seg_size ;
1644+ }
16221645
1623- if (size_left > dma_max_seg_size ) {
1624- size_left -= dma_max_seg_size ;
1625- } else {
1626- cur_page ++ ;
1627- size_left = page_size ;
1646+ if (next_sg_entry ) {
1647+ next_sg_entry = false;
1648+ break ;
16281649 }
16291650 }
1630- } else {
1631- /* Merge pages and put them into the scatterlist */
1632- for_each_sgtable_dma_sg (sgt , sg , i ) {
1633- chunk_size = page_size ;
1634- for (j = cur_page + 1 ; j < cur_npages ; j ++ ) {
1635- if (pages [j - 1 ] + page_size != pages [j ] ||
1636- chunk_size + page_size > dma_max_seg_size )
1637- break ;
1638-
1639- chunk_size += page_size ;
1640- }
1641-
1642- bar_address = hdev -> dram_pci_bar_start +
1643- (pages [cur_page ] - prop -> dram_base_address );
16441651
1645- /* make sure not to export over exported size */
1646- chunk_size = min (chunk_size , cur_size_to_export );
1647- rc = set_dma_sg (sg , bar_address , chunk_size , dev , dir );
1648- if (rc )
1649- goto error_unmap ;
1652+ rc = set_dma_sg (sg , bar_address , chunk_size , dev , dir );
1653+ if (rc )
1654+ goto err_unmap ;
1655+ }
16501656
1651- cur_size_to_export -= chunk_size ;
1652- cur_page = j ;
1653- }
1657+ /* There should be nothing left to export exactly after looping over all SG elements */
1658+ if (left_size_to_export ) {
1659+ dev_err (hdev -> dev ,
1660+ "left size to export %#llx after initializing %u SG elements\n" ,
1661+ left_size_to_export , sgt -> nents );
1662+ rc = - ENOMEM ;
1663+ goto err_unmap ;
16541664 }
16551665
1656- /* Because we are not going to include a CPU list we want to have some
1657- * chance that other users will detect this by setting the orig_nents
1658- * to 0 and using only nents (length of DMA list) when going over the
1659- * sgl
1666+ /*
1667+ * Because we are not going to include a CPU list, we want to have some chance that other
1668+ * users will detect this when going over SG table, by setting the orig_nents to 0 and using
1669+ * only nents (length of DMA list).
16601670 */
16611671 sgt -> orig_nents = 0 ;
16621672
16631673 return sgt ;
16641674
1665- error_unmap :
1675+ err_unmap :
16661676 for_each_sgtable_dma_sg (sgt , sg , i ) {
16671677 if (!sg_dma_len (sg ))
16681678 continue ;
16691679
1670- dma_unmap_resource (dev , sg_dma_address (sg ),
1671- sg_dma_len (sg ), dir ,
1680+ dma_unmap_resource (dev , sg_dma_address (sg ), sg_dma_len (sg ), dir ,
16721681 DMA_ATTR_SKIP_CPU_SYNC );
16731682 }
16741683
16751684 sg_free_table (sgt );
16761685
1677- error_free :
1686+ err_free_sgt :
16781687 kfree (sgt );
16791688 return ERR_PTR (rc );
16801689}
0 commit comments