Skip to content

Commit 36db0b0

Browse files
james-c-linaro authored and broonie committed
spi: spi-fsl-dspi: Use non-coherent memory for DMA
Using coherent memory here isn't functionally necessary, because we're only either sending data to the device or reading from it. This means explicit synchronizations are only required around those points and the change is fairly trivial.

This gives us around a 10% increase in throughput for large DMA transfers and no loss for small transfers.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: James Clark <james.clark@linaro.org>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Message-ID: <20250902-james-nxp-spi-dma-v6-4-f7aa2c5e56e2@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
1 parent 4850f15 commit 36db0b0

1 file changed

Lines changed: 41 additions & 24 deletions

File tree

drivers/spi/spi-fsl-dspi.c

Lines changed: 41 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -493,21 +493,33 @@ static u32 dspi_pop_tx_pushr(struct fsl_dspi *dspi)
493493
return cmd << 16 | data;
494494
}
495495

496+
static size_t dspi_dma_transfer_size(struct fsl_dspi *dspi)
497+
{
498+
return dspi->words_in_flight * DMA_SLAVE_BUSWIDTH_4_BYTES;
499+
}
500+
496501
static void dspi_tx_dma_callback(void *arg)
497502
{
498503
struct fsl_dspi *dspi = arg;
499504
struct fsl_dspi_dma *dma = dspi->dma;
505+
struct device *dev = &dspi->pdev->dev;
500506

507+
dma_sync_single_for_cpu(dev, dma->tx_dma_phys,
508+
dspi_dma_transfer_size(dspi), DMA_TO_DEVICE);
501509
complete(&dma->cmd_tx_complete);
502510
}
503511

504512
static void dspi_rx_dma_callback(void *arg)
505513
{
506514
struct fsl_dspi *dspi = arg;
507515
struct fsl_dspi_dma *dma = dspi->dma;
516+
struct device *dev = &dspi->pdev->dev;
508517
int i;
509518

510519
if (dspi->rx) {
520+
dma_sync_single_for_cpu(dev, dma->rx_dma_phys,
521+
dspi_dma_transfer_size(dspi),
522+
DMA_FROM_DEVICE);
511523
for (i = 0; i < dspi->words_in_flight; i++)
512524
dspi_push_rx(dspi, dspi->dma->rx_dma_buf[i]);
513525
}
@@ -517,6 +529,7 @@ static void dspi_rx_dma_callback(void *arg)
517529

518530
static int dspi_next_xfer_dma_submit(struct fsl_dspi *dspi)
519531
{
532+
size_t size = dspi_dma_transfer_size(dspi);
520533
struct device *dev = &dspi->pdev->dev;
521534
struct fsl_dspi_dma *dma = dspi->dma;
522535
int time_left;
@@ -525,12 +538,12 @@ static int dspi_next_xfer_dma_submit(struct fsl_dspi *dspi)
525538
for (i = 0; i < dspi->words_in_flight; i++)
526539
dspi->dma->tx_dma_buf[i] = dspi_pop_tx_pushr(dspi);
527540

541+
dma_sync_single_for_device(dev, dma->tx_dma_phys, size, DMA_TO_DEVICE);
528542
dma->tx_desc = dmaengine_prep_slave_single(dma->chan_tx,
529-
dma->tx_dma_phys,
530-
dspi->words_in_flight *
531-
DMA_SLAVE_BUSWIDTH_4_BYTES,
532-
DMA_MEM_TO_DEV,
533-
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
543+
dma->tx_dma_phys, size,
544+
DMA_MEM_TO_DEV,
545+
DMA_PREP_INTERRUPT |
546+
DMA_CTRL_ACK);
534547
if (!dma->tx_desc) {
535548
dev_err(dev, "Not able to get desc for DMA xfer\n");
536549
return -EIO;
@@ -543,12 +556,13 @@ static int dspi_next_xfer_dma_submit(struct fsl_dspi *dspi)
543556
return -EINVAL;
544557
}
545558

559+
dma_sync_single_for_device(dev, dma->rx_dma_phys, size,
560+
DMA_FROM_DEVICE);
546561
dma->rx_desc = dmaengine_prep_slave_single(dma->chan_rx,
547-
dma->rx_dma_phys,
548-
dspi->words_in_flight *
549-
DMA_SLAVE_BUSWIDTH_4_BYTES,
550-
DMA_DEV_TO_MEM,
551-
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
562+
dma->rx_dma_phys, size,
563+
DMA_DEV_TO_MEM,
564+
DMA_PREP_INTERRUPT |
565+
DMA_CTRL_ACK);
552566
if (!dma->rx_desc) {
553567
dev_err(dev, "Not able to get desc for DMA xfer\n");
554568
return -EIO;
@@ -643,17 +657,17 @@ static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr)
643657
goto err_tx_channel;
644658
}
645659

646-
dma->tx_dma_buf = dma_alloc_coherent(dma->chan_tx->device->dev,
647-
dma_bufsize, &dma->tx_dma_phys,
648-
GFP_KERNEL);
660+
dma->tx_dma_buf = dma_alloc_noncoherent(dma->chan_tx->device->dev,
661+
dma_bufsize, &dma->tx_dma_phys,
662+
DMA_TO_DEVICE, GFP_KERNEL);
649663
if (!dma->tx_dma_buf) {
650664
ret = -ENOMEM;
651665
goto err_tx_dma_buf;
652666
}
653667

654-
dma->rx_dma_buf = dma_alloc_coherent(dma->chan_rx->device->dev,
655-
dma_bufsize, &dma->rx_dma_phys,
656-
GFP_KERNEL);
668+
dma->rx_dma_buf = dma_alloc_noncoherent(dma->chan_rx->device->dev,
669+
dma_bufsize, &dma->rx_dma_phys,
670+
DMA_FROM_DEVICE, GFP_KERNEL);
657671
if (!dma->rx_dma_buf) {
658672
ret = -ENOMEM;
659673
goto err_rx_dma_buf;
@@ -688,11 +702,12 @@ static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr)
688702
return 0;
689703

690704
err_slave_config:
691-
dma_free_coherent(dma->chan_rx->device->dev,
692-
dma_bufsize, dma->rx_dma_buf, dma->rx_dma_phys);
705+
dma_free_noncoherent(dma->chan_rx->device->dev, dma_bufsize,
706+
dma->rx_dma_buf, dma->rx_dma_phys,
707+
DMA_FROM_DEVICE);
693708
err_rx_dma_buf:
694-
dma_free_coherent(dma->chan_tx->device->dev,
695-
dma_bufsize, dma->tx_dma_buf, dma->tx_dma_phys);
709+
dma_free_noncoherent(dma->chan_tx->device->dev, dma_bufsize,
710+
dma->tx_dma_buf, dma->tx_dma_phys, DMA_TO_DEVICE);
696711
err_tx_dma_buf:
697712
dma_release_channel(dma->chan_tx);
698713
err_tx_channel:
@@ -713,14 +728,16 @@ static void dspi_release_dma(struct fsl_dspi *dspi)
713728
return;
714729

715730
if (dma->chan_tx) {
716-
dma_free_coherent(dma->chan_tx->device->dev, dma_bufsize,
717-
dma->tx_dma_buf, dma->tx_dma_phys);
731+
dma_free_noncoherent(dma->chan_tx->device->dev, dma_bufsize,
732+
dma->tx_dma_buf, dma->tx_dma_phys,
733+
DMA_TO_DEVICE);
718734
dma_release_channel(dma->chan_tx);
719735
}
720736

721737
if (dma->chan_rx) {
722-
dma_free_coherent(dma->chan_rx->device->dev, dma_bufsize,
723-
dma->rx_dma_buf, dma->rx_dma_phys);
738+
dma_free_noncoherent(dma->chan_rx->device->dev, dma_bufsize,
739+
dma->rx_dma_buf, dma->rx_dma_phys,
740+
DMA_FROM_DEVICE);
724741
dma_release_channel(dma->chan_rx);
725742
}
726743
}

0 commit comments

Comments
 (0)