
Commit d17dd2f

clegoffic authored and broonie committed
spi: stm32: use STM32 DMA with STM32 MDMA to enhance DDR use
The STM32 DMA cannot generate burst transfers towards the DDR that make good use of the AXI bus and the DDR. To avoid this poor AXI/DDR load, the STM32 MDMA can be used to move the data into the DDR, triggered by the STM32 DMA channel's transfer-complete event. An SRAM buffer is used between the DMA and the MDMA, so the MDMA always performs MEM_TO_MEM transfers (from/to SRAM to/from DDR) while the DMA targets the SRAM instead of the DDR for its DEV_TO_MEM transfers.

SPI RX DMA (DEV_TO_MEM) becomes:

SPI RX FIFO ==DMA==> SRAM ==MDMA==> DDR

In RX (DEV_TO_MEM), the EOT interrupt is used to pause the DMA channel (which raises a transfer complete) and thereby trigger the MDMA to flush the SRAM when the transfer length is not aligned on an SRAM period. TX keeps the former implementation.

Signed-off-by: Clément Le Goffic <clement.legoffic@foss.st.com>
Link: https://patch.msgid.link/20250616-spi-upstream-v1-4-7e8593f3f75d@foss.st.com
Signed-off-by: Mark Brown <broonie@kernel.org>
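For orientation before the diff, the RX path described above chains two dmaengine channels through an SRAM bounce buffer. Below is a minimal sketch of that flow using only generic dmaengine calls; it is not the driver's code. The channel, buffer and function names are illustrative, and the double-buffer bookkeeping and the peripheral_config-based hardware trigger linking that the real patch uses are omitted.

#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>

/*
 * Hedged sketch only: a DEV_TO_MEM channel ("dma_rx") fills an SRAM
 * bounce buffer from the SPI RX FIFO, while a second channel ("mdma_rx")
 * drains that SRAM buffer into the final DDR destination.  All names
 * and addresses below are hypothetical.
 */
static int rx_dma_mdma_chain_sketch(struct dma_chan *dma_rx,
				    struct dma_chan *mdma_rx,
				    dma_addr_t spi_rx_fifo,  /* SPI RX data register */
				    dma_addr_t sram_dma,     /* SRAM bounce buffer */
				    dma_addr_t ddr_dma,      /* DDR destination */
				    size_t len)
{
	struct dma_slave_config dma_conf = {
		.direction = DMA_DEV_TO_MEM,
		.src_addr = spi_rx_fifo,
		.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
	};
	struct dma_slave_config mdma_conf = {
		.direction = DMA_DEV_TO_MEM,	/* SRAM plays the "device" side */
		.src_addr = sram_dma,
	};
	struct dma_async_tx_descriptor *dma_desc, *mdma_desc;

	dmaengine_slave_config(dma_rx, &dma_conf);
	dmaengine_slave_config(mdma_rx, &mdma_conf);

	/* Stage 1: SPI RX FIFO ==DMA==> SRAM */
	dma_desc = dmaengine_prep_slave_single(dma_rx, sram_dma, len,
					       DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
	/* Stage 2: SRAM ==MDMA==> DDR */
	mdma_desc = dmaengine_prep_slave_single(mdma_rx, ddr_dma, len,
						DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
	if (!dma_desc || !mdma_desc)
		return -EINVAL;

	/* Arm the MDMA first so it is ready when the DMA completes a period */
	if (dma_submit_error(dmaengine_submit(mdma_desc)) ||
	    dma_submit_error(dmaengine_submit(dma_desc)))
		return -EIO;
	dma_async_issue_pending(mdma_rx);
	dma_async_issue_pending(dma_rx);

	return 0;
}

In the actual patch, the SRAM buffer is split into two periods and the DMA alternates between them in double-buffer mode while the MDMA drains the opposite half; see stm32_spi_prepare_rx_dma_mdma_chaining() in the diff below.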
1 parent 21f1c80 commit d17dd2f

1 file changed

Lines changed: 229 additions & 24 deletions


drivers/spi/spi-stm32.c

@@ -9,7 +9,9 @@
 #include <linux/debugfs.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
+#include <linux/genalloc.h>
 #include <linux/interrupt.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
@@ -328,6 +330,11 @@ struct stm32_spi_cfg {
  * @dma_rx: dma channel for RX transfer
  * @phys_addr: SPI registers physical base address
  * @device_mode: the controller is configured as SPI device
+ * @sram_pool: SRAM pool for DMA transfers
+ * @sram_rx_buf_size: size of SRAM buffer for RX transfer
+ * @sram_rx_buf: SRAM buffer for RX transfer
+ * @sram_dma_rx_buf: SRAM buffer physical address for RX transfer
+ * @mdma_rx: MDMA channel for RX transfer
  */
 struct stm32_spi {
         struct device *dev;
@@ -362,6 +369,12 @@ struct stm32_spi {
         dma_addr_t phys_addr;

         bool device_mode;
+
+        struct gen_pool *sram_pool;
+        size_t sram_rx_buf_size;
+        void *sram_rx_buf;
+        dma_addr_t sram_dma_rx_buf;
+        struct dma_chan *mdma_rx;
 };

 static const struct stm32_spi_regspec stm32fx_spi_regspec = {
@@ -885,8 +898,11 @@ static void stm32h7_spi_disable(struct stm32_spi *spi)

         if (spi->cur_usedma && spi->dma_tx)
                 dmaengine_terminate_async(spi->dma_tx);
-        if (spi->cur_usedma && spi->dma_rx)
+        if (spi->cur_usedma && spi->dma_rx) {
                 dmaengine_terminate_async(spi->dma_rx);
+                if (spi->mdma_rx)
+                        dmaengine_terminate_async(spi->mdma_rx);
+        }

         stm32_spi_clr_bits(spi, STM32H7_SPI_CR1, STM32H7_SPI_CR1_SPE);

@@ -1098,10 +1114,13 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id)
         }

         if (sr & STM32H7_SPI_SR_EOT) {
+                dev_dbg(spi->dev, "End of transfer\n");
                 if (!spi->cur_usedma && (spi->rx_buf && (spi->rx_len > 0)))
                         stm32h7_spi_read_rxfifo(spi);
                 if (!spi->cur_usedma ||
-                    (spi->cur_comm == SPI_SIMPLEX_TX || spi->cur_comm == SPI_3WIRE_TX))
+                    (spi->cur_comm == SPI_SIMPLEX_TX || spi->cur_comm == SPI_3WIRE_TX) ||
+                    (spi->mdma_rx && (spi->cur_comm == SPI_SIMPLEX_RX ||
+                     spi->cur_comm == SPI_FULL_DUPLEX)))
                         end = true;
         }

@@ -1118,6 +1137,11 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id)
         spin_unlock_irqrestore(&spi->lock, flags);

         if (end) {
+                if (spi->cur_usedma && spi->mdma_rx) {
+                        dmaengine_pause(spi->dma_rx);
+                        /* Wait for callback */
+                        return IRQ_HANDLED;
+                }
                 stm32h7_spi_disable(spi);
                 spi_finalize_current_transfer(ctrl);
         }
@@ -1423,6 +1447,8 @@ static void stm32h7_spi_transfer_one_dma_start(struct stm32_spi *spi)
         /* Enable the interrupts */
         if (spi->cur_comm == SPI_SIMPLEX_TX || spi->cur_comm == SPI_3WIRE_TX)
                 ier |= STM32H7_SPI_IER_EOTIE | STM32H7_SPI_IER_TXTFIE;
+        if (spi->mdma_rx && (spi->cur_comm == SPI_SIMPLEX_RX || spi->cur_comm == SPI_FULL_DUPLEX))
+                ier |= STM32H7_SPI_IER_EOTIE;

         stm32_spi_set_bits(spi, STM32H7_SPI_IER, ier);

@@ -1432,6 +1458,119 @@ static void stm32h7_spi_transfer_one_dma_start(struct stm32_spi *spi)
         stm32_spi_set_bits(spi, STM32H7_SPI_CR1, STM32H7_SPI_CR1_CSTART);
 }

+/**
+ * stm32_spi_prepare_rx_dma_mdma_chaining - Prepare RX DMA and MDMA chaining
+ * @spi: pointer to the spi controller data structure
+ * @xfer: pointer to the spi transfer
+ * @rx_dma_conf: pointer to the DMA configuration for RX channel
+ * @rx_dma_desc: pointer to the RX DMA descriptor
+ * @rx_mdma_desc: pointer to the RX MDMA descriptor
+ *
+ * It must return 0 if the chaining is possible or an error code if not.
+ */
+static int stm32_spi_prepare_rx_dma_mdma_chaining(struct stm32_spi *spi,
+                                                  struct spi_transfer *xfer,
+                                                  struct dma_slave_config *rx_dma_conf,
+                                                  struct dma_async_tx_descriptor **rx_dma_desc,
+                                                  struct dma_async_tx_descriptor **rx_mdma_desc)
+{
+        struct dma_slave_config rx_mdma_conf = {0};
+        u32 sram_period, nents = 0, spi_s_len;
+        struct sg_table dma_sgt, mdma_sgt;
+        struct scatterlist *spi_s, *s;
+        dma_addr_t dma_buf;
+        int i, ret;
+
+        sram_period = spi->sram_rx_buf_size / 2;
+
+        /* Configure MDMA RX channel */
+        rx_mdma_conf.direction = rx_dma_conf->direction;
+        rx_mdma_conf.src_addr = spi->sram_dma_rx_buf;
+        rx_mdma_conf.peripheral_config = rx_dma_conf->peripheral_config;
+        rx_mdma_conf.peripheral_size = rx_dma_conf->peripheral_size;
+        dmaengine_slave_config(spi->mdma_rx, &rx_mdma_conf);
+
+        /* Count the number of entries needed */
+        for_each_sg(xfer->rx_sg.sgl, spi_s, xfer->rx_sg.nents, i)
+                if (sg_dma_len(spi_s) > sram_period)
+                        nents += DIV_ROUND_UP(sg_dma_len(spi_s), sram_period);
+                else
+                        nents++;
+
+        /* Prepare DMA slave_sg DBM transfer DEV_TO_MEM (RX>MEM=SRAM) */
+        ret = sg_alloc_table(&dma_sgt, nents, GFP_ATOMIC);
+        if (ret)
+                return ret;
+
+        spi_s = xfer->rx_sg.sgl;
+        spi_s_len = sg_dma_len(spi_s);
+        dma_buf = spi->sram_dma_rx_buf;
+        for_each_sg(dma_sgt.sgl, s, dma_sgt.nents, i) {
+                size_t bytes = min_t(size_t, spi_s_len, sram_period);
+
+                sg_dma_len(s) = bytes;
+                sg_dma_address(s) = dma_buf;
+                spi_s_len -= bytes;
+
+                if (!spi_s_len && sg_next(spi_s)) {
+                        spi_s = sg_next(spi_s);
+                        spi_s_len = sg_dma_len(spi_s);
+                        dma_buf = spi->sram_dma_rx_buf;
+                } else { /* DMA configured in DBM: it will swap between the SRAM periods */
+                        if (i & 1)
+                                dma_buf += sram_period;
+                        else
+                                dma_buf = spi->sram_dma_rx_buf;
+                }
+        }
+
+        *rx_dma_desc = dmaengine_prep_slave_sg(spi->dma_rx, dma_sgt.sgl,
+                                               dma_sgt.nents, rx_dma_conf->direction,
+                                               DMA_PREP_INTERRUPT);
+        sg_free_table(&dma_sgt);
+
+        if (!rx_dma_desc)
+                return -EINVAL;
+
+        /* Prepare MDMA slave_sg transfer MEM_TO_MEM (SRAM>DDR) */
+        ret = sg_alloc_table(&mdma_sgt, nents, GFP_ATOMIC);
+        if (ret) {
+                rx_dma_desc = NULL;
+                return ret;
+        }
+
+        spi_s = xfer->rx_sg.sgl;
+        spi_s_len = sg_dma_len(spi_s);
+        dma_buf = sg_dma_address(spi_s);
+        for_each_sg(mdma_sgt.sgl, s, mdma_sgt.nents, i) {
+                size_t bytes = min_t(size_t, spi_s_len, sram_period);
+
+                sg_dma_len(s) = bytes;
+                sg_dma_address(s) = dma_buf;
+                spi_s_len -= bytes;
+
+                if (!spi_s_len && sg_next(spi_s)) {
+                        spi_s = sg_next(spi_s);
+                        spi_s_len = sg_dma_len(spi_s);
+                        dma_buf = sg_dma_address(spi_s);
+                } else {
+                        dma_buf += bytes;
+                }
+        }
+
+        *rx_mdma_desc = dmaengine_prep_slave_sg(spi->mdma_rx, mdma_sgt.sgl,
+                                                mdma_sgt.nents, rx_mdma_conf.direction,
+                                                DMA_PREP_INTERRUPT);
+        sg_free_table(&mdma_sgt);
+
+        if (!rx_mdma_desc) {
+                rx_dma_desc = NULL;
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
 /**
  * stm32_spi_transfer_one_dma - transfer a single spi_transfer using DMA
  * @spi: pointer to the spi controller data structure
@@ -1443,38 +1582,43 @@ static void stm32h7_spi_transfer_one_dma_start(struct stm32_spi *spi)
 static int stm32_spi_transfer_one_dma(struct stm32_spi *spi,
                                       struct spi_transfer *xfer)
 {
+        struct dma_async_tx_descriptor *rx_mdma_desc = NULL, *rx_dma_desc = NULL;
+        struct dma_async_tx_descriptor *tx_dma_desc = NULL;
         struct dma_slave_config tx_dma_conf, rx_dma_conf;
-        struct dma_async_tx_descriptor *tx_dma_desc, *rx_dma_desc;
         unsigned long flags;
+        int ret = 0;

         spin_lock_irqsave(&spi->lock, flags);

-        rx_dma_desc = NULL;
         if (spi->rx_buf && spi->dma_rx) {
                 stm32_spi_dma_config(spi, spi->dma_rx, &rx_dma_conf, DMA_DEV_TO_MEM);
-                dmaengine_slave_config(spi->dma_rx, &rx_dma_conf);
-
-                /* Enable Rx DMA request */
-                stm32_spi_set_bits(spi, spi->cfg->regs->dma_rx_en.reg,
-                                   spi->cfg->regs->dma_rx_en.mask);
-
-                rx_dma_desc = dmaengine_prep_slave_sg(
-                                        spi->dma_rx, xfer->rx_sg.sgl,
-                                        xfer->rx_sg.nents,
-                                        rx_dma_conf.direction,
-                                        DMA_PREP_INTERRUPT);
+                if (spi->mdma_rx) {
+                        rx_dma_conf.peripheral_size = 1;
+                        dmaengine_slave_config(spi->dma_rx, &rx_dma_conf);
+
+                        ret = stm32_spi_prepare_rx_dma_mdma_chaining(spi, xfer, &rx_dma_conf,
+                                                                     &rx_dma_desc, &rx_mdma_desc);
+                        if (ret) { /* RX DMA MDMA chaining not possible, fallback to DMA only */
+                                rx_dma_conf.peripheral_config = 0;
+                                rx_dma_desc = NULL;
+                        }
+                }
+                if (!rx_dma_desc) {
+                        dmaengine_slave_config(spi->dma_rx, &rx_dma_conf);
+                        rx_dma_desc = dmaengine_prep_slave_sg(spi->dma_rx, xfer->rx_sg.sgl,
+                                                              xfer->rx_sg.nents,
+                                                              rx_dma_conf.direction,
+                                                              DMA_PREP_INTERRUPT);
+                }
         }

-        tx_dma_desc = NULL;
         if (spi->tx_buf && spi->dma_tx) {
                 stm32_spi_dma_config(spi, spi->dma_tx, &tx_dma_conf, DMA_MEM_TO_DEV);
                 dmaengine_slave_config(spi->dma_tx, &tx_dma_conf);
-
-                tx_dma_desc = dmaengine_prep_slave_sg(
-                                        spi->dma_tx, xfer->tx_sg.sgl,
-                                        xfer->tx_sg.nents,
-                                        tx_dma_conf.direction,
-                                        DMA_PREP_INTERRUPT);
+                tx_dma_desc = dmaengine_prep_slave_sg(spi->dma_tx, xfer->tx_sg.sgl,
+                                                      xfer->tx_sg.nents,
+                                                      tx_dma_conf.direction,
+                                                      DMA_PREP_INTERRUPT);
         }

         if ((spi->tx_buf && spi->dma_tx && !tx_dma_desc) ||
@@ -1485,9 +1629,25 @@ static int stm32_spi_transfer_one_dma(struct stm32_spi *spi,
                 goto dma_desc_error;

         if (rx_dma_desc) {
-                rx_dma_desc->callback = spi->cfg->dma_rx_cb;
-                rx_dma_desc->callback_param = spi;
+                if (rx_mdma_desc) {
+                        rx_mdma_desc->callback = spi->cfg->dma_rx_cb;
+                        rx_mdma_desc->callback_param = spi;
+                } else {
+                        rx_dma_desc->callback = spi->cfg->dma_rx_cb;
+                        rx_dma_desc->callback_param = spi;
+                }

+                /* Enable Rx DMA request */
+                stm32_spi_set_bits(spi, spi->cfg->regs->dma_rx_en.reg,
+                                   spi->cfg->regs->dma_rx_en.mask);
+                if (rx_mdma_desc) {
+                        if (dma_submit_error(dmaengine_submit(rx_mdma_desc))) {
+                                dev_err(spi->dev, "Rx MDMA submit failed\n");
+                                goto dma_desc_error;
+                        }
+                        /* Enable Rx MDMA channel */
+                        dma_async_issue_pending(spi->mdma_rx);
+                }
                 if (dma_submit_error(dmaengine_submit(rx_dma_desc))) {
                         dev_err(spi->dev, "Rx DMA submit failed\n");
                         goto dma_desc_error;
@@ -1522,6 +1682,8 @@ static int stm32_spi_transfer_one_dma(struct stm32_spi *spi,
         return 1;

 dma_submit_error:
+        if (spi->mdma_rx)
+                dmaengine_terminate_sync(spi->mdma_rx);
         if (spi->dma_rx)
                 dmaengine_terminate_sync(spi->dma_rx);

@@ -1533,6 +1695,9 @@ static int stm32_spi_transfer_one_dma(struct stm32_spi *spi,

         dev_info(spi->dev, "DMA issue: fall back to irq transfer\n");

+        if (spi->sram_rx_buf)
+                memset(spi->sram_rx_buf, 0, spi->sram_rx_buf_size);
+
         spi->cur_usedma = false;
         return spi->cfg->transfer_one_irq(spi);
 }
@@ -1891,6 +2056,9 @@ static int stm32_spi_unprepare_msg(struct spi_controller *ctrl,

         spi->cfg->disable(spi);

+        if (spi->sram_rx_buf)
+                memset(spi->sram_rx_buf, 0, spi->sram_rx_buf_size);
+
         return 0;
 }

@@ -2245,6 +2413,33 @@ static int stm32_spi_probe(struct platform_device *pdev)
         if (spi->dma_tx || spi->dma_rx)
                 ctrl->can_dma = stm32_spi_can_dma;

+        spi->sram_pool = of_gen_pool_get(pdev->dev.of_node, "sram", 0);
+        if (spi->sram_pool) {
+                spi->sram_rx_buf_size = gen_pool_size(spi->sram_pool);
+                dev_info(&pdev->dev, "SRAM pool: %zu KiB for RX DMA/MDMA chaining\n",
+                         spi->sram_rx_buf_size / 1024);
+                spi->sram_rx_buf = gen_pool_dma_zalloc(spi->sram_pool, spi->sram_rx_buf_size,
+                                                       &spi->sram_dma_rx_buf);
+                if (!spi->sram_rx_buf) {
+                        dev_err(&pdev->dev, "failed to allocate SRAM buffer\n");
+                } else {
+                        spi->mdma_rx = dma_request_chan(spi->dev, "rxm2m");
+                        if (IS_ERR(spi->mdma_rx)) {
+                                ret = PTR_ERR(spi->mdma_rx);
+                                spi->mdma_rx = NULL;
+                                if (ret == -EPROBE_DEFER) {
+                                        goto err_pool_free;
+                                } else {
+                                        gen_pool_free(spi->sram_pool,
+                                                      (unsigned long)spi->sram_rx_buf,
+                                                      spi->sram_rx_buf_size);
+                                        dev_warn(&pdev->dev,
+                                                 "failed to request rx mdma channel, DMA only\n");
+                                }
+                        }
+                }
+        }
+
         pm_runtime_set_autosuspend_delay(&pdev->dev,
                                          STM32_SPI_AUTOSUSPEND_DELAY);
         pm_runtime_use_autosuspend(&pdev->dev);
@@ -2272,6 +2467,11 @@ static int stm32_spi_probe(struct platform_device *pdev)
         pm_runtime_put_noidle(&pdev->dev);
         pm_runtime_set_suspended(&pdev->dev);
         pm_runtime_dont_use_autosuspend(&pdev->dev);
+
+        if (spi->mdma_rx)
+                dma_release_channel(spi->mdma_rx);
+err_pool_free:
+        gen_pool_free(spi->sram_pool, (unsigned long)spi->sram_rx_buf, spi->sram_rx_buf_size);
 err_dma_release:
         if (spi->dma_tx)
                 dma_release_channel(spi->dma_tx);
@@ -2302,6 +2502,11 @@ static void stm32_spi_remove(struct platform_device *pdev)
                 dma_release_channel(ctrl->dma_tx);
         if (ctrl->dma_rx)
                 dma_release_channel(ctrl->dma_rx);
+        if (spi->mdma_rx)
+                dma_release_channel(spi->mdma_rx);
+        if (spi->sram_rx_buf)
+                gen_pool_free(spi->sram_pool, (unsigned long)spi->sram_rx_buf,
+                              spi->sram_rx_buf_size);

         clk_disable_unprepare(spi->clk);
