Skip to content

Commit dcb3fba

Browse files
danish-ti authored and kuba-moo committed
net: ti: icssg_prueth: Add SW TX / RX Coalescing based on hrtimers
Add SW IRQ coalescing based on hrtimers for the RX and TX data paths of the ICSSG driver. It can be enabled with ethtool commands:

- RX coalescing
  ethtool -C eth1 rx-usecs 50

- TX coalescing can be enabled per TX queue
  - by default enables coalescing for TX0
    ethtool -C eth1 tx-usecs 50
  - configure TX0
    ethtool -Q eth0 queue_mask 1 --coalesce tx-usecs 100
  - configure TX1
    ethtool -Q eth0 queue_mask 2 --coalesce tx-usecs 100
  - configure TX0 and TX1
    ethtool -Q eth0 queue_mask 3 --coalesce tx-usecs 100 --coalesce tx-usecs 100

Minimum value for both rx-usecs and tx-usecs is 20us.

Compared to gro_flush_timeout and napi_defer_hard_irqs, this patch allows enabling IRQ coalescing for the RX path separately.

Benchmarking numbers:

| Method                 | Tput_TX  | CPU_TX | Tput_RX  | CPU_RX |
|------------------------|----------|--------|----------|--------|
| Default Driver         | 943 Mbps | 31%    | 517 Mbps | 38%    |
| IRQ Coalescing (Patch) | 943 Mbps | 28%    | 518 Mbps | 25%    |

Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20240430120634.1558998-1-danishanwar@ti.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent 5165c48 commit dcb3fba

4 files changed

Lines changed: 155 additions & 8 deletions

File tree

drivers/net/ethernet/ti/icssg/icssg_common.c

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
122122
}
123123

124124
int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
125-
int budget)
125+
int budget, bool *tdown)
126126
{
127127
struct net_device *ndev = emac->ndev;
128128
struct cppi5_host_desc_t *desc_tx;
@@ -145,6 +145,7 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
145145
if (cppi5_desc_is_tdcm(desc_dma)) {
146146
if (atomic_dec_and_test(&emac->tdown_cnt))
147147
complete(&emac->tdown_complete);
148+
*tdown = true;
148149
break;
149150
}
150151

@@ -190,19 +191,37 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
190191
return num_tx;
191192
}
192193

194+
static enum hrtimer_restart emac_tx_timer_callback(struct hrtimer *timer)
195+
{
196+
struct prueth_tx_chn *tx_chns =
197+
container_of(timer, struct prueth_tx_chn, tx_hrtimer);
198+
199+
enable_irq(tx_chns->irq);
200+
return HRTIMER_NORESTART;
201+
}
202+
193203
static int emac_napi_tx_poll(struct napi_struct *napi_tx, int budget)
194204
{
195205
struct prueth_tx_chn *tx_chn = prueth_napi_to_tx_chn(napi_tx);
196206
struct prueth_emac *emac = tx_chn->emac;
207+
bool tdown = false;
197208
int num_tx_packets;
198209

199-
num_tx_packets = emac_tx_complete_packets(emac, tx_chn->id, budget);
210+
num_tx_packets = emac_tx_complete_packets(emac, tx_chn->id, budget,
211+
&tdown);
200212

201213
if (num_tx_packets >= budget)
202214
return budget;
203215

204-
if (napi_complete_done(napi_tx, num_tx_packets))
205-
enable_irq(tx_chn->irq);
216+
if (napi_complete_done(napi_tx, num_tx_packets)) {
217+
if (unlikely(tx_chn->tx_pace_timeout_ns && !tdown)) {
218+
hrtimer_start(&tx_chn->tx_hrtimer,
219+
ns_to_ktime(tx_chn->tx_pace_timeout_ns),
220+
HRTIMER_MODE_REL_PINNED);
221+
} else {
222+
enable_irq(tx_chn->irq);
223+
}
224+
}
206225

207226
return num_tx_packets;
208227
}
@@ -226,6 +245,9 @@ int prueth_ndev_add_tx_napi(struct prueth_emac *emac)
226245
struct prueth_tx_chn *tx_chn = &emac->tx_chns[i];
227246

228247
netif_napi_add_tx(emac->ndev, &tx_chn->napi_tx, emac_napi_tx_poll);
248+
hrtimer_init(&tx_chn->tx_hrtimer, CLOCK_MONOTONIC,
249+
HRTIMER_MODE_REL_PINNED);
250+
tx_chn->tx_hrtimer.function = &emac_tx_timer_callback;
229251
ret = request_irq(tx_chn->irq, prueth_tx_irq,
230252
IRQF_TRIGGER_HIGH, tx_chn->name,
231253
tx_chn);
@@ -871,8 +893,15 @@ int emac_napi_rx_poll(struct napi_struct *napi_rx, int budget)
871893
break;
872894
}
873895

874-
if (num_rx < budget && napi_complete_done(napi_rx, num_rx))
875-
enable_irq(emac->rx_chns.irq[rx_flow]);
896+
if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) {
897+
if (unlikely(emac->rx_pace_timeout_ns)) {
898+
hrtimer_start(&emac->rx_hrtimer,
899+
ns_to_ktime(emac->rx_pace_timeout_ns),
900+
HRTIMER_MODE_REL_PINNED);
901+
} else {
902+
enable_irq(emac->rx_chns.irq[rx_flow]);
903+
}
904+
}
876905

877906
return num_rx;
878907
}

drivers/net/ethernet/ti/icssg/icssg_ethtool.c

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,93 @@ static void emac_get_rmon_stats(struct net_device *ndev,
201201
rmon_stats->hist_tx[4] = emac_get_stat_by_name(emac, "tx_bucket5_frames");
202202
}
203203

204+
static int emac_get_coalesce(struct net_device *ndev,
205+
struct ethtool_coalesce *coal,
206+
struct kernel_ethtool_coalesce *kernel_coal,
207+
struct netlink_ext_ack *extack)
208+
{
209+
struct prueth_emac *emac = netdev_priv(ndev);
210+
struct prueth_tx_chn *tx_chn;
211+
212+
tx_chn = &emac->tx_chns[0];
213+
214+
coal->rx_coalesce_usecs = emac->rx_pace_timeout_ns / 1000;
215+
coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout_ns / 1000;
216+
217+
return 0;
218+
}
219+
220+
/*
 * ethtool .get_per_queue_coalesce handler.
 *
 * Reports the TX pacing timeout of TX channel @queue in microseconds via
 * @coal->tx_coalesce_usecs. No other field of @coal is written.
 *
 * Returns 0 on success, or -EINVAL when @queue is not below
 * PRUETH_MAX_TX_QUEUES.
 */
static int emac_get_per_queue_coalesce(struct net_device *ndev, u32 queue,
				       struct ethtool_coalesce *coal)
{
	struct prueth_emac *emac;

	if (queue >= PRUETH_MAX_TX_QUEUES)
		return -EINVAL;

	emac = netdev_priv(ndev);
	coal->tx_coalesce_usecs =
		emac->tx_chns[queue].tx_pace_timeout_ns / 1000;

	return 0;
}
235+
236+
static int emac_set_coalesce(struct net_device *ndev,
237+
struct ethtool_coalesce *coal,
238+
struct kernel_ethtool_coalesce *kernel_coal,
239+
struct netlink_ext_ack *extack)
240+
{
241+
struct prueth_emac *emac = netdev_priv(ndev);
242+
struct prueth *prueth = emac->prueth;
243+
struct prueth_tx_chn *tx_chn;
244+
245+
tx_chn = &emac->tx_chns[0];
246+
247+
if (coal->rx_coalesce_usecs &&
248+
coal->rx_coalesce_usecs < ICSSG_MIN_COALESCE_USECS) {
249+
dev_info(prueth->dev, "defaulting to min value of %dus for rx-usecs\n",
250+
ICSSG_MIN_COALESCE_USECS);
251+
coal->rx_coalesce_usecs = ICSSG_MIN_COALESCE_USECS;
252+
}
253+
254+
if (coal->tx_coalesce_usecs &&
255+
coal->tx_coalesce_usecs < ICSSG_MIN_COALESCE_USECS) {
256+
dev_info(prueth->dev, "defaulting to min value of %dus for tx-usecs\n",
257+
ICSSG_MIN_COALESCE_USECS);
258+
coal->tx_coalesce_usecs = ICSSG_MIN_COALESCE_USECS;
259+
}
260+
261+
emac->rx_pace_timeout_ns = coal->rx_coalesce_usecs * 1000;
262+
tx_chn->tx_pace_timeout_ns = coal->tx_coalesce_usecs * 1000;
263+
264+
return 0;
265+
}
266+
267+
/*
 * ethtool .set_per_queue_coalesce handler.
 *
 * Stores the requested TX pacing timeout for TX channel @queue. A value of 0
 * disables pacing for that channel. A non-zero request below
 * ICSSG_MIN_COALESCE_USECS is raised to that minimum (the adjusted value is
 * also written back into @coal) and an informational message is logged.
 *
 * Returns 0 on success, or -EINVAL when @queue is not below
 * PRUETH_MAX_TX_QUEUES.
 */
static int emac_set_per_queue_coalesce(struct net_device *ndev, u32 queue,
				       struct ethtool_coalesce *coal)
{
	struct prueth_emac *emac = netdev_priv(ndev);
	struct prueth *prueth = emac->prueth;
	struct prueth_tx_chn *tx_chn;

	if (queue >= PRUETH_MAX_TX_QUEUES)
		return -EINVAL;

	tx_chn = &emac->tx_chns[queue];

	if (coal->tx_coalesce_usecs &&
	    coal->tx_coalesce_usecs < ICSSG_MIN_COALESCE_USECS) {
		dev_info(prueth->dev, "defaulting to min value of %dus for tx-usecs for tx-%u\n",
			 ICSSG_MIN_COALESCE_USECS, queue);
		coal->tx_coalesce_usecs = ICSSG_MIN_COALESCE_USECS;
	}

	/*
	 * Widen to the destination type (unsigned long) before converting
	 * usecs to nsecs. Otherwise the multiplication is carried out in the
	 * narrower type of the ethtool field (32-bit in the ethtool UAPI
	 * header) and large microsecond values wrap before being stored.
	 */
	tx_chn->tx_pace_timeout_ns =
		(unsigned long)coal->tx_coalesce_usecs * 1000;

	return 0;
}
290+
204291
const struct ethtool_ops icssg_ethtool_ops = {
205292
.get_drvinfo = emac_get_drvinfo,
206293
.get_msglevel = emac_get_msglevel,
@@ -209,6 +296,12 @@ const struct ethtool_ops icssg_ethtool_ops = {
209296
.get_ethtool_stats = emac_get_ethtool_stats,
210297
.get_strings = emac_get_strings,
211298
.get_ts_info = emac_get_ts_info,
299+
.supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS |
300+
ETHTOOL_COALESCE_TX_USECS,
301+
.get_coalesce = emac_get_coalesce,
302+
.set_coalesce = emac_set_coalesce,
303+
.get_per_queue_coalesce = emac_get_per_queue_coalesce,
304+
.set_per_queue_coalesce = emac_set_per_queue_coalesce,
212305
.get_channels = emac_get_channels,
213306
.set_channels = emac_set_channels,
214307
.get_link_ksettings = emac_get_link_ksettings,

drivers/net/ethernet/ti/icssg/icssg_prueth.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,16 @@ static void emac_adjust_link(struct net_device *ndev)
243243
}
244244
}
245245

246+
static enum hrtimer_restart emac_rx_timer_callback(struct hrtimer *timer)
247+
{
248+
struct prueth_emac *emac =
249+
container_of(timer, struct prueth_emac, rx_hrtimer);
250+
int rx_flow = PRUETH_RX_FLOW_DATA;
251+
252+
enable_irq(emac->rx_chns.irq[rx_flow]);
253+
return HRTIMER_NORESTART;
254+
}
255+
246256
static int emac_phy_connect(struct prueth_emac *emac)
247257
{
248258
struct prueth *prueth = emac->prueth;
@@ -582,15 +592,18 @@ static int emac_ndo_stop(struct net_device *ndev)
582592
netdev_err(ndev, "tx teardown timeout\n");
583593

584594
prueth_reset_tx_chan(emac, emac->tx_ch_num, true);
585-
for (i = 0; i < emac->tx_ch_num; i++)
595+
for (i = 0; i < emac->tx_ch_num; i++) {
586596
napi_disable(&emac->tx_chns[i].napi_tx);
597+
hrtimer_cancel(&emac->tx_chns[i].tx_hrtimer);
598+
}
587599

588600
max_rx_flows = PRUETH_MAX_RX_FLOWS;
589601
k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true);
590602

591603
prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, true);
592604

593605
napi_disable(&emac->napi_rx);
606+
hrtimer_cancel(&emac->rx_hrtimer);
594607

595608
cancel_work_sync(&emac->rx_mode_work);
596609

@@ -801,6 +814,9 @@ static int prueth_netdev_init(struct prueth *prueth,
801814
ndev->features = ndev->hw_features;
802815

803816
netif_napi_add(ndev, &emac->napi_rx, emac_napi_rx_poll);
817+
hrtimer_init(&emac->rx_hrtimer, CLOCK_MONOTONIC,
818+
HRTIMER_MODE_REL_PINNED);
819+
emac->rx_hrtimer.function = &emac_rx_timer_callback;
804820
prueth->emac[mac] = emac;
805821

806822
return 0;

drivers/net/ethernet/ti/icssg/icssg_prueth.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ struct prueth_tx_chn {
108108
u32 descs_num;
109109
unsigned int irq;
110110
char name[32];
111+
struct hrtimer tx_hrtimer;
112+
unsigned long tx_pace_timeout_ns;
111113
};
112114

113115
struct prueth_rx_chn {
@@ -127,6 +129,9 @@ struct prueth_rx_chn {
127129

128130
#define PRUETH_MAX_TX_TS_REQUESTS 50 /* Max simultaneous TX_TS requests */
129131

132+
/* Minimum coalesce time in usecs for both Tx and Rx */
133+
#define ICSSG_MIN_COALESCE_USECS 20
134+
130135
/* data for each emac port */
131136
struct prueth_emac {
132137
bool is_sr1;
@@ -183,6 +188,10 @@ struct prueth_emac {
183188

184189
struct delayed_work stats_work;
185190
u64 stats[ICSSG_NUM_STATS];
191+
192+
/* RX IRQ Coalescing Related */
193+
struct hrtimer rx_hrtimer;
194+
unsigned long rx_pace_timeout_ns;
186195
};
187196

188197
/**
@@ -320,7 +329,7 @@ void prueth_ndev_del_tx_napi(struct prueth_emac *emac, int num);
320329
void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
321330
struct cppi5_host_desc_t *desc);
322331
int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
323-
int budget);
332+
int budget, bool *tdown);
324333
int prueth_ndev_add_tx_napi(struct prueth_emac *emac);
325334
int prueth_init_tx_chns(struct prueth_emac *emac);
326335
int prueth_init_rx_chns(struct prueth_emac *emac,

0 commit comments

Comments
 (0)