Skip to content

Commit f926c02

Browse files
ivitro authored and gregkh committed
can: mcp251xfd: fix infinite loop when xmit fails
commit d8fb63e upstream.

When the mcp251xfd_start_xmit() function fails, the driver stops processing messages, and the interrupt routine does not return, running indefinitely even after killing the running application.

Error messages:
[ 441.298819] mcp251xfd spi2.0 can0: ERROR in mcp251xfd_start_xmit: -16
[ 441.306498] mcp251xfd spi2.0 can0: Transmit Event FIFO buffer not empty. (seq=0x000017c7, tef_tail=0x000017cf, tef_head=0x000017d0, tx_head=0x000017d3).
... and these messages repeat forever.

The issue can be triggered when multiple devices share the same SPI interface and there is concurrent access to the bus.

The problem occurs because tx_ring->head increments even if mcp251xfd_start_xmit() fails. Consequently, the driver skips one TX packet while still expecting a response in mcp251xfd_handle_tefif_one().

Resolve the issue by starting a workqueue to write the tx obj synchronously if err == -EBUSY. In case of another error, decrement tx_ring->head, remove skb from the echo stack, and drop the message.

Fixes: 55e5b97 ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN")
Cc: [email protected]
Signed-off-by: Vitor Soares <[email protected]>
Link: https://lore.kernel.org/all/[email protected]
[mkl: use more imperative wording in patch description]
Signed-off-by: Marc Kleine-Budde <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 778a8e6 commit f926c02

File tree

3 files changed

+65
-9
lines changed

3 files changed

+65
-9
lines changed

drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1618,11 +1618,20 @@ static int mcp251xfd_open(struct net_device *ndev)
16181618
clear_bit(MCP251XFD_FLAGS_DOWN, priv->flags);
16191619
can_rx_offload_enable(&priv->offload);
16201620

1621+
priv->wq = alloc_ordered_workqueue("%s-mcp251xfd_wq",
1622+
WQ_FREEZABLE | WQ_MEM_RECLAIM,
1623+
dev_name(&spi->dev));
1624+
if (!priv->wq) {
1625+
err = -ENOMEM;
1626+
goto out_can_rx_offload_disable;
1627+
}
1628+
INIT_WORK(&priv->tx_work, mcp251xfd_tx_obj_write_sync);
1629+
16211630
err = request_threaded_irq(spi->irq, NULL, mcp251xfd_irq,
16221631
IRQF_SHARED | IRQF_ONESHOT,
16231632
dev_name(&spi->dev), priv);
16241633
if (err)
1625-
goto out_can_rx_offload_disable;
1634+
goto out_destroy_workqueue;
16261635

16271636
err = mcp251xfd_chip_interrupts_enable(priv);
16281637
if (err)
@@ -1634,6 +1643,8 @@ static int mcp251xfd_open(struct net_device *ndev)
16341643

16351644
out_free_irq:
16361645
free_irq(spi->irq, priv);
1646+
out_destroy_workqueue:
1647+
destroy_workqueue(priv->wq);
16371648
out_can_rx_offload_disable:
16381649
can_rx_offload_disable(&priv->offload);
16391650
set_bit(MCP251XFD_FLAGS_DOWN, priv->flags);
@@ -1661,6 +1672,7 @@ static int mcp251xfd_stop(struct net_device *ndev)
16611672
hrtimer_cancel(&priv->tx_irq_timer);
16621673
mcp251xfd_chip_interrupts_disable(priv);
16631674
free_irq(ndev->irq, priv);
1675+
destroy_workqueue(priv->wq);
16641676
can_rx_offload_disable(&priv->offload);
16651677
mcp251xfd_timestamp_stop(priv);
16661678
mcp251xfd_chip_stop(priv, CAN_STATE_STOPPED);

drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,39 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv,
131131
tx_obj->xfer[0].len = len;
132132
}
133133

134+
static void mcp251xfd_tx_failure_drop(const struct mcp251xfd_priv *priv,
135+
struct mcp251xfd_tx_ring *tx_ring,
136+
int err)
137+
{
138+
struct net_device *ndev = priv->ndev;
139+
struct net_device_stats *stats = &ndev->stats;
140+
unsigned int frame_len = 0;
141+
u8 tx_head;
142+
143+
tx_ring->head--;
144+
stats->tx_dropped++;
145+
tx_head = mcp251xfd_get_tx_head(tx_ring);
146+
can_free_echo_skb(ndev, tx_head, &frame_len);
147+
netdev_completed_queue(ndev, 1, frame_len);
148+
netif_wake_queue(ndev);
149+
150+
if (net_ratelimit())
151+
netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err);
152+
}
153+
154+
void mcp251xfd_tx_obj_write_sync(struct work_struct *work)
155+
{
156+
struct mcp251xfd_priv *priv = container_of(work, struct mcp251xfd_priv,
157+
tx_work);
158+
struct mcp251xfd_tx_obj *tx_obj = priv->tx_work_obj;
159+
struct mcp251xfd_tx_ring *tx_ring = priv->tx;
160+
int err;
161+
162+
err = spi_sync(priv->spi, &tx_obj->msg);
163+
if (err)
164+
mcp251xfd_tx_failure_drop(priv, tx_ring, err);
165+
}
166+
134167
static int mcp251xfd_tx_obj_write(const struct mcp251xfd_priv *priv,
135168
struct mcp251xfd_tx_obj *tx_obj)
136169
{
@@ -162,6 +195,11 @@ static bool mcp251xfd_tx_busy(const struct mcp251xfd_priv *priv,
162195
return false;
163196
}
164197

198+
/*
 * Return true when work_busy() reports a non-zero state for @work,
 * false otherwise.
 */
static bool mcp251xfd_work_busy(struct work_struct *work)
{
	const unsigned int state = work_busy(work);

	return state != 0;
}
202+
165203
netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
166204
struct net_device *ndev)
167205
{
@@ -175,7 +213,8 @@ netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
175213
if (can_dev_dropped_skb(ndev, skb))
176214
return NETDEV_TX_OK;
177215

178-
if (mcp251xfd_tx_busy(priv, tx_ring))
216+
if (mcp251xfd_tx_busy(priv, tx_ring) ||
217+
mcp251xfd_work_busy(&priv->tx_work))
179218
return NETDEV_TX_BUSY;
180219

181220
tx_obj = mcp251xfd_get_tx_obj_next(tx_ring);
@@ -193,13 +232,13 @@ netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
193232
netdev_sent_queue(priv->ndev, frame_len);
194233

195234
err = mcp251xfd_tx_obj_write(priv, tx_obj);
196-
if (err)
197-
goto out_err;
198-
199-
return NETDEV_TX_OK;
200-
201-
out_err:
202-
netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err);
235+
if (err == -EBUSY) {
236+
netif_stop_queue(ndev);
237+
priv->tx_work_obj = tx_obj;
238+
queue_work(priv->wq, &priv->tx_work);
239+
} else if (err) {
240+
mcp251xfd_tx_failure_drop(priv, tx_ring, err);
241+
}
203242

204243
return NETDEV_TX_OK;
205244
}

drivers/net/can/spi/mcp251xfd/mcp251xfd.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,10 @@ struct mcp251xfd_priv {
628628
struct mcp251xfd_rx_ring *rx[MCP251XFD_FIFO_RX_NUM];
629629
struct mcp251xfd_tx_ring tx[MCP251XFD_FIFO_TX_NUM];
630630

631+
struct workqueue_struct *wq;
632+
struct work_struct tx_work;
633+
struct mcp251xfd_tx_obj *tx_work_obj;
634+
631635
DECLARE_BITMAP(flags, __MCP251XFD_FLAGS_SIZE__);
632636

633637
u8 rx_ring_num;
@@ -934,6 +938,7 @@ void mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv,
934938
void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv);
935939
void mcp251xfd_timestamp_stop(struct mcp251xfd_priv *priv);
936940

941+
void mcp251xfd_tx_obj_write_sync(struct work_struct *work);
937942
netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
938943
struct net_device *ndev);
939944

0 commit comments

Comments
 (0)