Skip to content

Commit 2dc9cfa

Browse files
net: mana: Handle Reset Request from MANA NIC
jira LE-3923 commit-author Haiyang Zhang <[email protected]> commit fbe346c upstream-diff There were conflicts seen when applying this patch due to the following missing commits: ca8ac48 ("net: mana: Handle unsupported HWC commands") 505cc26 ("net: mana: Add support for auxiliary device servicing events") Upon receiving the Reset Request, pause the connection and clean up queues, wait for the specified period, then resume the NIC. In the cleanup phase, the HWC is no longer responding, so set hwc_timeout to zero to skip waiting on the response. Signed-off-by: Haiyang Zhang <[email protected]> Link: https://patch.msgid.link/[email protected] Signed-off-by: Jakub Kicinski <[email protected]> (cherry picked from commit fbe346c) Signed-off-by: Shreeya Patel <[email protected]>
1 parent fa0dcef commit 2dc9cfa

File tree

4 files changed

+143
-35
lines changed

4 files changed

+143
-35
lines changed

drivers/net/ethernet/microsoft/mana/gdma_main.c

Lines changed: 103 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <linux/version.h>
99

1010
#include <net/mana/mana.h>
11+
#include <net/mana/hw_channel.h>
1112

1213
#include <linux/cpu.h>
1314
struct dentry *mana_debugfs_root;
@@ -64,6 +65,24 @@ static void mana_gd_init_registers(struct pci_dev *pdev)
6465
mana_gd_init_vf_regs(pdev);
6566
}
6667

68+
/* Suppress logging when we set timeout to zero */
69+
bool mana_need_log(struct gdma_context *gc, int err)
70+
{
71+
struct hw_channel_context *hwc;
72+
73+
if (err != -ETIMEDOUT)
74+
return true;
75+
76+
if (!gc)
77+
return true;
78+
79+
hwc = gc->hwc.driver_data;
80+
if (hwc && hwc->hwc_timeout == 0)
81+
return false;
82+
83+
return true;
84+
}
85+
6786
static int mana_gd_query_max_resources(struct pci_dev *pdev)
6887
{
6988
struct gdma_context *gc = pci_get_drvdata(pdev);
@@ -267,8 +286,9 @@ static int mana_gd_disable_queue(struct gdma_queue *queue)
267286

268287
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
269288
if (err || resp.hdr.status) {
270-
dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err,
271-
resp.hdr.status);
289+
if (mana_need_log(gc, err))
290+
dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err,
291+
resp.hdr.status);
272292
return err ? err : -EPROTO;
273293
}
274294

@@ -353,25 +373,12 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
353373

354374
#define MANA_SERVICE_PERIOD 10
355375

356-
struct mana_serv_work {
357-
struct work_struct serv_work;
358-
struct pci_dev *pdev;
359-
};
360-
361-
static void mana_serv_func(struct work_struct *w)
376+
static void mana_serv_fpga(struct pci_dev *pdev)
362377
{
363-
struct mana_serv_work *mns_wk;
364378
struct pci_bus *bus, *parent;
365-
struct pci_dev *pdev;
366-
367-
mns_wk = container_of(w, struct mana_serv_work, serv_work);
368-
pdev = mns_wk->pdev;
369379

370380
pci_lock_rescan_remove();
371381

372-
if (!pdev)
373-
goto out;
374-
375382
bus = pdev->bus;
376383
if (!bus) {
377384
dev_err(&pdev->dev, "MANA service: no bus\n");
@@ -392,7 +399,74 @@ static void mana_serv_func(struct work_struct *w)
392399

393400
out:
394401
pci_unlock_rescan_remove();
402+
}
403+
404+
static void mana_serv_reset(struct pci_dev *pdev)
405+
{
406+
struct gdma_context *gc = pci_get_drvdata(pdev);
407+
struct hw_channel_context *hwc;
408+
409+
if (!gc) {
410+
dev_err(&pdev->dev, "MANA service: no GC\n");
411+
return;
412+
}
413+
414+
hwc = gc->hwc.driver_data;
415+
if (!hwc) {
416+
dev_err(&pdev->dev, "MANA service: no HWC\n");
417+
goto out;
418+
}
419+
420+
/* HWC is not responding in this case, so don't wait */
421+
hwc->hwc_timeout = 0;
422+
423+
dev_info(&pdev->dev, "MANA reset cycle start\n");
395424

425+
mana_gd_suspend(pdev, PMSG_SUSPEND);
426+
427+
msleep(MANA_SERVICE_PERIOD * 1000);
428+
429+
mana_gd_resume(pdev);
430+
431+
dev_info(&pdev->dev, "MANA reset cycle completed\n");
432+
433+
out:
434+
gc->in_service = false;
435+
}
436+
437+
struct mana_serv_work {
438+
struct work_struct serv_work;
439+
struct pci_dev *pdev;
440+
enum gdma_eqe_type type;
441+
};
442+
443+
static void mana_serv_func(struct work_struct *w)
444+
{
445+
struct mana_serv_work *mns_wk;
446+
struct pci_dev *pdev;
447+
448+
mns_wk = container_of(w, struct mana_serv_work, serv_work);
449+
pdev = mns_wk->pdev;
450+
451+
if (!pdev)
452+
goto out;
453+
454+
switch (mns_wk->type) {
455+
case GDMA_EQE_HWC_FPGA_RECONFIG:
456+
mana_serv_fpga(pdev);
457+
break;
458+
459+
case GDMA_EQE_HWC_RESET_REQUEST:
460+
mana_serv_reset(pdev);
461+
break;
462+
463+
default:
464+
dev_err(&pdev->dev, "MANA service: unknown type %d\n",
465+
mns_wk->type);
466+
break;
467+
}
468+
469+
out:
396470
pci_dev_put(pdev);
397471
kfree(mns_wk);
398472
module_put(THIS_MODULE);
@@ -448,6 +522,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
448522
break;
449523

450524
case GDMA_EQE_HWC_FPGA_RECONFIG:
525+
case GDMA_EQE_HWC_RESET_REQUEST:
451526
dev_info(gc->dev, "Recv MANA service type:%d\n", type);
452527

453528
if (gc->in_service) {
@@ -469,6 +544,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
469544
dev_info(gc->dev, "Start MANA service type:%d\n", type);
470545
gc->in_service = true;
471546
mns_wk->pdev = to_pci_dev(gc->dev);
547+
mns_wk->type = type;
472548
pci_dev_get(mns_wk->pdev);
473549
INIT_WORK(&mns_wk->serv_work, mana_serv_func);
474550
schedule_work(&mns_wk->serv_work);
@@ -615,7 +691,8 @@ int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq)
615691

616692
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
617693
if (err) {
618-
dev_err(dev, "test_eq failed: %d\n", err);
694+
if (mana_need_log(gc, err))
695+
dev_err(dev, "test_eq failed: %d\n", err);
619696
goto out;
620697
}
621698

@@ -650,7 +727,7 @@ static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets,
650727

651728
if (flush_evenets) {
652729
err = mana_gd_test_eq(gc, queue);
653-
if (err)
730+
if (err && mana_need_log(gc, err))
654731
dev_warn(gc->dev, "Failed to flush EQ: %d\n", err);
655732
}
656733

@@ -796,8 +873,9 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle)
796873

797874
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
798875
if (err || resp.hdr.status) {
799-
dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n",
800-
err, resp.hdr.status);
876+
if (mana_need_log(gc, err))
877+
dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n",
878+
err, resp.hdr.status);
801879
return -EPROTO;
802880
}
803881

@@ -1096,8 +1174,9 @@ int mana_gd_deregister_device(struct gdma_dev *gd)
10961174

10971175
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
10981176
if (err || resp.hdr.status) {
1099-
dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n",
1100-
err, resp.hdr.status);
1177+
if (mana_need_log(gc, err))
1178+
dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n",
1179+
err, resp.hdr.status);
11011180
if (!err)
11021181
err = -EPROTO;
11031182
}
@@ -1697,7 +1776,7 @@ static void mana_gd_remove(struct pci_dev *pdev)
16971776
}
16981777

16991778
/* The 'state' parameter is not used. */
1700-
static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
1779+
int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
17011780
{
17021781
struct gdma_context *gc = pci_get_drvdata(pdev);
17031782

@@ -1712,7 +1791,7 @@ static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
17121791
* fail -- if this happens, it's safer to just report an error than try to undo
17131792
* what has been done.
17141793
*/
1715-
static int mana_gd_resume(struct pci_dev *pdev)
1794+
int mana_gd_resume(struct pci_dev *pdev)
17161795
{
17171796
struct gdma_context *gc = pci_get_drvdata(pdev);
17181797
int err;

drivers/net/ethernet/microsoft/mana/hw_channel.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -854,7 +854,9 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
854854

855855
if (!wait_for_completion_timeout(&ctx->comp_event,
856856
(msecs_to_jiffies(hwc->hwc_timeout)))) {
857-
dev_err(hwc->dev, "HWC: Request timed out!\n");
857+
if (hwc->hwc_timeout != 0)
858+
dev_err(hwc->dev, "HWC: Request timed out!\n");
859+
858860
err = -ETIMEDOUT;
859861
goto out;
860862
}

drivers/net/ethernet/microsoft/mana/mana_en.c

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,15 @@ static const struct file_operations mana_dbg_q_fops = {
4545
.read = mana_dbg_q_read,
4646
};
4747

48+
static bool mana_en_need_log(struct mana_port_context *apc, int err)
49+
{
50+
if (apc && apc->ac && apc->ac->gdma_dev &&
51+
apc->ac->gdma_dev->gdma_context)
52+
return mana_need_log(apc->ac->gdma_dev->gdma_context, err);
53+
else
54+
return true;
55+
}
56+
4857
/* Microsoft Azure Network Adapter (MANA) functions */
4958

5059
static int mana_open(struct net_device *ndev)
@@ -768,7 +777,8 @@ static int mana_send_request(struct mana_context *ac, void *in_buf,
768777
err = mana_gd_send_request(gc, in_len, in_buf, out_len,
769778
out_buf);
770779
if (err || resp->status) {
771-
if (req->req.msg_type != MANA_QUERY_PHY_STAT)
780+
if (req->req.msg_type != MANA_QUERY_PHY_STAT &&
781+
mana_need_log(gc, err))
772782
dev_err(dev, "Failed to send mana message: %d, 0x%x\n",
773783
err, resp->status);
774784
return err ? err : -EPROTO;
@@ -845,8 +855,10 @@ static void mana_pf_deregister_hw_vport(struct mana_port_context *apc)
845855
err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
846856
sizeof(resp));
847857
if (err) {
848-
netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n",
849-
err);
858+
if (mana_en_need_log(apc, err))
859+
netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n",
860+
err);
861+
850862
return;
851863
}
852864

@@ -901,8 +913,10 @@ static void mana_pf_deregister_filter(struct mana_port_context *apc)
901913
err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
902914
sizeof(resp));
903915
if (err) {
904-
netdev_err(apc->ndev, "Failed to unregister filter: %d\n",
905-
err);
916+
if (mana_en_need_log(apc, err))
917+
netdev_err(apc->ndev, "Failed to unregister filter: %d\n",
918+
err);
919+
906920
return;
907921
}
908922

@@ -1132,7 +1146,9 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
11321146
err = mana_send_request(apc->ac, req, req_buf_size, &resp,
11331147
sizeof(resp));
11341148
if (err) {
1135-
netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
1149+
if (mana_en_need_log(apc, err))
1150+
netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
1151+
11361152
goto out;
11371153
}
11381154

@@ -1227,7 +1243,9 @@ void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
12271243
err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
12281244
sizeof(resp));
12291245
if (err) {
1230-
netdev_err(ndev, "Failed to destroy WQ object: %d\n", err);
1246+
if (mana_en_need_log(apc, err))
1247+
netdev_err(ndev, "Failed to destroy WQ object: %d\n", err);
1248+
12311249
return;
12321250
}
12331251

@@ -2872,11 +2890,10 @@ static int mana_dealloc_queues(struct net_device *ndev)
28722890

28732891
apc->rss_state = TRI_STATE_FALSE;
28742892
err = mana_config_rss(apc, TRI_STATE_FALSE, false, false);
2875-
if (err) {
2893+
if (err && mana_en_need_log(apc, err))
28762894
netdev_err(ndev, "Failed to disable vPort: %d\n", err);
2877-
return err;
2878-
}
28792895

2896+
/* Even in err case, still need to cleanup the vPort */
28802897
mana_destroy_vport(apc);
28812898

28822899
return 0;

include/net/mana/gdma.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ enum gdma_eqe_type {
6060
GDMA_EQE_HWC_INIT_DONE = 131,
6161
GDMA_EQE_HWC_FPGA_RECONFIG = 132,
6262
GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
63+
GDMA_EQE_HWC_RESET_REQUEST = 135,
6364
GDMA_EQE_RNIC_QP_FATAL = 176,
6465
};
6566

@@ -559,6 +560,9 @@ enum {
559560
/* Driver can handle holes (zeros) in the device list */
560561
#define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11)
561562

563+
/* Driver can self reset on EQE notification */
564+
#define GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE BIT(14)
565+
562566
/* Driver can self reset on FPGA Reconfig EQE notification */
563567
#define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
564568

@@ -568,6 +572,7 @@ enum {
568572
GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
569573
GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \
570574
GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
575+
GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
571576
GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE)
572577

573578
#define GDMA_DRV_CAP_FLAGS2 0
@@ -892,4 +897,9 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle);
892897
void mana_register_debugfs(void);
893898
void mana_unregister_debugfs(void);
894899

900+
int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state);
901+
int mana_gd_resume(struct pci_dev *pdev);
902+
903+
bool mana_need_log(struct gdma_context *gc, int err);
904+
895905
#endif /* _GDMA_H */

0 commit comments

Comments
 (0)