Message ID | 20240703125922.5625-4-mateusz.polchlopek@intel.com |
---|---|
State | Changes Requested |
Headers | show |
Series | Add support for devlink health events | expand |
Hi Mateusz, kernel test robot noticed the following build warnings: [auto build test WARNING on tnguy-next-queue/dev-queue] url: https://github.com/intel-lab-lkp/linux/commits/Mateusz-Polchlopek/checkpatch-don-t-complain-on-_Generic-use/20240704-184910 base: https://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue.git dev-queue patch link: https://lore.kernel.org/r/20240703125922.5625-4-mateusz.polchlopek%40intel.com patch subject: [Intel-wired-lan] [PATCH iwl-next v1 3/6] ice: add Tx hang devlink health reporter config: i386-allmodconfig (https://download.01.org/0day-ci/archive/20240705/202407050857.OSYEyokn-lkp@intel.com/config) compiler: gcc-13 (Ubuntu 13.2.0-4ubuntu3) 13.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240705/202407050857.OSYEyokn-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202407050857.OSYEyokn-lkp@intel.com/ All warnings (new ones prefixed by >>): drivers/net/ethernet/intel/ice/devlink/devlink_health.c: In function 'ice_tx_hang_reporter_dump': >> drivers/net/ethernet/intel/ice/devlink/devlink_health.c:76:43: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] 76 | ice_fmsg_put_ptr(fmsg, "dma-ptr", (void *)event->tx_ring->dma); | ^ vim +76 drivers/net/ethernet/intel/ice/devlink/devlink_health.c 60 61 static int ice_tx_hang_reporter_dump(struct devlink_health_reporter *reporter, 62 struct devlink_fmsg *fmsg, void *priv_ctx, 63 struct netlink_ext_ack *extack) 64 { 65 struct ice_tx_hang_event *event = priv_ctx; 66 67 devlink_fmsg_obj_nest_start(fmsg); 68 ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, head); 69 ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, intr); 70 ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, vsi_num); 71 ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, queue); 72 
ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_clean); 73 ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_use); 74 devlink_fmsg_put(fmsg, "irq-mapping", event->tx_ring->q_vector->name); 75 ice_fmsg_put_ptr(fmsg, "desc-ptr", event->tx_ring->desc); > 76 ice_fmsg_put_ptr(fmsg, "dma-ptr", (void *)event->tx_ring->dma); 77 devlink_fmsg_binary_pair_put(fmsg, "desc", event->tx_ring->desc, 78 size_mul(event->tx_ring->count, 79 sizeof(struct ice_tx_desc))); 80 devlink_fmsg_obj_nest_end(fmsg); 81 82 return 0; 83 } 84
On Wed, Jul 03, 2024 at 08:59:19AM -0400, Mateusz Polchlopek wrote: > From: Przemek Kitszel <przemyslaw.kitszel@intel.com> > > Add Tx hang devlink health reporter, see struct ice_tx_hang_event to see > what is reported. > > Subsequent commits will extend it by more info, for now it dumps > descriptors with little metadata. > > Signed-off-by: Przemek Kitszel <przemyslaw.kitszel@intel.com> > Reviewed-by: Igor Bagnucki <igor.bagnucki@intel.com> > Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com> > Signed-off-by: Mateusz Polchlopek <mateusz.polchlopek@intel.com> ... > +/** > + * ice_fmsg_put_ptr - put hex value of pointer into fmsg > + * > + * @fmsg: devlink fmsg under construction > + * @name: name to pass > + * @ptr: 64 bit value to print as hex and put into fmsg > + */ > +static void ice_fmsg_put_ptr(struct devlink_fmsg *fmsg, const char *name, > + void *ptr) > +{ > + char buf[sizeof(ptr) * 3]; > + > + sprintf(buf, "%p", ptr); > + devlink_fmsg_put(fmsg, name, buf); > +} ... > +static int ice_tx_hang_reporter_dump(struct devlink_health_reporter *reporter, > + struct devlink_fmsg *fmsg, void *priv_ctx, > + struct netlink_ext_ack *extack) > +{ > + struct ice_tx_hang_event *event = priv_ctx; > + > + devlink_fmsg_obj_nest_start(fmsg); > + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, head); > + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, intr); > + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, vsi_num); > + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, queue); > + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_clean); > + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_use); > + devlink_fmsg_put(fmsg, "irq-mapping", event->tx_ring->q_vector->name); > + ice_fmsg_put_ptr(fmsg, "desc-ptr", event->tx_ring->desc); > + ice_fmsg_put_ptr(fmsg, "dma-ptr", (void *)event->tx_ring->dma); As reported by the kernel test robot, GCC 13 complains about this cast: .../devlink_health.c: In function 'ice_tx_hang_reporter_dump': .../devlink_health.c:76:43: warning: cast to pointer from integer of 
different size [-Wint-to-pointer-cast] 76 | ice_fmsg_put_ptr(fmsg, "dma-ptr", (void *)event->tx_ring->dma); | Perhaps a good solution is to add a helper similar to ice_fmsg_put_ptr, but which takes a dma_buf_t rather than a void * as its last argument. > + devlink_fmsg_binary_pair_put(fmsg, "desc", event->tx_ring->desc, > + size_mul(event->tx_ring->count, > + sizeof(struct ice_tx_desc))); > + devlink_fmsg_obj_nest_end(fmsg); > + > + return 0; > +}
On 7/8/24 14:40, Simon Horman wrote: > On Wed, Jul 03, 2024 at 08:59:19AM -0400, Mateusz Polchlopek wrote: >> From: Przemek Kitszel <przemyslaw.kitszel@intel.com> >> >> Add Tx hang devlink health reporter, see struct ice_tx_hang_event to see >> what is reported. >> >> Subsequent commits will extend it by more info, for now it dumps >> descriptors with little metadata. >> >> Signed-off-by: Przemek Kitszel <przemyslaw.kitszel@intel.com> >> Reviewed-by: Igor Bagnucki <igor.bagnucki@intel.com> >> Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com> >> Signed-off-by: Mateusz Polchlopek <mateusz.polchlopek@intel.com> > > ... > >> +/** >> + * ice_fmsg_put_ptr - put hex value of pointer into fmsg >> + * >> + * @fmsg: devlink fmsg under construction >> + * @name: name to pass >> + * @ptr: 64 bit value to print as hex and put into fmsg >> + */ >> +static void ice_fmsg_put_ptr(struct devlink_fmsg *fmsg, const char *name, >> + void *ptr) >> +{ >> + char buf[sizeof(ptr) * 3]; >> + >> + sprintf(buf, "%p", ptr); >> + devlink_fmsg_put(fmsg, name, buf); >> +} > > ... 
> >> +static int ice_tx_hang_reporter_dump(struct devlink_health_reporter *reporter, >> + struct devlink_fmsg *fmsg, void *priv_ctx, >> + struct netlink_ext_ack *extack) >> +{ >> + struct ice_tx_hang_event *event = priv_ctx; >> + >> + devlink_fmsg_obj_nest_start(fmsg); >> + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, head); >> + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, intr); >> + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, vsi_num); >> + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, queue); >> + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_clean); >> + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_use); >> + devlink_fmsg_put(fmsg, "irq-mapping", event->tx_ring->q_vector->name); >> + ice_fmsg_put_ptr(fmsg, "desc-ptr", event->tx_ring->desc); >> + ice_fmsg_put_ptr(fmsg, "dma-ptr", (void *)event->tx_ring->dma); > > As reported by the kernel test robot, GCC 13 complains about this cast: > > .../devlink_health.c: In function 'ice_tx_hang_reporter_dump': > .../devlink_health.c:76:43: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] > 76 | ice_fmsg_put_ptr(fmsg, "dma-ptr", (void *)event->tx_ring->dma); > | > > Perhaps a good solution is to add a helper similar to ice_fmsg_put_ptr, > but which takes a dma_buf_t rather than a void * as its last argument. Instead of duplicating the function for just one call, I will simply resolve the warning by yet another cast: ice_fmsg_put_ptr(fmsg, "dma-ptr", (void *)(long)event->tx_ring->dma); ^^^^^^ // cast to long added > >> + devlink_fmsg_binary_pair_put(fmsg, "desc", event->tx_ring->desc, >> + size_mul(event->tx_ring->count, >> + sizeof(struct ice_tx_desc))); Here I would drop size_mul(), as any wrong ::count value could easily extend the dump past tx_ring memory, resulting in an attempt at reading past their page. And we are not really protecting against "too big" fmsg, as it is capped anyway to 4-8K. Perhaps fmsg-put also ::count to aid spotting such cases, but only if it is not the default 256.
-- not a change request, just a digression: it would be nice for devlink_fmsg_binary_pair_put() to compress "repeated same value", like hexdump(1) does. >> + devlink_fmsg_obj_nest_end(fmsg); >> + >> + return 0; >> +}
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 3307d551f431..f2baba82480c 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -33,6 +33,7 @@ ice-y := ice_main.o \ ice_idc.o \ devlink/devlink.o \ devlink/devlink_port.o \ + devlink/devlink_health.o \ ice_sf_eth.o \ ice_sf_vsi_vlan_ops.o \ ice_ddp.o \ diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_health.c b/drivers/net/ethernet/intel/ice/devlink/devlink_health.c new file mode 100644 index 000000000000..311719e69ea5 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/devlink/devlink_health.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Intel Corporation. */ + +#include "devlink_health.h" +#include "ice.h" + +#define ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, obj, name) \ + devlink_fmsg_put(fmsg, #name, (obj)->name) + +/** + * ice_devlink_health_report - boilerplate to call given @reporter + * + * @reporter: devlink health reporter to call, do nothing on NULL + * @msg: message to pass up, "event name" is fine + * @priv_ctx: typically some event struct + */ +static void ice_devlink_health_report(struct devlink_health_reporter *reporter, + const char *msg, void *priv_ctx) +{ + int err; + + if (!reporter) + return; + + err = devlink_health_report(reporter, msg, priv_ctx); + if (err) { + struct ice_pf *pf = devlink_health_reporter_priv(reporter); + + dev_err(ice_pf_to_dev(pf), + "failed to report %s via devlink health, err %d\n", + msg, err); + } +} + +/** + * ice_fmsg_put_ptr - put hex value of pointer into fmsg + * + * @fmsg: devlink fmsg under construction + * @name: name to pass + * @ptr: 64 bit value to print as hex and put into fmsg + */ +static void ice_fmsg_put_ptr(struct devlink_fmsg *fmsg, const char *name, + void *ptr) +{ + char buf[sizeof(ptr) * 3]; + + sprintf(buf, "%p", ptr); + devlink_fmsg_put(fmsg, name, buf); +} + +struct ice_tx_hang_event { + u32 head; + u32 intr; + 
u16 vsi_num; + u16 queue; + u16 next_to_clean; + u16 next_to_use; + struct ice_tx_ring *tx_ring; +}; + +static int ice_tx_hang_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct ice_tx_hang_event *event = priv_ctx; + + devlink_fmsg_obj_nest_start(fmsg); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, head); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, intr); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, vsi_num); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, queue); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_clean); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_use); + devlink_fmsg_put(fmsg, "irq-mapping", event->tx_ring->q_vector->name); + ice_fmsg_put_ptr(fmsg, "desc-ptr", event->tx_ring->desc); + ice_fmsg_put_ptr(fmsg, "dma-ptr", (void *)event->tx_ring->dma); + devlink_fmsg_binary_pair_put(fmsg, "desc", event->tx_ring->desc, + size_mul(event->tx_ring->count, + sizeof(struct ice_tx_desc))); + devlink_fmsg_obj_nest_end(fmsg); + + return 0; +} + +void ice_report_tx_hang(struct ice_pf *pf, struct ice_tx_ring *tx_ring, + u16 vsi_num, u32 head, u32 intr) +{ + struct ice_tx_hang_event ev = { + .head = head, + .intr = intr, + .vsi_num = vsi_num, + .queue = tx_ring->q_index, + .next_to_clean = tx_ring->next_to_clean, + .next_to_use = tx_ring->next_to_use, + .tx_ring = tx_ring, + }; + + ice_devlink_health_report(pf->health_reporters.tx_hang, "Tx hang", &ev); +} + +static struct devlink_health_reporter * +ice_init_devlink_rep(struct ice_pf *pf, + const struct devlink_health_reporter_ops *ops) +{ + struct devlink *devlink = priv_to_devlink(pf); + struct devlink_health_reporter *rep; + const u64 graceful_period = 0; + + rep = devl_health_reporter_create(devlink, ops, graceful_period, pf); + if (IS_ERR(rep)) { + struct device *dev = ice_pf_to_dev(pf); + + dev_err(dev, "failed to create devlink %s health report er", + ops->name); + return NULL; + } + return rep; +} + +#define 
ICE_DEFINE_HEALTH_REPORTER_OPS(_name) \ + static const struct devlink_health_reporter_ops ice_ ## _name ## _reporter_ops = { \ + .name = #_name, \ + .dump = ice_ ## _name ## _reporter_dump, \ +} + +ICE_DEFINE_HEALTH_REPORTER_OPS(tx_hang); + +/** + * ice_health_init - allocate and init all ice devlink health reporters and + * accompanied data + * + * @pf: PF struct + */ +void ice_health_init(struct ice_pf *pf) +{ + struct ice_health *reps = &pf->health_reporters; + + reps->tx_hang = ice_init_devlink_rep(pf, &ice_tx_hang_reporter_ops); +} + +/** + * ice_deinit_devl_reporter - destroy given devlink health reporter + * @reporter: reporter to destroy + */ +static void ice_deinit_devl_reporter(struct devlink_health_reporter *reporter) +{ + if (reporter) + devl_health_reporter_destroy(reporter); +} + +/** + * ice_health_deinit - deallocate all ice devlink health reporters and + * accompanied data + * + * @pf: PF struct + */ +void ice_health_deinit(struct ice_pf *pf) +{ + ice_deinit_devl_reporter(pf->health_reporters.tx_hang); +} + +static +void ice_health_assign_healthy_state(struct devlink_health_reporter *reporter) +{ + if (reporter) + devlink_health_reporter_state_update(reporter, + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); +} + +/** + * ice_health_clear - clear devlink health issues after a reset + * @pf: the PF device structure + * + * Mark the PF in healthy state again after a reset has completed. + */ +void ice_health_clear(struct ice_pf *pf) +{ + ice_health_assign_healthy_state(pf->health_reporters.tx_hang); +} diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_health.h b/drivers/net/ethernet/intel/ice/devlink/devlink_health.h new file mode 100644 index 000000000000..984b8f9f56d4 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/devlink/devlink_health.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2024, Intel Corporation. 
*/ + +#ifndef _DEVLINK_HEALTH_H_ +#define _DEVLINK_HEALTH_H_ + +#include <linux/types.h> + +/** + * DOC: devlink_health.h + * + * This header file stores everything that is needed for broadly understood + * devlink health mechanism for ice driver. + */ + +struct ice_pf; +struct ice_tx_ring; + +/** + * struct ice_health - stores ice devlink health reporters and accompanied data + * @tx_hang: devlink health reporter for tx_hang event + */ +struct ice_health { + struct devlink_health_reporter *tx_hang; +}; + +void ice_health_init(struct ice_pf *pf); +void ice_health_deinit(struct ice_pf *pf); +void ice_health_clear(struct ice_pf *pf); + +void ice_report_tx_hang(struct ice_pf *pf, struct ice_tx_ring *tx_ring, + u16 vsi_num, u32 head, u32 intr); + +#endif /* _DEVLINK_HEALTH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 0046684004ff..d2f2ed2d4bfa 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -78,6 +78,7 @@ #include "ice_irq.h" #include "ice_dpll.h" #include "ice_adapter.h" +#include "devlink/devlink_health.h" #define ICE_BAR0 0 #define ICE_REQ_DESC_MULTIPLE 32 @@ -667,6 +668,7 @@ struct ice_pf { struct ice_agg_node vf_agg_node[ICE_MAX_VF_AGG_NODES]; struct ice_dplls dplls; struct device *hwmon_dev; + struct ice_health health_reporters; }; extern struct workqueue_struct *ice_lag_wq; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 59c4264d8f9b..246dcfe54397 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5053,6 +5053,7 @@ static int ice_init_devlink(struct ice_pf *pf) return err; ice_devlink_init_regions(pf); + ice_health_init(pf); ice_devlink_register(pf); return 0; @@ -5061,6 +5062,7 @@ static int ice_init_devlink(struct ice_pf *pf) static void ice_deinit_devlink(struct ice_pf *pf) { ice_devlink_unregister(pf); + ice_health_deinit(pf); 
ice_devlink_destroy_regions(pf); ice_devlink_unregister_params(pf); } @@ -7744,6 +7746,8 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) /* if we get here, reset flow is successful */ clear_bit(ICE_RESET_FAILED, pf->state); + ice_health_clear(pf); + ice_plug_aux_dev(pf); if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) ice_lag_rebuild(pf); @@ -8231,16 +8235,17 @@ void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) if (tx_ring) { struct ice_hw *hw = &pf->hw; - u32 head, val = 0; + u32 head, intr = 0; head = FIELD_GET(QTX_COMM_HEAD_HEAD_M, rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue]))); /* Read interrupt register */ - val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx)); + intr = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx)); netdev_info(netdev, "tx_timeout: VSI_num: %d, Q %u, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n", vsi->vsi_num, txqueue, tx_ring->next_to_clean, - head, tx_ring->next_to_use, val); + head, tx_ring->next_to_use, intr); + ice_report_tx_hang(pf, tx_ring, vsi->vsi_num, head, intr); } pf->tx_timeout_last_recovery = jiffies;