From patchwork Mon Dec 31 14:31:56 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Eran Ben Elisha X-Patchwork-Id: 1019573 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming-netdev@ozlabs.org Delivered-To: patchwork-incoming-netdev@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=netdev-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=mellanox.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 43T0CP37H4z9sBQ for ; Tue, 1 Jan 2019 01:32:49 +1100 (AEDT) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727318AbeLaOcf (ORCPT ); Mon, 31 Dec 2018 09:32:35 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:52199 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726355AbeLaOc1 (ORCPT ); Mon, 31 Dec 2018 09:32:27 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from eranbe@mellanox.com) with ESMTPS (AES256-SHA encrypted); 31 Dec 2018 16:32:21 +0200 Received: from dev-l-vrt-198.mtl.labs.mlnx (dev-l-vrt-198.mtl.labs.mlnx [10.134.198.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id wBVEWLYo017529; Mon, 31 Dec 2018 16:32:21 +0200 From: Eran Ben Elisha To: netdev@vger.kernel.org, "David S. 
Miller" , Jiri Pirko Cc: Moshe Shemesh , Aya Levin , Eran Ben Elisha , Tal Alon , Ariel Almog Subject: [PATCH RFC net-next 02/19] devlink: Add health reporter create/destroy functionality Date: Mon, 31 Dec 2018 16:31:56 +0200 Message-Id: <1546266733-9512-3-git-send-email-eranbe@mellanox.com> X-Mailer: git-send-email 1.8.4.3 In-Reply-To: <1546266733-9512-1-git-send-email-eranbe@mellanox.com> References: <1546266733-9512-1-git-send-email-eranbe@mellanox.com> Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Devlink health reporter is an instance for reporting, diagnosing and recovering from run-time errors discovered by the reporters. Define its data structure and supported operations. In addition, expose devlink API to create and destroy a reporter. Each devlink instance will hold its own reporters list. As part of the allocation, driver shall provide a set of callbacks which will be used by devlink in order to handle health reports and user commands related to this reporter. In addition, driver is entitled to provide some priv pointer, which can be fetched from the reporter by devlink_health_reporter_priv function. For each reporter, devlink will hold metadata of statistics, buffers and status. 
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
---
 include/net/devlink.h |  59 ++++++++++++++++++++
 net/core/devlink.c    | 126 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 185 insertions(+)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 77c77319290a..6884a2571348 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -30,6 +30,7 @@ struct devlink {
 	struct list_head param_list;
 	struct list_head region_list;
 	u32 snapshot_id;
+	struct list_head reporter_list;
 	struct devlink_dpipe_headers *dpipe_headers;
 	const struct devlink_ops *ops;
 	struct device *dev;
@@ -424,6 +425,34 @@ struct devlink_region;
 typedef void devlink_snapshot_data_dest_t(const void *data);
 
 struct devlink_health_buffer;
+struct devlink_health_reporter;
+
+/**
+ * struct devlink_health_reporter_ops - Reporter operations
+ * @name: reporter name
+ * @objdump_size: objdump buffer size allocated by the devlink
+ * @diagnose_size: diagnose buffer size allocated by the devlink
+ * @recover: callback to recover from reported error
+ *           if priv_ctx is NULL, run a full recover
+ * @objdump: callback to dump an object
+ *           if priv_ctx is NULL, run a full objdump
+ * @diagnose: callback to diagnose the current status
+ */
+
+struct devlink_health_reporter_ops {
+	char *name;
+	unsigned int objdump_size;
+	unsigned int diagnose_size;
+	int (*recover)(struct devlink_health_reporter *reporter,
+		       void *priv_ctx);
+	int (*objdump)(struct devlink_health_reporter *reporter,
+		       struct devlink_health_buffer **buffers_array,
+		       unsigned int buffer_size, unsigned int num_buffers,
+		       void *priv_ctx);
+	int (*diagnose)(struct devlink_health_reporter *reporter,
+			struct devlink_health_buffer **buffers_array,
+			unsigned int buffer_size, unsigned int num_buffers);
+};
 
 struct devlink_ops {
 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
@@ -602,6 +631,16 @@ int devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer,
 					   char *name);
 int devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
 					 void *data, int len);
+struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+			       const struct devlink_health_reporter_ops *ops,
+			       u64 graceful_period, bool auto_recover,
+			       void *priv);
+void
+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
+
+void *
+devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
 #else
 
 static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
@@ -919,6 +958,26 @@ devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
 				     void *data, int len)
 {
 	return 0;
+}
+
+static inline struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+			       const struct devlink_health_reporter_ops *ops,
+			       u64 graceful_period, bool auto_recover,
+			       void *priv)
+{
+	return NULL;
+}
+
+static inline void
+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+}
+
+static inline void *
+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
+{
+	return NULL;
 }
 
 #endif
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 36018af086a0..a69d4679211f 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4098,6 +4098,131 @@ devlink_health_buffer_snd(struct genl_info *info,
 	return err;
 }
 
+struct devlink_health_reporter {
+	struct list_head list;
+	struct devlink_health_buffer **objdump_buffers_array;
+	struct mutex objdump_lock; /* lock parallel read/write from objdump buffers */
+	struct devlink_health_buffer **diagnose_buffers_array;
+	struct mutex diagnose_lock; /* lock parallel read/write from diagnose buffers */
+	void *priv;
+	const struct devlink_health_reporter_ops *ops;
+	struct devlink *devlink;
+	u64 graceful_period;
+	bool auto_recover;
+	u8 health_state;
+};
+
+void *
+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
+{
+	return reporter->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
+
+static struct devlink_health_reporter *
+devlink_health_reporter_find_by_name(struct devlink *devlink,
+				     const char *reporter_name)
+{
+	struct devlink_health_reporter *reporter;
+
+	list_for_each_entry(reporter, &devlink->reporter_list, list)
+		if (!strcmp(reporter->ops->name, reporter_name))
+			return reporter;
+	return NULL;
+}
+
+/**
+ *	devlink_health_reporter_create - create devlink health reporter
+ *
+ *	@devlink: devlink
+ *	@ops: ops
+ *	@graceful_period: to avoid recovery loops, in msecs
+ *	@auto_recover: auto recover when error occurs
+ *	@priv: priv
+ *
+ *	Return: the new reporter, or ERR_PTR(-EEXIST), ERR_PTR(-EINVAL) or
+ *	ERR_PTR(-ENOMEM) on failure. Nothing stays allocated on failure.
+ */
+struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+			       const struct devlink_health_reporter_ops *ops,
+			       u64 graceful_period, bool auto_recover,
+			       void *priv)
+{
+	struct devlink_health_reporter *reporter;
+
+	if (devlink_health_reporter_find_by_name(devlink, ops->name))
+		return ERR_PTR(-EEXIST);
+
+	if (WARN_ON(ops->objdump && !ops->objdump_size) ||
+	    WARN_ON(ops->diagnose && !ops->diagnose_size) ||
+	    WARN_ON(auto_recover && !ops->recover) ||
+	    WARN_ON(graceful_period && !ops->recover))
+		return ERR_PTR(-EINVAL);
+
+	reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
+	if (!reporter)
+		return ERR_PTR(-ENOMEM);
+
+	if (ops->objdump) {
+		reporter->objdump_buffers_array =
+			devlink_health_buffers_create(ops->objdump_size);
+		if (!reporter->objdump_buffers_array)
+			goto err_free_reporter;
+	}
+
+	if (ops->diagnose) {
+		reporter->diagnose_buffers_array =
+			devlink_health_buffers_create(ops->diagnose_size);
+		if (!reporter->diagnose_buffers_array)
+			goto err_destroy_objdump;
+	}
+
+	mutex_init(&reporter->objdump_lock);
+	mutex_init(&reporter->diagnose_lock);
+	reporter->priv = priv;
+	reporter->ops = ops;
+	reporter->devlink = devlink;
+	reporter->graceful_period = graceful_period;
+	reporter->auto_recover = auto_recover;
+
+	/* Link the reporter in only once it is fully initialised */
+	list_add_tail(&reporter->list, &devlink->reporter_list);
+
+	return reporter;
+
+err_destroy_objdump:
+	/* objdump buffers exist only if ops->objdump was provided */
+	if (reporter->objdump_buffers_array)
+		devlink_health_buffers_destroy(reporter->objdump_buffers_array,
+					       DEVLINK_HEALTH_SIZE_TO_BUFFERS(ops->objdump_size));
+err_free_reporter:
+	kfree(reporter);
+	return ERR_PTR(-ENOMEM);
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
+
+/**
+ *	devlink_health_reporter_destroy - destroy devlink health reporter
+ *
+ *	@reporter: devlink health reporter to destroy
+ */
+void
+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+	list_del(&reporter->list);
+	mutex_destroy(&reporter->diagnose_lock);
+	mutex_destroy(&reporter->objdump_lock);
+	if (reporter->objdump_buffers_array)
+		devlink_health_buffers_destroy(reporter->objdump_buffers_array,
+					       DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->objdump_size));
+	if (reporter->diagnose_buffers_array)
+		devlink_health_buffers_destroy(reporter->diagnose_buffers_array,
+					       DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size));
+	kfree(reporter);
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -4383,6 +4508,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	INIT_LIST_HEAD(&devlink->resource_list);
 	INIT_LIST_HEAD(&devlink->param_list);
 	INIT_LIST_HEAD(&devlink->region_list);
+	INIT_LIST_HEAD(&devlink->reporter_list);
 	mutex_init(&devlink->lock);
 	return devlink;
 }