diff mbox

[RFC,2/3] RDMA/cma: Add support for netlink statistics export

Message ID 1305303525-11113-3-git-send-email-roland@kernel.org
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Roland Dreier May 13, 2011, 4:18 p.m. UTC
From: Nir Muchtar <nirm@voltaire.com>

[Dave please do not apply even if this ends up in netdev patchwork!]

Add callbacks and data types for statistics export of all current
devices/ids.  The schema for RDMA CM is a series of netlink messages.
Each one contains an rdma_cm_stat struct.  Additionally, two netlink
attributes are created for the addresses for each message (if
applicable).

Their types used are:
RDMA_NL_RDMA_CM_ATTR_SRC_ADDR (The source address for this ID)
RDMA_NL_RDMA_CM_ATTR_DST_ADDR (The destination address for this ID)
sockaddr_* structs are encapsulated within these attributes.

In other words, every transaction contains a series of messages like:

-------message 1-------
struct rdma_cm_id_stats {
       __u32 qp_num;
       __u32 bound_dev_if;
       __u32 port_space;
       __s32 pid;
       __u8 cm_state;
       __u8 node_type;
       __u8 port_num;
       __u8 reserved;
}
RDMA_NL_RDMA_CM_ATTR_SRC_ADDR attribute - contains the source address
RDMA_NL_RDMA_CM_ATTR_DST_ADDR attribute - contains the destination address
-------end 1-------
-------message 2-------
struct rdma_cm_id_stats
RDMA_NL_RDMA_CM_ATTR_SRC_ADDR attribute
RDMA_NL_RDMA_CM_ATTR_DST_ADDR attribute
-------end 2-------

NOT-Signed-off-by: Nir Muchtar <nirm@voltaire.com>
NOT-Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/core/cma.c |   98 +++++++++++++++++++++++++++++++++++++++++
 include/rdma/rdma_netlink.h   |   28 ++++++++++++
 2 files changed, 126 insertions(+), 0 deletions(-)

Comments

Sean Hefty May 13, 2011, 5:21 p.m. UTC | #1
> +struct rdma_cm_id_stats {
> +	__u32	qp_num;
> +	__u32	bound_dev_if;
> +	__u32	port_space;
> +	__s32	pid;
> +	__u8	cm_state;
> +	__u8	node_type;
> +	__u8	port_num;
> +	__u8	reserved;
> +};

We may also want to add qp_type
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz May 16, 2011, 8:05 a.m. UTC | #2
Hefty, Sean wrote:
>> +struct rdma_cm_id_stats {
>> +	__u32	qp_num;
>> +	__u32	bound_dev_if;
>> +	__u32	port_space;
>> +	__s32	pid;
>> +	__u8	cm_state;
>> +	__u8	node_type;
>> +	__u8	port_num;
>> +	__u8	reserved;
>> +};
> 
> We may also want to add qp_type

Sean,

Isn't the port space enough here? specifically, what qp type buys us 
over port space?

Or.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sean Hefty May 16, 2011, 3:08 p.m. UTC | #3
> Isn't the port space enough here? specifically, what qp type buys us
> over port space?

That assumes a 1:1 relationship between the port space and QP type, or more specifically, that the IB SID range is divided based on QP type.  This is outside of the architecture.  If we look at UC or XRC, it's not clear what port space those map to, but I believe it's desirable for the rdma_cm to support those.  Note that the AF_IB patch set ended up requiring that the user specify the qp type for these reasons.

- Sean
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz May 16, 2011, 7:13 p.m. UTC | #4
Hefty, Sean <sean.hefty@intel.com> wrote:

> That assumes a 1:1 relationship between the port space and QP type, or more
> specifically, that the IB SID range is divided based on QP type.  This is outside of the
> architecture.  If we look at UC or XRC, it's not clear what port space those map to, but I
> believe it's desirable for the rdma_cm to support those.

makes sense, so we can add the qp type here.

Or.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Roland Dreier May 19, 2011, 6:10 p.m. UTC | #5
On Fri, May 13, 2011 at 10:21 AM, Hefty, Sean <sean.hefty@intel.com> wrote:
>> +struct rdma_cm_id_stats {
>> +     __u32   qp_num;
>> +     __u32   bound_dev_if;
>> +     __u32   port_space;
>> +     __s32   pid;
>> +     __u8    cm_state;
>> +     __u8    node_type;
>> +     __u8    port_num;
>> +     __u8    reserved;
>> +};
>
> We may also want to add qp_type

Is there an easy way to get the qp_type from a struct rdma_cm_id?

ie what code needs to go into cma_get_id_stats() to handle this?

Thanks,
  Roland
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sean Hefty May 19, 2011, 6:35 p.m. UTC | #6
> Is there an easy way to get the qp_type from a struct rdma_cm_id?
> 
> ie what code needs to go into cma_get_id_stats() to handle this?

With the current code, you'd need to map from the port space:

static inline enum ib_qp_type cma_get_qp_type(struct rdma_cm_id *id)
{
	if (id->ps == RDMA_PS_IPOIB || id->ps == RDMA_PS_UDP)
		return IB_QPT_RC;
	else
		return IB_QPT_RC;
}

	..
	id_stats->qp_type = cma_get_qp_type(id);
	..

One of the patches in the af_ib patch set adds the qp_type to struct rdma_cm_id.  I'm guessing that patch will also be needed for xrc.

- Sean
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Roland Dreier May 19, 2011, 6:49 p.m. UTC | #7
On Thu, May 19, 2011 at 11:35 AM, Hefty, Sean <sean.hefty@intel.com> wrote:
> One of the patches in the af_ib patch set adds the qp_type to struct rdma_cm_id.  I'm guessing that patch will also be needed for xrc.

Would it make sense to pull that patch at least in?

Sorry I haven't had time to really think about AF_IB in general but
maybe I can at least merge the netlink stuff this cycle?

 - R.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Roland Dreier May 19, 2011, 6:53 p.m. UTC | #8
On Thu, May 19, 2011 at 11:49 AM, Roland Dreier <roland@kernel.org> wrote:
> On Thu, May 19, 2011 at 11:35 AM, Hefty, Sean <sean.hefty@intel.com> wrote:
>> One of the patches in the af_ib patch set adds the qp_type to struct rdma_cm_id.  I'm guessing that patch will also be needed for xrc.
>
> Would it make sense to pull that patch at least in?
>
> Sorry I haven't had time to really think about AF_IB in general but
> maybe I can at least merge the netlink stuff this cycle?

And actually looking at https://patchwork.kernel.org/patch/90204/
it looks reasonable to merge now... or I could just use
cma_is_ud_ps() in the netlink stuff.

I'm inclined to just take your patch now if that makes sense to you
(it looks pretty independent of everything else)

Is that the latest version in patchwork?

Thanks,
 - R.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sean Hefty May 19, 2011, 7:03 p.m. UTC | #9
> And actually looking at https://patchwork.kernel.org/patch/90204/
> it looks reasonable to merge now... or I could just use
> cma_is_ud_ps() in the netlink stuff.
> 
> I'm inclined to just take your patch now if that makes sense to you
> (it looks pretty independent of everything else)
> 
> Is that the latest version in patchwork?

The only difference that I see between that patch and what's in my tree are the line numbers.  The patch looks safe enough to me to go in separately from anything else.  Thanks

- Sean
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 05b55e4..d4701a8 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -47,6 +47,7 @@ 
 
 #include <rdma/rdma_cm.h>
 #include <rdma/rdma_cm_ib.h>
+#include <rdma/rdma_netlink.h>
 #include <rdma/ib_cache.h>
 #include <rdma/ib_cm.h>
 #include <rdma/ib_sa.h>
@@ -3247,6 +3248,98 @@  static void cma_remove_one(struct ib_device *device)
 	kfree(cma_dev);
 }
 
+static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nlmsghdr *nlh;
+	struct rdma_cm_id_stats *id_stats;
+	struct rdma_id_private *id_priv;
+	struct rdma_cm_id *id = NULL;
+	struct cma_device *cma_dev;
+	int i_dev = 0, i_id = 0;
+
+	/*
+	 * We export all of the IDs as a sequence of messages.  Each
+	 * ID gets its own netlink message.
+	 */
+	mutex_lock(&lock);
+
+	list_for_each_entry(cma_dev, &dev_list, list) {
+		if (i_dev < cb->args[0]) {
+			i_dev++;
+			continue;
+		}
+
+		i_id = 0;
+		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
+			if (i_id < cb->args[1]) {
+				i_id++;
+				continue;
+			}
+
+			id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
+						sizeof *id_stats, RDMA_NL_RDMA_CM,
+						RDMA_NL_RDMA_CM_ID_STATS);
+			if (!id_stats)
+				goto out;
+
+			memset(id_stats, 0, sizeof *id_stats);
+			id = &id_priv->id;
+			id_stats->node_type = id->route.addr.dev_addr.dev_type;
+			id_stats->port_num = id->port_num;
+			id_stats->bound_dev_if =
+				id->route.addr.dev_addr.bound_dev_if;
+
+			if (id->route.addr.src_addr.ss_family == AF_INET) {
+				if (ibnl_put_attr(skb, nlh,
+						  sizeof(struct sockaddr_in),
+						  &id->route.addr.src_addr,
+						  RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) {
+					goto out;
+				}
+				if (ibnl_put_attr(skb, nlh,
+						  sizeof(struct sockaddr_in),
+						  &id->route.addr.dst_addr,
+						  RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) {
+					goto out;
+				}
+			} else if (id->route.addr.src_addr.ss_family == AF_INET6) {
+				if (ibnl_put_attr(skb, nlh,
+						  sizeof(struct sockaddr_in6),
+						  &id->route.addr.src_addr,
+						  RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) {
+					goto out;
+				}
+				if (ibnl_put_attr(skb, nlh,
+						  sizeof(struct sockaddr_in6),
+						  &id->route.addr.dst_addr,
+						  RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) {
+					goto out;
+				}
+			}
+
+			id_stats->port_space = id->ps;
+			id_stats->cm_state = id_priv->state;
+			id_stats->qp_num = id_priv->qp_num;
+
+			i_id++;
+		}
+
+		cb->args[1] = 0;
+		i_dev++;
+	}
+
+out:
+	mutex_unlock(&lock);
+	cb->args[0] = i_dev;
+	cb->args[1] = i_id;
+
+	return skb->len;
+}
+
+static const struct ibnl_client_cbs cma_cb_table[] = {
+	[RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats },
+};
+
 static int __init cma_init(void)
 {
 	int ret;
@@ -3262,6 +3355,10 @@  static int __init cma_init(void)
 	ret = ib_register_client(&cma_client);
 	if (ret)
 		goto err;
+
+	if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
+		printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n");
+
 	return 0;
 
 err:
@@ -3274,6 +3371,7 @@  err:
 
 static void __exit cma_cleanup(void)
 {
+	ibnl_remove_client(RDMA_NL_RDMA_CM);
 	ib_unregister_client(&cma_client);
 	unregister_netdevice_notifier(&cma_nb);
 	rdma_addr_unregister_client(&addr_client);
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index c983a19..fa318af 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -1,10 +1,38 @@ 
 #ifndef _RDMA_NETLINK_H
 #define _RDMA_NETLINK_H
 
+#include <linux/types.h>
+
+enum {
+	RDMA_NL_RDMA_CM = 1
+};
+
 #define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10)
 #define RDMA_NL_GET_OP(type) (type & ((1 << 10) - 1))
 #define RDMA_NL_GET_TYPE(client, op) ((client << 10) + op)
 
+enum {
+	RDMA_NL_RDMA_CM_ID_STATS = 0,
+	RDMA_NL_RDMA_CM_NUM_OPS
+};
+
+enum {
+	RDMA_NL_RDMA_CM_ATTR_SRC_ADDR = 1,
+	RDMA_NL_RDMA_CM_ATTR_DST_ADDR,
+	RDMA_NL_RDMA_CM_NUM_ATTR,
+};
+
+struct rdma_cm_id_stats {
+	__u32	qp_num;
+	__u32	bound_dev_if;
+	__u32	port_space;
+	__s32	pid;
+	__u8	cm_state;
+	__u8	node_type;
+	__u8	port_num;
+	__u8	reserved;
+};
+
 #ifdef __KERNEL__
 
 #include <linux/netlink.h>