diff mbox series

[ovs-dev,v2,2/2] netlink-socket: Log extack error messages in netlink transactions.

Message ID 162869662938.39743.727725386813923175.stgit@fed.void
State Accepted
Headers show
Series netlink-socket: Improve logging in nl transactions. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed

Commit Message

Paolo Valerio Aug. 11, 2021, 3:43 p.m. UTC
During a netlink transaction, in case of replies of type NLMSG_ERROR,
the current behavior includes the translation of the error number
received into a string that describes the error code.

Netlink replies may carry a more descriptive error message, and
although it is possible to read those messages using the existing perf
tracepoint, it is more convenient to retrieve them directly from ovs.

This patch extends nl_msg_nlmsgerr() so that it retrieves the message
that later, if present, will be used by nl_sock_transact_multiple__()
in place of the generic descriptive form of the error number.  This is
particularly useful with tc that makes use of such kind of mechanism.

As an example, with this patch applied, the following generic message:

ovs|00239|netlink_socket|DBG|received NAK error=0 (Operation not supported)

becomes:

ovs|00239|netlink_socket|DBG|received NAK error=0 - Conntrack isn't enabled

The layout has been slightly modified to avoid nested parentheses.

Suggested-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Signed-off-by: Paolo Valerio <pvalerio@redhat.com>
---
 lib/netlink-protocol.h |   19 +++++++++++++++++++
 lib/netlink-socket.c   |   22 +++++++++++++++-------
 lib/netlink.c          |   35 +++++++++++++++++++++++++++++++++--
 lib/netlink.h          |    2 +-
 4 files changed, 68 insertions(+), 10 deletions(-)

Comments

Ilya Maximets Jan. 16, 2022, 10:14 p.m. UTC | #1
On 8/11/21 17:43, Paolo Valerio wrote:
> During a netlink transaction, in case of replies of type NLMSG_ERROR,
> the current behavior includes the translation of the error number
> received into a string that describes the error code.
> 
> Netlink replies may carry a more descriptive error message, and
> although it is possible to read those messages using the existing perf
> tracepoint, it is more convenient to retrieve them directly from ovs.
> 
> This patch extends nl_msg_nlmsgerr() so that it retrieves the message
> that later, if present, will be used by nl_sock_transact_multiple__()
> in place of the generic descriptive form of the error number.  This is
> particularly useful with tc that makes use of such kind of mechanism.
> 
> As an example, with this patch applied, the following generic message:
> 
> ovs|00239|netlink_socket|DBG|received NAK error=0 (Operation not supported)
> 
> becomes:
> 
> ovs|00239|netlink_socket|DBG|received NAK error=0 - Conntrack isn't enabled
> 
> The layout has been slightly modified to avoid nested parentheses.
> 
> Suggested-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
> Signed-off-by: Paolo Valerio <pvalerio@redhat.com>
> ---

Thanks, Paolo and Marcelo!
Sorry, this one fell through the cracks.  Applied now.

Best regards, Ilya Maximets.
diff mbox series

Patch

diff --git a/lib/netlink-protocol.h b/lib/netlink-protocol.h
index ceded7915..6eaa7035a 100644
--- a/lib/netlink-protocol.h
+++ b/lib/netlink-protocol.h
@@ -186,4 +186,23 @@  enum {
 #define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1)
 #endif /* CTRL_ATTR_MCAST_GRP_MAX */
 
+#ifndef NETLINK_EXT_ACK
+
+#define NETLINK_CAP_ACK         10
+#define NETLINK_EXT_ACK         11
+
+/* ACK message flags. */
+#define NLM_F_CAPPED            0x100
+#define NLM_F_ACK_TLVS          0x200
+
+enum {
+    NLMSGERR_ATTR_UNUSED,
+    NLMSGERR_ATTR_MSG,
+    NLMSGERR_ATTR_OFFS,
+    __NLMSGERR_ATTR_MAX,
+    NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1
+};
+
+#endif /* NLM_F_ACK_TLVS */
+
 #endif /* netlink-protocol.h */
diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
index 5867de564..93c1fa561 100644
--- a/lib/netlink-socket.c
+++ b/lib/netlink-socket.c
@@ -119,6 +119,7 @@  nl_sock_create(int protocol, struct nl_sock **sockp)
     struct nl_sock *sock;
 #ifndef _WIN32
     struct sockaddr_nl local, remote;
+    int one = 1;
 #endif
     socklen_t local_size;
     int rcvbuf;
@@ -189,6 +190,11 @@  nl_sock_create(int protocol, struct nl_sock **sockp)
         goto error;
     }
 #else
+    if (setsockopt(sock->fd, SOL_NETLINK, NETLINK_EXT_ACK, &one, sizeof one)) {
+        VLOG_WARN_RL(&rl, "setting extended ack support failed (%s)",
+                     ovs_strerror(errno));
+    }
+
     if (setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUFFORCE,
                    &rcvbuf, sizeof rcvbuf)) {
         /* Only root can use SO_RCVBUFFORCE.  Everyone else gets EPERM.
@@ -910,15 +916,17 @@  nl_sock_transact_multiple__(struct nl_sock *sock,
         i = seq - base_seq;
         txn = transactions[i];
 
+        const char *err_msg = NULL;
         /* Fill in the results for 'txn'. */
-        if (nl_msg_nlmsgerr(buf_txn->reply, &txn->error)) {
+        if (nl_msg_nlmsgerr(buf_txn->reply, &txn->error, &err_msg)) {
+            if (txn->error) {
+                VLOG_DBG_RL(&rl, "received NAK error=%d - %s",
+                            txn->error,
+                            err_msg ? err_msg : ovs_strerror(txn->error));
+            }
             if (txn->reply) {
                 ofpbuf_clear(txn->reply);
             }
-            if (txn->error) {
-                VLOG_DBG_RL(&rl, "received NAK error=%d (%s)",
-                            txn->error, ovs_strerror(txn->error));
-            }
         } else {
             txn->error = 0;
             if (txn->reply && txn != buf_txn) {
@@ -999,7 +1007,7 @@  nl_sock_transact_multiple__(struct nl_sock *sock,
             /* Handle errors embedded within the netlink message. */
             ofpbuf_use_stub(&tmp_reply, reply_buf, sizeof reply_buf);
             tmp_reply.size = sizeof reply_buf;
-            if (nl_msg_nlmsgerr(&tmp_reply, &txn->error)) {
+            if (nl_msg_nlmsgerr(&tmp_reply, &txn->error, NULL)) {
                 if (txn->reply) {
                     ofpbuf_clear(txn->reply);
                 }
@@ -1191,7 +1199,7 @@  nl_dump_refill(struct nl_dump *dump, struct ofpbuf *buffer)
         }
     }
 
-    if (nl_msg_nlmsgerr(buffer, &error) && error) {
+    if (nl_msg_nlmsgerr(buffer, &error, NULL) && error) {
         VLOG_INFO_RL(&rl, "netlink dump request error (%s)",
                      ovs_strerror(error));
         ofpbuf_clear(buffer);
diff --git a/lib/netlink.c b/lib/netlink.c
index 26ab20bb4..4c779c852 100644
--- a/lib/netlink.c
+++ b/lib/netlink.c
@@ -55,15 +55,37 @@  nl_msg_genlmsghdr(const struct ofpbuf *msg)
     return ofpbuf_at(msg, NLMSG_HDRLEN, GENL_HDRLEN);
 }
 
+/* Parses the ext ack netlink attributes and, if successful, a pointer
+ * to the error message, if included, is stored in '*errmsg'. */
+static void
+nl_parse_ext_ack(const struct ofpbuf *msg, size_t offset, const char **errmsg)
+{
+    static const struct nl_policy policy[] = {
+        [NLMSGERR_ATTR_MSG]  = { .type = NL_A_STRING, .optional = true },
+    };
+    struct nlattr *attrs[ARRAY_SIZE(policy)];
+
+    if (!nl_policy_parse(msg, offset, policy, attrs, ARRAY_SIZE(policy))) {
+        VLOG_ERR_RL(&rl, "Failed to parse extended ack data");
+        return;
+    }
+
+    if (attrs[NLMSGERR_ATTR_MSG]) {
+        *errmsg = nl_attr_get_string(attrs[NLMSGERR_ATTR_MSG]);
+    }
+}
+
 /* If 'buffer' is a NLMSG_ERROR message, stores 0 in '*errorp' if it is an ACK
  * message, otherwise a positive errno value, and returns true.  If 'buffer' is
  * not an NLMSG_ERROR message, returns false.
  *
  * 'msg' must be at least as large as a nlmsghdr. */
 bool
-nl_msg_nlmsgerr(const struct ofpbuf *msg, int *errorp)
+nl_msg_nlmsgerr(const struct ofpbuf *msg, int *errorp, const char **attr_msg)
 {
-    if (nl_msg_nlmsghdr(msg)->nlmsg_type == NLMSG_ERROR) {
+    struct nlmsghdr *nlh = nl_msg_nlmsghdr(msg);
+
+    if (nlh->nlmsg_type == NLMSG_ERROR) {
         struct nlmsgerr *err = ofpbuf_at(msg, NLMSG_HDRLEN, sizeof *err);
         int code = EPROTO;
         if (!err) {
@@ -71,6 +93,15 @@  nl_msg_nlmsgerr(const struct ofpbuf *msg, int *errorp)
                         msg->size, NLMSG_HDRLEN + sizeof *err);
         } else if (err->error <= 0 && err->error > INT_MIN) {
             code = -err->error;
+            if (attr_msg && err->error != 0 &&
+                (nlh->nlmsg_flags & NLM_F_ACK_TLVS)) {
+                size_t offt =  NLMSG_HDRLEN + sizeof *err;
+
+                if (!(nlh->nlmsg_flags & NLM_F_CAPPED)) {
+                    offt += (err->msg.nlmsg_len - NLMSG_HDRLEN);
+                }
+                nl_parse_ext_ack(msg, offt, attr_msg);
+            }
         }
         if (errorp) {
             *errorp = code;
diff --git a/lib/netlink.h b/lib/netlink.h
index 44b8e4d1a..628d0d531 100644
--- a/lib/netlink.h
+++ b/lib/netlink.h
@@ -45,7 +45,7 @@  struct nlattr;
 /* Accessing headers and data. */
 struct nlmsghdr *nl_msg_nlmsghdr(const struct ofpbuf *);
 struct genlmsghdr *nl_msg_genlmsghdr(const struct ofpbuf *);
-bool nl_msg_nlmsgerr(const struct ofpbuf *, int *error);
+bool nl_msg_nlmsgerr(const struct ofpbuf *, int *error, const char **attr_msg);
 void nl_msg_reserve(struct ofpbuf *, size_t);
 
 /* Appending and prepending headers and raw data. */