diff mbox series

[v2,33/53] migration/rdma: Fix error handling around rdma_getaddrinfo()

Message ID 20230928132019.2544702-34-armbru@redhat.com
State New
Headers show
Series migration/rdma: Error handling fixes | expand

Commit Message

Markus Armbruster Sept. 28, 2023, 1:19 p.m. UTC
qemu_rdma_resolve_host() and qemu_rdma_dest_init() iterate over
addresses to find one that works, holding onto the first Error from
qemu_rdma_broken_ipv6_kernel() for use when no address works.  Issues:

1. If @errp was &error_abort or &error_fatal, we'd terminate instead
   of trying the next address.  Can't actually happen, since no caller
   passes these arguments.

2. When @errp is a pointer to a variable containing NULL, and
   qemu_rdma_broken_ipv6_kernel() fails, the variable no longer
   contains NULL.  Subsequent iterations pass it again, violating
   Error usage rules.  Dangerous, as setting an error would then trip
   error_setv()'s assertion.  Works only because
   qemu_rdma_broken_ipv6_kernel() and the code following the loops
   carefully avoids setting a second error.

3. If qemu_rdma_broken_ipv6_kernel() fails, and then a later iteration
   finds a working address, @errp still holds the first error from
   qemu_rdma_broken_ipv6_kernel().  If we then run into another error,
   we report the qemu_rdma_broken_ipv6_kernel() failure instead.

4. If we don't run into another error, we leak the Error object.

Use a local error variable, and propagate to @errp.  This fixes 3. and
also cleans up 1 and partly 2.

Free this error when we have a working address.  This fixes 4.

Pass the local error variable to qemu_rdma_broken_ipv6_kernel() only
until it fails.  Pass null on any later iterations.  This cleans up
the remainder of 2.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>
---
 migration/rdma.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

Comments

Juan Quintela Oct. 4, 2023, 4:51 p.m. UTC | #1
Markus Armbruster <armbru@redhat.com> wrote:
> qemu_rdma_resolve_host() and qemu_rdma_dest_init() iterate over
> addresses to find one that works, holding onto the first Error from
> qemu_rdma_broken_ipv6_kernel() for use when no address works.  Issues:
>
> 1. If @errp was &error_abort or &error_fatal, we'd terminate instead
>    of trying the next address.  Can't actually happen, since no caller
>    passes these arguments.
>
> 2. When @errp is a pointer to a variable containing NULL, and
>    qemu_rdma_broken_ipv6_kernel() fails, the variable no longer
>    contains NULL.  Subsequent iterations pass it again, violating
>    Error usage rules.  Dangerous, as setting an error would then trip
>    error_setv()'s assertion.  Works only because
>    qemu_rdma_broken_ipv6_kernel() and the code following the loops
>    carefully avoids setting a second error.
>
> 3. If qemu_rdma_broken_ipv6_kernel() fails, and then a later iteration
>    finds a working address, @errp still holds the first error from
>    qemu_rdma_broken_ipv6_kernel().  If we then run into another error,
>    we report the qemu_rdma_broken_ipv6_kernel() failure instead.
>
> 4. If we don't run into another error, we leak the Error object.
>
> Use a local error variable, and propagate to @errp.  This fixes 3. and
> also cleans up 1 and partly 2.
>
> Free this error when we have a working address.  This fixes 4.
>
> Pass the local error variable to qemu_rdma_broken_ipv6_kernel() only
> until it fails.  Pass null on any later iterations.  This cleans up
> the remainder of 2.
>
> Signed-off-by: Markus Armbruster <armbru@redhat.com>
> Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>

Reviewed-by: Juan Quintela <quintela@redhat.com>
diff mbox series

Patch

diff --git a/migration/rdma.c b/migration/rdma.c
index cbb6822dda..4fec6dbf86 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -932,6 +932,7 @@  static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp)
  */
 static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp)
 {
+    Error *err = NULL;
     int ret;
     struct rdma_addrinfo *res;
     char port_str[16];
@@ -976,7 +977,10 @@  static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp)
         goto err_resolve_get_addr;
     }
 
+    /* Try all addresses, saving the first error in @err */
     for (e = res; e != NULL; e = e->ai_next) {
+        Error **local_errp = err ? NULL : &err;
+
         inet_ntop(e->ai_family,
             &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip);
         trace_qemu_rdma_resolve_host_trying(rdma->host, ip);
@@ -985,17 +989,21 @@  static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp)
                 RDMA_RESOLVE_TIMEOUT_MS);
         if (ret >= 0) {
             if (e->ai_family == AF_INET6) {
-                ret = qemu_rdma_broken_ipv6_kernel(rdma->cm_id->verbs, errp);
+                ret = qemu_rdma_broken_ipv6_kernel(rdma->cm_id->verbs,
+                                                   local_errp);
                 if (ret < 0) {
                     continue;
                 }
             }
+            error_free(err);
             goto route;
         }
     }
 
     rdma_freeaddrinfo(res);
-    if (errp && !*errp) {
+    if (err) {
+        error_propagate(errp, err);
+    } else {
         error_setg(errp, "RDMA ERROR: could not resolve address %s",
                    rdma->host);
     }
@@ -2687,6 +2695,7 @@  err_rdma_source_connect:
 
 static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
 {
+    Error *err = NULL;
     int ret, idx;
     struct rdma_cm_id *listen_id;
     char ip[40] = "unknown";
@@ -2745,7 +2754,11 @@  static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
         }
         goto err_dest_init_bind_addr;
     }
+
+    /* Try all addresses, saving the first error in @err */
     for (e = res; e != NULL; e = e->ai_next) {
+        Error **local_errp = err ? NULL : &err;
+
         inet_ntop(e->ai_family,
             &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip);
         trace_qemu_rdma_dest_init_trying(rdma->host, ip);
@@ -2754,17 +2767,21 @@  static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
             continue;
         }
         if (e->ai_family == AF_INET6) {
-            ret = qemu_rdma_broken_ipv6_kernel(listen_id->verbs, errp);
+            ret = qemu_rdma_broken_ipv6_kernel(listen_id->verbs,
+                                               local_errp);
             if (ret < 0) {
                 continue;
             }
         }
+        error_free(err);
         break;
     }
 
     rdma_freeaddrinfo(res);
     if (!e) {
-        if (errp && !*errp) {
+        if (err) {
+            error_propagate(errp, err);
+        } else {
             error_setg(errp, "RDMA ERROR: Error: could not rdma_bind_addr!");
         }
         goto err_dest_init_bind_addr;