diff mbox series

[v2,52/53] migration/rdma: Use error_report() & friends instead of stderr

Message ID 20230928132019.2544702-53-armbru@redhat.com
State New
Headers show
Series migration/rdma: Error handling fixes | expand

Commit Message

Markus Armbruster Sept. 28, 2023, 1:20 p.m. UTC
error_report() obeys -msg, reports the current error location if any,
and reports to the current monitor if any.  Reporting to stderr
directly with fprintf() or perror() is wrong, because it loses all
this.

Fix the offenders.  Bonus: resolves a FIXME about problematic use of
errno.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
---
 migration/rdma.c | 44 +++++++++++++++++++++-----------------------
 1 file changed, 21 insertions(+), 23 deletions(-)

Comments

Fabiano Rosas Sept. 29, 2023, 3:36 p.m. UTC | #1
Markus Armbruster <armbru@redhat.com> writes:

> error_report() obeys -msg, reports the current error location if any,
> and reports to the current monitor if any.  Reporting to stderr
> directly with fprintf() or perror() is wrong, because it loses all
> this.
>
> Fix the offenders.  Bonus: resolves a FIXME about problematic use of
> errno.
>
> Signed-off-by: Markus Armbruster <armbru@redhat.com>
> ---
>  migration/rdma.c | 44 +++++++++++++++++++++-----------------------
>  1 file changed, 21 insertions(+), 23 deletions(-)
>
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 54b59d12b1..dba0802fca 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -877,12 +877,12 @@ static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp)
>  
>          if (roce_found) {
>              if (ib_found) {
> -                fprintf(stderr, "WARN: migrations may fail:"
> -                                " IPv6 over RoCE / iWARP in linux"
> -                                " is broken. But since you appear to have a"
> -                                " mixed RoCE / IB environment, be sure to only"
> -                                " migrate over the IB fabric until the kernel "
> -                                " fixes the bug.\n");
> +                warn_report("WARN: migrations may fail:"
> +                            " IPv6 over RoCE / iWARP in linux"
> +                            " is broken. But since you appear to have a"
> +                            " mixed RoCE / IB environment, be sure to only"
> +                            " migrate over the IB fabric until the kernel "
> +                            " fixes the bug.");

Won't this become "warning: WARN:"?

>              } else {
>                  error_setg(errp, "RDMA ERROR: "
>                             "You only have RoCE / iWARP devices in your systems"
> @@ -1418,12 +1418,8 @@ static int qemu_rdma_unregister_waiting(RDMAContext *rdma)
>          block->remote_keys[chunk] = 0;
>  
>          if (ret != 0) {
> -            /*
> -             * FIXME perror() is problematic, bcause ibv_dereg_mr() is
> -             * not documented to set errno.  Will go away later in
> -             * this series.
> -             */
> -            perror("unregistration chunk failed");
> +            error_report("unregistration chunk failed: %s",
> +                         strerror(ret));

Doesn't seem to fix the issue, ret might still not be an errno. Am I
missing something?

>              return -1;
>          }
Markus Armbruster Oct. 4, 2023, 11:15 a.m. UTC | #2
Fabiano Rosas <farosas@suse.de> writes:

> Markus Armbruster <armbru@redhat.com> writes:
>
>> error_report() obeys -msg, reports the current error location if any,
>> and reports to the current monitor if any.  Reporting to stderr
>> directly with fprintf() or perror() is wrong, because it loses all
>> this.
>>
>> Fix the offenders.  Bonus: resolves a FIXME about problematic use of
>> errno.
>>
>> Signed-off-by: Markus Armbruster <armbru@redhat.com>
>> ---
>>  migration/rdma.c | 44 +++++++++++++++++++++-----------------------
>>  1 file changed, 21 insertions(+), 23 deletions(-)
>>
>> diff --git a/migration/rdma.c b/migration/rdma.c
>> index 54b59d12b1..dba0802fca 100644
>> --- a/migration/rdma.c
>> +++ b/migration/rdma.c
>> @@ -877,12 +877,12 @@ static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp)
>>  
>>          if (roce_found) {
>>              if (ib_found) {
>> -                fprintf(stderr, "WARN: migrations may fail:"
>> -                                " IPv6 over RoCE / iWARP in linux"
>> -                                " is broken. But since you appear to have a"
>> -                                " mixed RoCE / IB environment, be sure to only"
>> -                                " migrate over the IB fabric until the kernel "
>> -                                " fixes the bug.\n");
>> +                warn_report("WARN: migrations may fail:"
>> +                            " IPv6 over RoCE / iWARP in linux"
>> +                            " is broken. But since you appear to have a"
>> +                            " mixed RoCE / IB environment, be sure to only"
>> +                            " migrate over the IB fabric until the kernel "
>> +                            " fixes the bug.");
>
> Won't this become "warning: WARN:"?

It will.  I'll drop the "WARN: " prefix.

>>              } else {
>>                  error_setg(errp, "RDMA ERROR: "
>>                             "You only have RoCE / iWARP devices in your systems"
>> @@ -1418,12 +1418,8 @@ static int qemu_rdma_unregister_waiting(RDMAContext *rdma)
>>          block->remote_keys[chunk] = 0;
>>  
>>          if (ret != 0) {
>> -            /*
>> -             * FIXME perror() is problematic, bcause ibv_dereg_mr() is
>> -             * not documented to set errno.  Will go away later in
>> -             * this series.
>> -             */
>> -            perror("unregistration chunk failed");
>> +            error_report("unregistration chunk failed: %s",
>> +                         strerror(ret));
>
> Doesn't seem to fix the issue, ret might still not be an errno. Am I
> missing something?

Yes :)

ibv_dereg_mr(3) section RETURN VALUE has:

       ibv_dereg_mr()  returns  0 on success, or the value of errno on failure
       (which indicates the failure reason).

Clearer now?

>>              return -1;
>>          }
Fabiano Rosas Oct. 4, 2023, 1:52 p.m. UTC | #3
Markus Armbruster <armbru@redhat.com> writes:

> Fabiano Rosas <farosas@suse.de> writes:
>
>> Markus Armbruster <armbru@redhat.com> writes:
>>
>>> error_report() obeys -msg, reports the current error location if any,
>>> and reports to the current monitor if any.  Reporting to stderr
>>> directly with fprintf() or perror() is wrong, because it loses all
>>> this.
>>>
>>> Fix the offenders.  Bonus: resolves a FIXME about problematic use of
>>> errno.
>>>
>>> Signed-off-by: Markus Armbruster <armbru@redhat.com>
>>> ---
>>>  migration/rdma.c | 44 +++++++++++++++++++++-----------------------
>>>  1 file changed, 21 insertions(+), 23 deletions(-)
>>>
>>> diff --git a/migration/rdma.c b/migration/rdma.c
>>> index 54b59d12b1..dba0802fca 100644
>>> --- a/migration/rdma.c
>>> +++ b/migration/rdma.c
>>> @@ -877,12 +877,12 @@ static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp)
>>>  
>>>          if (roce_found) {
>>>              if (ib_found) {
>>> -                fprintf(stderr, "WARN: migrations may fail:"
>>> -                                " IPv6 over RoCE / iWARP in linux"
>>> -                                " is broken. But since you appear to have a"
>>> -                                " mixed RoCE / IB environment, be sure to only"
>>> -                                " migrate over the IB fabric until the kernel "
>>> -                                " fixes the bug.\n");
>>> +                warn_report("WARN: migrations may fail:"
>>> +                            " IPv6 over RoCE / iWARP in linux"
>>> +                            " is broken. But since you appear to have a"
>>> +                            " mixed RoCE / IB environment, be sure to only"
>>> +                            " migrate over the IB fabric until the kernel "
>>> +                            " fixes the bug.");
>>
>> Won't this become "warning: WARN:"?
>
> It will.  I'll drop the "WARN: " prefix.
>
>>>              } else {
>>>                  error_setg(errp, "RDMA ERROR: "
>>>                             "You only have RoCE / iWARP devices in your systems"
>>> @@ -1418,12 +1418,8 @@ static int qemu_rdma_unregister_waiting(RDMAContext *rdma)
>>>          block->remote_keys[chunk] = 0;
>>>  
>>>          if (ret != 0) {
>>> -            /*
>>> -             * FIXME perror() is problematic, bcause ibv_dereg_mr() is
>>> -             * not documented to set errno.  Will go away later in
>>> -             * this series.
>>> -             */
>>> -            perror("unregistration chunk failed");
>>> +            error_report("unregistration chunk failed: %s",
>>> +                         strerror(ret));
>>
>> Doesn't seem to fix the issue, ret might still not be an errno. Am I
>> missing something?
>
> Yes :)
>
> ibv_dereg_mr(3) section RETURN VALUE has:
>
>        ibv_dereg_mr()  returns  0 on success, or the value of errno on failure
>        (which indicates the failure reason).
>
> Clearer now?

Yep, thank you.
Juan Quintela Oct. 5, 2023, 7:24 a.m. UTC | #4
Markus Armbruster <armbru@redhat.com> wrote:
> error_report() obeys -msg, reports the current error location if any,
> and reports to the current monitor if any.  Reporting to stderr
> directly with fprintf() or perror() is wrong, because it loses all
> this.
>
> Fix the offenders.  Bonus: resolves a FIXME about problematic use of
> errno.
>
> Signed-off-by: Markus Armbruster <armbru@redhat.com>

I fixed the WARN issue by hand.

Reviewed-by: Juan Quintela <quintela@redhat.com>
Li Zhijian Oct. 7, 2023, 3:56 a.m. UTC | #5
On 28/09/2023 21:20, Markus Armbruster wrote:
> error_report() obeys -msg, reports the current error location if any,
> and reports to the current monitor if any.  Reporting to stderr
> directly with fprintf() or perror() is wrong, because it loses all
> this.
> 
> Fix the offenders.  Bonus: resolves a FIXME about problematic use of
> errno.
> 
> Signed-off-by: Markus Armbruster <armbru@redhat.com>

Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>


> ---
>   migration/rdma.c | 44 +++++++++++++++++++++-----------------------
>   1 file changed, 21 insertions(+), 23 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 54b59d12b1..dba0802fca 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -877,12 +877,12 @@ static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp)
>   
>           if (roce_found) {
>               if (ib_found) {
> -                fprintf(stderr, "WARN: migrations may fail:"
> -                                " IPv6 over RoCE / iWARP in linux"
> -                                " is broken. But since you appear to have a"
> -                                " mixed RoCE / IB environment, be sure to only"
> -                                " migrate over the IB fabric until the kernel "
> -                                " fixes the bug.\n");
> +                warn_report("WARN: migrations may fail:"
> +                            " IPv6 over RoCE / iWARP in linux"
> +                            " is broken. But since you appear to have a"
> +                            " mixed RoCE / IB environment, be sure to only"
> +                            " migrate over the IB fabric until the kernel "
> +                            " fixes the bug.");
>               } else {
>                   error_setg(errp, "RDMA ERROR: "
>                              "You only have RoCE / iWARP devices in your systems"
> @@ -1418,12 +1418,8 @@ static int qemu_rdma_unregister_waiting(RDMAContext *rdma)
>           block->remote_keys[chunk] = 0;
>   
>           if (ret != 0) {
> -            /*
> -             * FIXME perror() is problematic, bcause ibv_dereg_mr() is
> -             * not documented to set errno.  Will go away later in
> -             * this series.
> -             */
> -            perror("unregistration chunk failed");
> +            error_report("unregistration chunk failed: %s",
> +                         strerror(ret));
>               return -1;
>           }
>           rdma->total_registrations--;
> @@ -3767,7 +3763,8 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
>                   block->pmr[reg->key.chunk] = NULL;
>   
>                   if (ret != 0) {
> -                    perror("rdma unregistration chunk failed");
> +                    error_report("rdma unregistration chunk failed: %s",
> +                                 strerror(errno));
>                       goto err;
>                   }
>   
> @@ -3956,10 +3953,10 @@ static int qemu_rdma_registration_stop(QEMUFile *f,
>            */
>   
>           if (local->nb_blocks != nb_dest_blocks) {
> -            fprintf(stderr, "ram blocks mismatch (Number of blocks %d vs %d) "
> -                    "Your QEMU command line parameters are probably "
> -                    "not identical on both the source and destination.",
> -                    local->nb_blocks, nb_dest_blocks);
> +            error_report("ram blocks mismatch (Number of blocks %d vs %d)",
> +                         local->nb_blocks, nb_dest_blocks);
> +            error_printf("Your QEMU command line parameters are probably "
> +                         "not identical on both the source and destination.");
>               rdma->errored = true;
>               return -1;
>           }
> @@ -3972,10 +3969,11 @@ static int qemu_rdma_registration_stop(QEMUFile *f,
>   
>               /* We require that the blocks are in the same order */
>               if (rdma->dest_blocks[i].length != local->block[i].length) {
> -                fprintf(stderr, "Block %s/%d has a different length %" PRIu64
> -                        "vs %" PRIu64, local->block[i].block_name, i,
> -                        local->block[i].length,
> -                        rdma->dest_blocks[i].length);
> +                error_report("Block %s/%d has a different length %" PRIu64
> +                             "vs %" PRIu64,
> +                             local->block[i].block_name, i,
> +                             local->block[i].length,
> +                             rdma->dest_blocks[i].length);
>                   rdma->errored = true;
>                   return -1;
>               }
> @@ -4091,7 +4089,7 @@ static void rdma_accept_incoming_migration(void *opaque)
>       ret = qemu_rdma_accept(rdma);
>   
>       if (ret < 0) {
> -        fprintf(stderr, "RDMA ERROR: Migration initialization failed\n");
> +        error_report("RDMA ERROR: Migration initialization failed");
>           return;
>       }
>   
> @@ -4103,7 +4101,7 @@ static void rdma_accept_incoming_migration(void *opaque)
>   
>       f = rdma_new_input(rdma);
>       if (f == NULL) {
> -        fprintf(stderr, "RDMA ERROR: could not open RDMA for input\n");
> +        error_report("RDMA ERROR: could not open RDMA for input");
>           qemu_rdma_cleanup(rdma);
>           return;
>       }
diff mbox series

Patch

diff --git a/migration/rdma.c b/migration/rdma.c
index 54b59d12b1..dba0802fca 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -877,12 +877,12 @@  static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp)
 
         if (roce_found) {
             if (ib_found) {
-                fprintf(stderr, "WARN: migrations may fail:"
-                                " IPv6 over RoCE / iWARP in linux"
-                                " is broken. But since you appear to have a"
-                                " mixed RoCE / IB environment, be sure to only"
-                                " migrate over the IB fabric until the kernel "
-                                " fixes the bug.\n");
+                warn_report("WARN: migrations may fail:"
+                            " IPv6 over RoCE / iWARP in linux"
+                            " is broken. But since you appear to have a"
+                            " mixed RoCE / IB environment, be sure to only"
+                            " migrate over the IB fabric until the kernel "
+                            " fixes the bug.");
             } else {
                 error_setg(errp, "RDMA ERROR: "
                            "You only have RoCE / iWARP devices in your systems"
@@ -1418,12 +1418,8 @@  static int qemu_rdma_unregister_waiting(RDMAContext *rdma)
         block->remote_keys[chunk] = 0;
 
         if (ret != 0) {
-            /*
-             * FIXME perror() is problematic, bcause ibv_dereg_mr() is
-             * not documented to set errno.  Will go away later in
-             * this series.
-             */
-            perror("unregistration chunk failed");
+            error_report("unregistration chunk failed: %s",
+                         strerror(ret));
             return -1;
         }
         rdma->total_registrations--;
@@ -3767,7 +3763,8 @@  static int qemu_rdma_registration_handle(QEMUFile *f)
                 block->pmr[reg->key.chunk] = NULL;
 
                 if (ret != 0) {
-                    perror("rdma unregistration chunk failed");
+                    error_report("rdma unregistration chunk failed: %s",
+                                 strerror(errno));
                     goto err;
                 }
 
@@ -3956,10 +3953,10 @@  static int qemu_rdma_registration_stop(QEMUFile *f,
          */
 
         if (local->nb_blocks != nb_dest_blocks) {
-            fprintf(stderr, "ram blocks mismatch (Number of blocks %d vs %d) "
-                    "Your QEMU command line parameters are probably "
-                    "not identical on both the source and destination.",
-                    local->nb_blocks, nb_dest_blocks);
+            error_report("ram blocks mismatch (Number of blocks %d vs %d)",
+                         local->nb_blocks, nb_dest_blocks);
+            error_printf("Your QEMU command line parameters are probably "
+                         "not identical on both the source and destination.");
             rdma->errored = true;
             return -1;
         }
@@ -3972,10 +3969,11 @@  static int qemu_rdma_registration_stop(QEMUFile *f,
 
             /* We require that the blocks are in the same order */
             if (rdma->dest_blocks[i].length != local->block[i].length) {
-                fprintf(stderr, "Block %s/%d has a different length %" PRIu64
-                        "vs %" PRIu64, local->block[i].block_name, i,
-                        local->block[i].length,
-                        rdma->dest_blocks[i].length);
+                error_report("Block %s/%d has a different length %" PRIu64
+                             "vs %" PRIu64,
+                             local->block[i].block_name, i,
+                             local->block[i].length,
+                             rdma->dest_blocks[i].length);
                 rdma->errored = true;
                 return -1;
             }
@@ -4091,7 +4089,7 @@  static void rdma_accept_incoming_migration(void *opaque)
     ret = qemu_rdma_accept(rdma);
 
     if (ret < 0) {
-        fprintf(stderr, "RDMA ERROR: Migration initialization failed\n");
+        error_report("RDMA ERROR: Migration initialization failed");
         return;
     }
 
@@ -4103,7 +4101,7 @@  static void rdma_accept_incoming_migration(void *opaque)
 
     f = rdma_new_input(rdma);
     if (f == NULL) {
-        fprintf(stderr, "RDMA ERROR: could not open RDMA for input\n");
+        error_report("RDMA ERROR: could not open RDMA for input");
         qemu_rdma_cleanup(rdma);
         return;
     }