Message ID | 20230918144206.560120-22-armbru@redhat.com |
---|---|
State | New |
Headers | show |
Series | migration/rdma: Error handling fixes | expand |
On 18/09/2023 22:41, Markus Armbruster wrote: > The QEMUFileHooks methods don't come with a written contract. Digging > through the code calling them, we find: > > * save_page(): I'm fine with this > > Negative values RAM_SAVE_CONTROL_DELAYED and > RAM_SAVE_CONTROL_NOT_SUPP are special. Any other negative value is > an unspecified error. > > qemu_rdma_save_page() returns -EIO or rdma->error_state on error. I > believe the latter is always negative. Nothing stops either of them > to clash with the special values, though. Feels unlikely, but fix > it anyway to return only the special values and -1. > > * before_ram_iterate(), before_ram_iterate(): error code returned by before_ram_iterate() and before_ram_iterate() will be assigned to QEMUFile for upper layer. But it seems that no callers take care about the error ? Shouldn't let the callers to check the error instead ? > > Negative value means error. qemu_rdma_registration_start() and > qemu_rdma_registration_stop() comply as far as I can tell. Make > them comply *obviously*, by returning -1 on error. > > * hook_ram_load: > > Negative value means error. rdma_load_hook() already returns -1 on > error. Leave it alone. see inline reply below, > > Signed-off-by: Markus Armbruster <armbru@redhat.com> > --- > migration/rdma.c | 79 +++++++++++++++++++++++------------------------- > 1 file changed, 37 insertions(+), 42 deletions(-) > > diff --git a/migration/rdma.c b/migration/rdma.c > index cc59155a50..46b5859268 100644 > --- a/migration/rdma.c > +++ b/migration/rdma.c > @@ -3219,12 +3219,11 @@ static size_t qemu_rdma_save_page(QEMUFile *f, > rdma = qatomic_rcu_read(&rioc->rdmaout); > > if (!rdma) { > - return -EIO; > + return -1; > } > > - ret = check_error_state(rdma); > - if (ret) { > - return ret; > + if (check_error_state(rdma)) { > + return -1; > } > > qemu_fflush(f); > @@ -3290,9 +3289,10 @@ static size_t qemu_rdma_save_page(QEMUFile *f, > } > > return RAM_SAVE_CONTROL_DELAYED; > + > err: > rdma->error_state = ret; > - return ret; > + return -1; > } > > static void rdma_accept_incoming_migration(void *opaque); > @@ -3538,12 +3538,11 @@ static int qemu_rdma_registration_handle(QEMUFile *f) > rdma = qatomic_rcu_read(&rioc->rdmain); > > if (!rdma) { > - return -EIO; > + return -1; that's because EIO is not accurate here ? > } > > - ret = check_error_state(rdma); > - if (ret) { > - return ret; Ditto Thanks Zhijian > + if (check_error_state(rdma)) { > + return -1; > } > > local = &rdma->local_ram_blocks; > @@ -3576,7 +3575,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) > (unsigned int)comp->block_idx, > rdma->local_ram_blocks.nb_blocks); > ret = -EIO; > - goto out; > + goto err; > } > block = &(rdma->local_ram_blocks.block[comp->block_idx]); > > @@ -3588,7 +3587,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) > > case RDMA_CONTROL_REGISTER_FINISHED: > trace_qemu_rdma_registration_handle_finished(); > - goto out; > + return 0; > > case RDMA_CONTROL_RAM_BLOCKS_REQUEST: > trace_qemu_rdma_registration_handle_ram_blocks(); > @@ -3609,7 +3608,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) > if (ret) { > error_report("rdma migration: error dest " > "registering ram blocks"); > - goto out; > + goto err; > } > } > > @@ -3648,7 +3647,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) > > if (ret < 0) { > error_report("rdma migration: error sending remote info"); > - goto out; > + goto err; > } > > break; > @@ -3675,7 +3674,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) > (unsigned int)reg->current_index, > rdma->local_ram_blocks.nb_blocks); > ret = -ENOENT; > - goto out; > + goto err; > } > block = &(rdma->local_ram_blocks.block[reg->current_index]); > if (block->is_ram_block) { > @@ -3685,7 +3684,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) > block->block_name, block->offset, > reg->key.current_addr); > ret = -ERANGE; > - goto out; > + goto err; > } > host_addr = (block->local_host_addr + > (reg->key.current_addr - block->offset)); > @@ -3701,7 +3700,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) > " chunk: %" PRIx64, > block->block_name, reg->key.chunk); > ret = -ERANGE; > - goto out; > + goto err; > } > } > chunk_start = ram_chunk_start(block, chunk); > @@ -3713,7 +3712,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) > chunk, chunk_start, chunk_end)) { > error_report("cannot get rkey"); > ret = -EINVAL; > - goto out; > + goto err; > } > reg_result->rkey = tmp_rkey; > > @@ -3730,7 +3729,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) > > if (ret < 0) { > error_report("Failed to send control buffer"); > - goto out; > + goto err; > } > break; > case RDMA_CONTROL_UNREGISTER_REQUEST: > @@ -3753,7 +3752,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) > if (ret != 0) { > perror("rdma unregistration chunk failed"); > ret = -ret; > - goto out; > + goto err; > } > > rdma->total_registrations--; > @@ -3766,24 +3765,23 @@ static int qemu_rdma_registration_handle(QEMUFile *f) > > if (ret < 0) { > error_report("Failed to send control buffer"); > - goto out; > + goto err; > } > break; > case RDMA_CONTROL_REGISTER_RESULT: > error_report("Invalid RESULT message at dest."); > ret = -EIO; > - goto out; > + goto err; > default: > error_report("Unknown control message %s", control_desc(head.type)); > ret = -EIO; > - goto out; > + goto err; > } > } while (1); > -out: > - if (ret < 0) { > - rdma->error_state = ret; > - } > - return ret; > + > +err: > + rdma->error_state = ret; > + return -1; > } > > /* Destination: > @@ -3805,7 +3803,7 @@ rdma_block_notification_handle(QEMUFile *f, const char *name) > rdma = qatomic_rcu_read(&rioc->rdmain); > > if (!rdma) { > - return -EIO; > + return -1; > } > > /* Find the matching RAMBlock in our local list */ > @@ -3818,7 +3816,7 @@ rdma_block_notification_handle(QEMUFile *f, const char *name) > > if (found == -1) { > error_report("RAMBlock '%s' not found on destination", name); > - return -ENOENT; > + return -1; > } > > rdma->local_ram_blocks.block[curr].src_index = rdma->next_src_index; > @@ -3848,7 +3846,6 @@ static int qemu_rdma_registration_start(QEMUFile *f, > { > QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f)); > RDMAContext *rdma; > - int ret; > > if (migration_in_postcopy()) { > return 0; > @@ -3857,12 +3854,11 @@ static int qemu_rdma_registration_start(QEMUFile *f, > RCU_READ_LOCK_GUARD(); > rdma = qatomic_rcu_read(&rioc->rdmaout); > if (!rdma) { > - return -EIO; > + return -1; > } > > - ret = check_error_state(rdma); > - if (ret) { > - return ret; > + if (check_error_state(rdma)) { > + return -1; > } > > trace_qemu_rdma_registration_start(flags); > @@ -3891,12 +3887,11 @@ static int qemu_rdma_registration_stop(QEMUFile *f, > RCU_READ_LOCK_GUARD(); > rdma = qatomic_rcu_read(&rioc->rdmaout); > if (!rdma) { > - return -EIO; > + return -1; > } > > - ret = check_error_state(rdma); > - if (ret) { > - return ret; > + if (check_error_state(rdma)) { > + return -1; > } > > qemu_fflush(f); > @@ -3927,7 +3922,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, > qemu_rdma_reg_whole_ram_blocks : NULL); > if (ret < 0) { > fprintf(stderr, "receiving remote info!"); > - return ret; > + return -1; > } > > nb_dest_blocks = resp.len / sizeof(RDMADestBlock); > @@ -3950,7 +3945,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, > "not identical on both the source and destination.", > local->nb_blocks, nb_dest_blocks); > rdma->error_state = -EINVAL; > - return -EINVAL; > + return -1; > } > > qemu_rdma_move_header(rdma, reg_result_idx, &resp); > @@ -3966,7 +3961,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, > local->block[i].length, > rdma->dest_blocks[i].length); > rdma->error_state = -EINVAL; > - return -EINVAL; > + return -1; > } > local->block[i].remote_host_addr = > rdma->dest_blocks[i].remote_host_addr; > @@ -3986,7 +3981,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, > return 0; > err: > rdma->error_state = ret; > - return ret; > + return -1; > } > > static const QEMUFileHooks rdma_read_hooks = {
"Zhijian Li (Fujitsu)" <lizhijian@fujitsu.com> writes: > On 18/09/2023 22:41, Markus Armbruster wrote: >> The QEMUFileHooks methods don't come with a written contract. Digging >> through the code calling them, we find: >> >> * save_page(): > > I'm fine with this > >> >> Negative values RAM_SAVE_CONTROL_DELAYED and >> RAM_SAVE_CONTROL_NOT_SUPP are special. Any other negative value is >> an unspecified error. >> >> qemu_rdma_save_page() returns -EIO or rdma->error_state on error. I >> believe the latter is always negative. Nothing stops either of them >> to clash with the special values, though. Feels unlikely, but fix >> it anyway to return only the special values and -1. >> >> * before_ram_iterate(), before_ram_iterate(): > > error code returned by before_ram_iterate() and before_ram_iterate() will be > assigned to QEMUFile for upper layer. > But it seems that no callers take care about the error ? Shouldn't let the callers > to check the error instead ? The error values returned by qemu_rdma_registration_start() and qemu_rdma_registration_stop() carry no additional information a caller could check. Both return either -EIO or rdma->error_state on error. The latter is *not* a negative errno code. Evidence: qio_channel_rdma_shutdown() assigns -1, qemu_rdma_block_for_wrid() assigns the error value of qemu_rdma_poll(), which can be the error value of ibv_poll_cq(), which is an unspecified negative value, ... I decided not to investigate what qemu-file.c does with the error values after one quick glance at the code. It's confusing, and quite possibly confused. But I'm already at 50+ patches, and am neither inclined nor able to take on more migration cleanup at this time. >> Negative value means error. qemu_rdma_registration_start() and >> qemu_rdma_registration_stop() comply as far as I can tell. Make >> them comply *obviously*, by returning -1 on error. >> >> * hook_ram_load: >> >> Negative value means error. rdma_load_hook() already returns -1 on >> error. Leave it alone. > > see inline reply below, > >> >> Signed-off-by: Markus Armbruster <armbru@redhat.com> >> --- >> migration/rdma.c | 79 +++++++++++++++++++++++------------------------- >> 1 file changed, 37 insertions(+), 42 deletions(-) >> >> diff --git a/migration/rdma.c b/migration/rdma.c >> index cc59155a50..46b5859268 100644 >> --- a/migration/rdma.c >> +++ b/migration/rdma.c >> @@ -3219,12 +3219,11 @@ static size_t qemu_rdma_save_page(QEMUFile *f, >> rdma = qatomic_rcu_read(&rioc->rdmaout); >> >> if (!rdma) { >> - return -EIO; >> + return -1; >> } >> >> - ret = check_error_state(rdma); >> - if (ret) { >> - return ret; >> + if (check_error_state(rdma)) { >> + return -1; >> } >> >> qemu_fflush(f); >> @@ -3290,9 +3289,10 @@ static size_t qemu_rdma_save_page(QEMUFile *f, >> } >> >> return RAM_SAVE_CONTROL_DELAYED; >> + >> err: >> rdma->error_state = ret; >> - return ret; >> + return -1; >> } >> >> static void rdma_accept_incoming_migration(void *opaque); >> @@ -3538,12 +3538,11 @@ static int qemu_rdma_registration_handle(QEMUFile *f) >> rdma = qatomic_rcu_read(&rioc->rdmain); >> >> if (!rdma) { >> - return -EIO; >> + return -1; > > that's because EIO is not accurate here ? It's because the function does not return a negative errno code, and returning -EIO is misleading readers into assuming it does. >> } >> >> - ret = check_error_state(rdma); >> - if (ret) { >> - return ret; > > Ditto Likewise. [...]
On 25/09/2023 14:36, Markus Armbruster wrote: > "Zhijian Li (Fujitsu)" <lizhijian@fujitsu.com> writes: > >> On 18/09/2023 22:41, Markus Armbruster wrote: >>> The QEMUFileHooks methods don't come with a written contract. Digging >>> through the code calling them, we find: >>> >>> * save_page(): >> >> I'm fine with this >> >>> >>> Negative values RAM_SAVE_CONTROL_DELAYED and >>> RAM_SAVE_CONTROL_NOT_SUPP are special. Any other negative value is >>> an unspecified error. >>> >>> qemu_rdma_save_page() returns -EIO or rdma->error_state on error. I >>> believe the latter is always negative. Nothing stops either of them >>> to clash with the special values, though. Feels unlikely, but fix >>> it anyway to return only the special values and -1. >>> >>> * before_ram_iterate(), before_ram_iterate(): >> >> error code returned by before_ram_iterate() and before_ram_iterate() will be >> assigned to QEMUFile for upper layer. >> But it seems that no callers take care about the error ? Shouldn't let the callers >> to check the error instead ? > > The error values returned by qemu_rdma_registration_start() and > qemu_rdma_registration_stop() carry no additional information a caller > could check. I think qemu_file_get_error(f) can be used for callers to check the error code. > > Both return either -EIO or rdma->error_state on error. The latter is > *not* a negative errno code. Evidence: qio_channel_rdma_shutdown() > assigns -1, qemu_rdma_block_for_wrid() assigns the error value of > qemu_rdma_poll(), which can be the error value of ibv_poll_cq(), which > is an unspecified negative value, ... > you are right. > I decided not to investigate what qemu-file.c does with the error values > after one quick glance at the code. It's confusing, and quite possibly > confused. But I'm already at 50+ patches, and am neither inclined nor > able to take on more migration cleanup at this time. Yeah, it's already big enough patch set. very thanks Reviewed-by: Li Zhijian <lizhijian@fujitsu.com> > >>> Negative value means error. qemu_rdma_registration_start() and >>> qemu_rdma_registration_stop() comply as far as I can tell. Make >>> them comply *obviously*, by returning -1 on error. >>> >>> * hook_ram_load: >>> >>> Negative value means error. rdma_load_hook() already returns -1 on >>> error. Leave it alone. >> >> see inline reply below, >> >>> >>> Signed-off-by: Markus Armbruster <armbru@redhat.com> >>> --- >>> migration/rdma.c | 79 +++++++++++++++++++++++------------------------- >>> 1 file changed, 37 insertions(+), 42 deletions(-) >>> >>> diff --git a/migration/rdma.c b/migration/rdma.c >>> index cc59155a50..46b5859268 100644 >>> --- a/migration/rdma.c >>> +++ b/migration/rdma.c >>> @@ -3219,12 +3219,11 @@ static size_t qemu_rdma_save_page(QEMUFile *f, >>> rdma = qatomic_rcu_read(&rioc->rdmaout); >>> >>> if (!rdma) { >>> - return -EIO; >>> + return -1; >>> } >>> >>> - ret = check_error_state(rdma); >>> - if (ret) { >>> - return ret; >>> + if (check_error_state(rdma)) { >>> + return -1; >>> } >>> >>> qemu_fflush(f); >>> @@ -3290,9 +3289,10 @@ static size_t qemu_rdma_save_page(QEMUFile *f, >>> } >>> >>> return RAM_SAVE_CONTROL_DELAYED; >>> + >>> err: >>> rdma->error_state = ret; >>> - return ret; >>> + return -1; >>> } >>> >>> static void rdma_accept_incoming_migration(void *opaque); >>> @@ -3538,12 +3538,11 @@ static int qemu_rdma_registration_handle(QEMUFile *f) >>> rdma = qatomic_rcu_read(&rioc->rdmain); >>> >>> if (!rdma) { >>> - return -EIO; >>> + return -1; >> >> that's because EIO is not accurate here ? > > It's because the function does not return a negative errno code, and > returning -EIO is misleading readers into assuming it does > >>> } >>> >>> - ret = check_error_state(rdma); >>> - if (ret) { >>> - return ret; >> >> Ditto > > Likewise. > > [...] >
diff --git a/migration/rdma.c b/migration/rdma.c index cc59155a50..46b5859268 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -3219,12 +3219,11 @@ static size_t qemu_rdma_save_page(QEMUFile *f, rdma = qatomic_rcu_read(&rioc->rdmaout); if (!rdma) { - return -EIO; + return -1; } - ret = check_error_state(rdma); - if (ret) { - return ret; + if (check_error_state(rdma)) { + return -1; } qemu_fflush(f); @@ -3290,9 +3289,10 @@ static size_t qemu_rdma_save_page(QEMUFile *f, } return RAM_SAVE_CONTROL_DELAYED; + err: rdma->error_state = ret; - return ret; + return -1; } static void rdma_accept_incoming_migration(void *opaque); @@ -3538,12 +3538,11 @@ static int qemu_rdma_registration_handle(QEMUFile *f) rdma = qatomic_rcu_read(&rioc->rdmain); if (!rdma) { - return -EIO; + return -1; } - ret = check_error_state(rdma); - if (ret) { - return ret; + if (check_error_state(rdma)) { + return -1; } local = &rdma->local_ram_blocks; @@ -3576,7 +3575,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) (unsigned int)comp->block_idx, rdma->local_ram_blocks.nb_blocks); ret = -EIO; - goto out; + goto err; } block = &(rdma->local_ram_blocks.block[comp->block_idx]); @@ -3588,7 +3587,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) case RDMA_CONTROL_REGISTER_FINISHED: trace_qemu_rdma_registration_handle_finished(); - goto out; + return 0; case RDMA_CONTROL_RAM_BLOCKS_REQUEST: trace_qemu_rdma_registration_handle_ram_blocks(); @@ -3609,7 +3608,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) if (ret) { error_report("rdma migration: error dest " "registering ram blocks"); - goto out; + goto err; } } @@ -3648,7 +3647,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) if (ret < 0) { error_report("rdma migration: error sending remote info"); - goto out; + goto err; } break; @@ -3675,7 +3674,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) (unsigned int)reg->current_index, rdma->local_ram_blocks.nb_blocks); ret = -ENOENT; - goto out; + goto err; } block = &(rdma->local_ram_blocks.block[reg->current_index]); if (block->is_ram_block) { @@ -3685,7 +3684,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) block->block_name, block->offset, reg->key.current_addr); ret = -ERANGE; - goto out; + goto err; } host_addr = (block->local_host_addr + (reg->key.current_addr - block->offset)); @@ -3701,7 +3700,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) " chunk: %" PRIx64, block->block_name, reg->key.chunk); ret = -ERANGE; - goto out; + goto err; } } chunk_start = ram_chunk_start(block, chunk); @@ -3713,7 +3712,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) chunk, chunk_start, chunk_end)) { error_report("cannot get rkey"); ret = -EINVAL; - goto out; + goto err; } reg_result->rkey = tmp_rkey; @@ -3730,7 +3729,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) if (ret < 0) { error_report("Failed to send control buffer"); - goto out; + goto err; } break; case RDMA_CONTROL_UNREGISTER_REQUEST: @@ -3753,7 +3752,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f) if (ret != 0) { perror("rdma unregistration chunk failed"); ret = -ret; - goto out; + goto err; } rdma->total_registrations--; @@ -3766,24 +3765,23 @@ static int qemu_rdma_registration_handle(QEMUFile *f) if (ret < 0) { error_report("Failed to send control buffer"); - goto out; + goto err; } break; case RDMA_CONTROL_REGISTER_RESULT: error_report("Invalid RESULT message at dest."); ret = -EIO; - goto out; + goto err; default: error_report("Unknown control message %s", control_desc(head.type)); ret = -EIO; - goto out; + goto err; } } while (1); -out: - if (ret < 0) { - rdma->error_state = ret; - } - return ret; + +err: + rdma->error_state = ret; + return -1; } /* Destination: @@ -3805,7 +3803,7 @@ rdma_block_notification_handle(QEMUFile *f, const char *name) rdma = qatomic_rcu_read(&rioc->rdmain); if (!rdma) { - return -EIO; + return -1; } /* Find the matching RAMBlock in our local list */ @@ -3818,7 +3816,7 @@ rdma_block_notification_handle(QEMUFile *f, const char *name) if (found == -1) { error_report("RAMBlock '%s' not found on destination", name); - return -ENOENT; + return -1; } rdma->local_ram_blocks.block[curr].src_index = rdma->next_src_index; @@ -3848,7 +3846,6 @@ static int qemu_rdma_registration_start(QEMUFile *f, { QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f)); RDMAContext *rdma; - int ret; if (migration_in_postcopy()) { return 0; @@ -3857,12 +3854,11 @@ static int qemu_rdma_registration_start(QEMUFile *f, RCU_READ_LOCK_GUARD(); rdma = qatomic_rcu_read(&rioc->rdmaout); if (!rdma) { - return -EIO; + return -1; } - ret = check_error_state(rdma); - if (ret) { - return ret; + if (check_error_state(rdma)) { + return -1; } trace_qemu_rdma_registration_start(flags); @@ -3891,12 +3887,11 @@ static int qemu_rdma_registration_stop(QEMUFile *f, RCU_READ_LOCK_GUARD(); rdma = qatomic_rcu_read(&rioc->rdmaout); if (!rdma) { - return -EIO; + return -1; } - ret = check_error_state(rdma); - if (ret) { - return ret; + if (check_error_state(rdma)) { + return -1; } qemu_fflush(f); @@ -3927,7 +3922,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, qemu_rdma_reg_whole_ram_blocks : NULL); if (ret < 0) { fprintf(stderr, "receiving remote info!"); - return ret; + return -1; } nb_dest_blocks = resp.len / sizeof(RDMADestBlock); @@ -3950,7 +3945,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, "not identical on both the source and destination.", local->nb_blocks, nb_dest_blocks); rdma->error_state = -EINVAL; - return -EINVAL; + return -1; } qemu_rdma_move_header(rdma, reg_result_idx, &resp); @@ -3966,7 +3961,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, local->block[i].length, rdma->dest_blocks[i].length); rdma->error_state = -EINVAL; - return -EINVAL; + return -1; } local->block[i].remote_host_addr = rdma->dest_blocks[i].remote_host_addr; @@ -3986,7 +3981,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, return 0; err: rdma->error_state = ret; - return ret; + return -1; } static const QEMUFileHooks rdma_read_hooks = {
The QEMUFileHooks methods don't come with a written contract. Digging through the code calling them, we find: * save_page(): Negative values RAM_SAVE_CONTROL_DELAYED and RAM_SAVE_CONTROL_NOT_SUPP are special. Any other negative value is an unspecified error. qemu_rdma_save_page() returns -EIO or rdma->error_state on error. I believe the latter is always negative. Nothing stops either of them to clash with the special values, though. Feels unlikely, but fix it anyway to return only the special values and -1. * before_ram_iterate(), after_ram_iterate(): Negative value means error. qemu_rdma_registration_start() and qemu_rdma_registration_stop() comply as far as I can tell. Make them comply *obviously*, by returning -1 on error. * hook_ram_load: Negative value means error. rdma_load_hook() already returns -1 on error. Leave it alone. Signed-off-by: Markus Armbruster <armbru@redhat.com> --- migration/rdma.c | 79 +++++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 42 deletions(-)