diff mbox series

tests/qtest: failover: fix infinite loop

Message ID 20220329124259.355995-1-lvivier@redhat.com
State New
Headers show
Series tests/qtest: failover: fix infinite loop | expand

Commit Message

Laurent Vivier March 29, 2022, 12:42 p.m. UTC
If the migration is over before we cancel it, we are
waiting in a loop a state that never comes because the state
is already "completed".

To avoid an infinite loop, skip the test if the migration
is "completed" before we were able to cancel it.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 tests/qtest/virtio-net-failover.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

Comments

Thomas Huth March 29, 2022, 12:47 p.m. UTC | #1
On 29/03/2022 14.42, Laurent Vivier wrote:
> If the migration is over before we cancel it, we are
> waiting in a loop a state that never comes because the state
> is already "completed".
> 
> To avoid an infinite loop, skip the test if the migration
> is "completed" before we were able to cancel it.
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>   tests/qtest/virtio-net-failover.c | 29 +++++++++++++++++++++++++----
>   1 file changed, 25 insertions(+), 4 deletions(-)
> 
> diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c
> index 80292eecf65f..78811f1c9216 100644
> --- a/tests/qtest/virtio-net-failover.c
> +++ b/tests/qtest/virtio-net-failover.c
> @@ -1141,6 +1141,11 @@ static void test_migrate_guest_off_abort(gconstpointer opaque)
>           ret = migrate_status(qts);
>   
>           status = qdict_get_str(ret, "status");
> +        if (strcmp(status, "completed") == 0) {
> +            g_test_skip("Failed to cancel the migration");
> +            qobject_unref(ret);
> +            goto out;
> +        }
>           if (strcmp(status, "active") == 0) {
>               qobject_unref(ret);
>               break;
> @@ -1155,8 +1160,12 @@ static void test_migrate_guest_off_abort(gconstpointer opaque)
>   
>       while (true) {
>           ret = migrate_status(qts);
> -
>           status = qdict_get_str(ret, "status");
> +        if (strcmp(status, "completed") == 0) {
> +            g_test_skip("Failed to cancel the migration");
> +            qobject_unref(ret);
> +            goto out;
> +        }
>           if (strcmp(status, "cancelled") == 0) {
>               qobject_unref(ret);
>               break;
> @@ -1169,6 +1178,7 @@ static void test_migrate_guest_off_abort(gconstpointer opaque)
>       check_one_card(qts, true, "standby0", MAC_STANDBY0);
>       check_one_card(qts, false, "primary0", MAC_PRIMARY0);
>   
> +out:
>       qos_object_destroy((QOSGraphObject *)vdev);
>       machine_stop(qts);
>   }
> @@ -1251,8 +1261,7 @@ static void test_migrate_abort_wait_unplug(gconstpointer opaque)
>               qobject_unref(ret);
>               break;
>           }
> -        g_assert_cmpstr(status, !=, "failed");
> -        g_assert_cmpstr(status, !=, "active");
> +        g_assert_cmpstr(status, ==, "cancelling");
>           qobject_unref(ret);
>       }
>   
> @@ -1324,11 +1333,11 @@ static void test_migrate_abort_active(gconstpointer opaque)
>           ret = migrate_status(qts);
>   
>           status = qdict_get_str(ret, "status");
> +        g_assert_cmpstr(status, !=, "failed");
>           if (strcmp(status, "wait-unplug") != 0) {
>               qobject_unref(ret);
>               break;
>           }
> -        g_assert_cmpstr(status, !=, "failed");
>           qobject_unref(ret);
>       }
>   
> @@ -1340,6 +1349,11 @@ static void test_migrate_abort_active(gconstpointer opaque)
>           ret = migrate_status(qts);
>   
>           status = qdict_get_str(ret, "status");
> +        if (strcmp(status, "completed") == 0) {
> +            g_test_skip("Failed to cancel the migration");
> +            qobject_unref(ret);
> +            goto out;
> +        }
>           if (strcmp(status, "cancelled") == 0) {
>               qobject_unref(ret);
>               break;
> @@ -1352,6 +1366,7 @@ static void test_migrate_abort_active(gconstpointer opaque)
>       check_one_card(qts, true, "standby0", MAC_STANDBY0);
>       check_one_card(qts, true, "primary0", MAC_PRIMARY0);
>   
> +out:
>       qos_object_destroy((QOSGraphObject *)vdev);
>       machine_stop(qts);
>   }
> @@ -1425,6 +1440,11 @@ static void test_migrate_off_abort(gconstpointer opaque)
>           ret = migrate_status(qts);
>   
>           status = qdict_get_str(ret, "status");
> +        if (strcmp(status, "completed") == 0) {
> +            g_test_skip("Failed to cancel the migration");
> +            qobject_unref(ret);
> +            goto out;
> +        }
>           if (strcmp(status, "cancelled") == 0) {
>               qobject_unref(ret);
>               break;
> @@ -1437,6 +1457,7 @@ static void test_migrate_off_abort(gconstpointer opaque)
>       check_one_card(qts, true, "standby0", MAC_STANDBY0);
>       check_one_card(qts, true, "primary0", MAC_PRIMARY0);
>   
> +out:
>       qos_object_destroy((QOSGraphObject *)vdev);
>       machine_stop(qts);
>   }

Acked-by: Thomas Huth <thuth@redhat.com>

Is this still urgent for 7.0, or can it wait for the 7.1 cycle?

  Thomas
Peter Maydell March 29, 2022, 1:23 p.m. UTC | #2
On Tue, 29 Mar 2022 at 13:47, Thomas Huth <thuth@redhat.com> wrote:
>
> On 29/03/2022 14.42, Laurent Vivier wrote:
> > If the migration is over before we cancel it, we are
> > waiting in a loop a state that never comes because the state
> > is already "completed".
> >
> > To avoid an infinite loop, skip the test if the migration
> > is "completed" before we were able to cancel it.

> Is this still urgent for 7.0, or can it wait for the 7.1 cycle?

It's a test case change that fixes at least one hang I've seen
in "make check". I prefer those to go in, at least before rc3,
because the CI loop being unreliable makes the whole release
process slower and more annoying.

thanks
-- PMM
Thomas Huth March 29, 2022, 1:25 p.m. UTC | #3
On 29/03/2022 15.23, Peter Maydell wrote:
> On Tue, 29 Mar 2022 at 13:47, Thomas Huth <thuth@redhat.com> wrote:
>>
>> On 29/03/2022 14.42, Laurent Vivier wrote:
>>> If the migration is over before we cancel it, we are
>>> waiting in a loop a state that never comes because the state
>>> is already "completed".
>>>
>>> To avoid an infinite loop, skip the test if the migration
>>> is "completed" before we were able to cancel it.
> 
>> Is this still urgent for 7.0, or can it wait for the 7.1 cycle?
> 
> It's a test case change that fixes at least one hang I've seen
> in "make check". I prefer those to go in, at least before rc3,
> because the CI loop being unreliable makes the whole release
> process slower and more annoying.

Ok. Do you want to pick it directly, or shall I create a pull request for 
this? (I don't have much else queued right now, that's why I ask)

  Thomas
Peter Maydell March 29, 2022, 1:27 p.m. UTC | #4
On Tue, 29 Mar 2022 at 14:25, Thomas Huth <thuth@redhat.com> wrote:
>
> On 29/03/2022 15.23, Peter Maydell wrote:
> > On Tue, 29 Mar 2022 at 13:47, Thomas Huth <thuth@redhat.com> wrote:
> >>
> >> On 29/03/2022 14.42, Laurent Vivier wrote:
> >>> If the migration is over before we cancel it, we are
> >>> waiting in a loop a state that never comes because the state
> >>> is already "completed".
> >>>
> >>> To avoid an infinite loop, skip the test if the migration
> >>> is "completed" before we were able to cancel it.
> >
> >> Is this still urgent for 7.0, or can it wait for the 7.1 cycle?
> >
> > It's a test case change that fixes at least one hang I've seen
> > in "make check". I prefer those to go in, at least before rc3,
> > because the CI loop being unreliable makes the whole release
> > process slower and more annoying.
>
> Ok. Do you want to pick it directly, or shall I create a pull request for
> this? (I don't have much else queued right now, that's why I ask)

I can apply it directly if you don't have anything else you're
sending anyway.

thanks
-- PMM
Thomas Huth March 29, 2022, 1:28 p.m. UTC | #5
On 29/03/2022 15.27, Peter Maydell wrote:
> On Tue, 29 Mar 2022 at 14:25, Thomas Huth <thuth@redhat.com> wrote:
>>
>> On 29/03/2022 15.23, Peter Maydell wrote:
>>> On Tue, 29 Mar 2022 at 13:47, Thomas Huth <thuth@redhat.com> wrote:
>>>>
>>>> On 29/03/2022 14.42, Laurent Vivier wrote:
>>>>> If the migration is over before we cancel it, we are
>>>>> waiting in a loop a state that never comes because the state
>>>>> is already "completed".
>>>>>
>>>>> To avoid an infinite loop, skip the test if the migration
>>>>> is "completed" before we were able to cancel it.
>>>
>>>> Is this still urgent for 7.0, or can it wait for the 7.1 cycle?
>>>
>>> It's a test case change that fixes at least one hang I've seen
>>> in "make check". I prefer those to go in, at least before rc3,
>>> because the CI loop being unreliable makes the whole release
>>> process slower and more annoying.
>>
>> Ok. Do you want to pick it directly, or shall I create a pull request for
>> this? (I don't have much else queued right now, that's why I ask)
> 
> I can apply it directly if you don't have anything else you're
> sending anyway.

Ok, then please apply directly! Thanks!

  Thomas
Peter Maydell March 29, 2022, 6:13 p.m. UTC | #6
On Tue, 29 Mar 2022 at 13:43, Laurent Vivier <lvivier@redhat.com> wrote:
>
> If the migration is over before we cancel it, we are
> waiting in a loop a state that never comes because the state
> is already "completed".
>
> To avoid an infinite loop, skip the test if the migration
> is "completed" before we were able to cancel it.
>
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>  tests/qtest/virtio-net-failover.c | 29 +++++++++++++++++++++++++----
>  1 file changed, 25 insertions(+), 4 deletions(-)

Applied to master, thanks.

-- PMM
Dr. David Alan Gilbert April 4, 2022, 5:48 p.m. UTC | #7
* Laurent Vivier (lvivier@redhat.com) wrote:
> If the migration is over before we cancel it, we are
> waiting in a loop a state that never comes because the state
> is already "completed".
> 
> To avoid an infinite loop, skip the test if the migration
> is "completed" before we were able to cancel it.
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>

If you're finding it's skipping to often, you might try setting the
migration bandwidth really low right at the start (a few bytes/second)
to ensure it doesn't complete under your feet.

Dave

> ---
>  tests/qtest/virtio-net-failover.c | 29 +++++++++++++++++++++++++----
>  1 file changed, 25 insertions(+), 4 deletions(-)
> 
> diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c
> index 80292eecf65f..78811f1c9216 100644
> --- a/tests/qtest/virtio-net-failover.c
> +++ b/tests/qtest/virtio-net-failover.c
> @@ -1141,6 +1141,11 @@ static void test_migrate_guest_off_abort(gconstpointer opaque)
>          ret = migrate_status(qts);
>  
>          status = qdict_get_str(ret, "status");
> +        if (strcmp(status, "completed") == 0) {
> +            g_test_skip("Failed to cancel the migration");
> +            qobject_unref(ret);
> +            goto out;
> +        }
>          if (strcmp(status, "active") == 0) {
>              qobject_unref(ret);
>              break;
> @@ -1155,8 +1160,12 @@ static void test_migrate_guest_off_abort(gconstpointer opaque)
>  
>      while (true) {
>          ret = migrate_status(qts);
> -
>          status = qdict_get_str(ret, "status");
> +        if (strcmp(status, "completed") == 0) {
> +            g_test_skip("Failed to cancel the migration");
> +            qobject_unref(ret);
> +            goto out;
> +        }
>          if (strcmp(status, "cancelled") == 0) {
>              qobject_unref(ret);
>              break;
> @@ -1169,6 +1178,7 @@ static void test_migrate_guest_off_abort(gconstpointer opaque)
>      check_one_card(qts, true, "standby0", MAC_STANDBY0);
>      check_one_card(qts, false, "primary0", MAC_PRIMARY0);
>  
> +out:
>      qos_object_destroy((QOSGraphObject *)vdev);
>      machine_stop(qts);
>  }
> @@ -1251,8 +1261,7 @@ static void test_migrate_abort_wait_unplug(gconstpointer opaque)
>              qobject_unref(ret);
>              break;
>          }
> -        g_assert_cmpstr(status, !=, "failed");
> -        g_assert_cmpstr(status, !=, "active");
> +        g_assert_cmpstr(status, ==, "cancelling");
>          qobject_unref(ret);
>      }
>  
> @@ -1324,11 +1333,11 @@ static void test_migrate_abort_active(gconstpointer opaque)
>          ret = migrate_status(qts);
>  
>          status = qdict_get_str(ret, "status");
> +        g_assert_cmpstr(status, !=, "failed");
>          if (strcmp(status, "wait-unplug") != 0) {
>              qobject_unref(ret);
>              break;
>          }
> -        g_assert_cmpstr(status, !=, "failed");
>          qobject_unref(ret);
>      }
>  
> @@ -1340,6 +1349,11 @@ static void test_migrate_abort_active(gconstpointer opaque)
>          ret = migrate_status(qts);
>  
>          status = qdict_get_str(ret, "status");
> +        if (strcmp(status, "completed") == 0) {
> +            g_test_skip("Failed to cancel the migration");
> +            qobject_unref(ret);
> +            goto out;
> +        }
>          if (strcmp(status, "cancelled") == 0) {
>              qobject_unref(ret);
>              break;
> @@ -1352,6 +1366,7 @@ static void test_migrate_abort_active(gconstpointer opaque)
>      check_one_card(qts, true, "standby0", MAC_STANDBY0);
>      check_one_card(qts, true, "primary0", MAC_PRIMARY0);
>  
> +out:
>      qos_object_destroy((QOSGraphObject *)vdev);
>      machine_stop(qts);
>  }
> @@ -1425,6 +1440,11 @@ static void test_migrate_off_abort(gconstpointer opaque)
>          ret = migrate_status(qts);
>  
>          status = qdict_get_str(ret, "status");
> +        if (strcmp(status, "completed") == 0) {
> +            g_test_skip("Failed to cancel the migration");
> +            qobject_unref(ret);
> +            goto out;
> +        }
>          if (strcmp(status, "cancelled") == 0) {
>              qobject_unref(ret);
>              break;
> @@ -1437,6 +1457,7 @@ static void test_migrate_off_abort(gconstpointer opaque)
>      check_one_card(qts, true, "standby0", MAC_STANDBY0);
>      check_one_card(qts, true, "primary0", MAC_PRIMARY0);
>  
> +out:
>      qos_object_destroy((QOSGraphObject *)vdev);
>      machine_stop(qts);
>  }
> -- 
> 2.35.1
>
diff mbox series

Patch

diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c
index 80292eecf65f..78811f1c9216 100644
--- a/tests/qtest/virtio-net-failover.c
+++ b/tests/qtest/virtio-net-failover.c
@@ -1141,6 +1141,11 @@  static void test_migrate_guest_off_abort(gconstpointer opaque)
         ret = migrate_status(qts);
 
         status = qdict_get_str(ret, "status");
+        if (strcmp(status, "completed") == 0) {
+            g_test_skip("Failed to cancel the migration");
+            qobject_unref(ret);
+            goto out;
+        }
         if (strcmp(status, "active") == 0) {
             qobject_unref(ret);
             break;
@@ -1155,8 +1160,12 @@  static void test_migrate_guest_off_abort(gconstpointer opaque)
 
     while (true) {
         ret = migrate_status(qts);
-
         status = qdict_get_str(ret, "status");
+        if (strcmp(status, "completed") == 0) {
+            g_test_skip("Failed to cancel the migration");
+            qobject_unref(ret);
+            goto out;
+        }
         if (strcmp(status, "cancelled") == 0) {
             qobject_unref(ret);
             break;
@@ -1169,6 +1178,7 @@  static void test_migrate_guest_off_abort(gconstpointer opaque)
     check_one_card(qts, true, "standby0", MAC_STANDBY0);
     check_one_card(qts, false, "primary0", MAC_PRIMARY0);
 
+out:
     qos_object_destroy((QOSGraphObject *)vdev);
     machine_stop(qts);
 }
@@ -1251,8 +1261,7 @@  static void test_migrate_abort_wait_unplug(gconstpointer opaque)
             qobject_unref(ret);
             break;
         }
-        g_assert_cmpstr(status, !=, "failed");
-        g_assert_cmpstr(status, !=, "active");
+        g_assert_cmpstr(status, ==, "cancelling");
         qobject_unref(ret);
     }
 
@@ -1324,11 +1333,11 @@  static void test_migrate_abort_active(gconstpointer opaque)
         ret = migrate_status(qts);
 
         status = qdict_get_str(ret, "status");
+        g_assert_cmpstr(status, !=, "failed");
         if (strcmp(status, "wait-unplug") != 0) {
             qobject_unref(ret);
             break;
         }
-        g_assert_cmpstr(status, !=, "failed");
         qobject_unref(ret);
     }
 
@@ -1340,6 +1349,11 @@  static void test_migrate_abort_active(gconstpointer opaque)
         ret = migrate_status(qts);
 
         status = qdict_get_str(ret, "status");
+        if (strcmp(status, "completed") == 0) {
+            g_test_skip("Failed to cancel the migration");
+            qobject_unref(ret);
+            goto out;
+        }
         if (strcmp(status, "cancelled") == 0) {
             qobject_unref(ret);
             break;
@@ -1352,6 +1366,7 @@  static void test_migrate_abort_active(gconstpointer opaque)
     check_one_card(qts, true, "standby0", MAC_STANDBY0);
     check_one_card(qts, true, "primary0", MAC_PRIMARY0);
 
+out:
     qos_object_destroy((QOSGraphObject *)vdev);
     machine_stop(qts);
 }
@@ -1425,6 +1440,11 @@  static void test_migrate_off_abort(gconstpointer opaque)
         ret = migrate_status(qts);
 
         status = qdict_get_str(ret, "status");
+        if (strcmp(status, "completed") == 0) {
+            g_test_skip("Failed to cancel the migration");
+            qobject_unref(ret);
+            goto out;
+        }
         if (strcmp(status, "cancelled") == 0) {
             qobject_unref(ret);
             break;
@@ -1437,6 +1457,7 @@  static void test_migrate_off_abort(gconstpointer opaque)
     check_one_card(qts, true, "standby0", MAC_STANDBY0);
     check_one_card(qts, true, "primary0", MAC_PRIMARY0);
 
+out:
     qos_object_destroy((QOSGraphObject *)vdev);
     machine_stop(qts);
 }