diff mbox series

[RFC,v2,5/8] block.c: wrlock in bdrv_replace_child_noperm

Message ID 20220426085114.199647-6-eesposit@redhat.com
State New
Headers show
Series Removal of AioContext lock, bs->parents and ->children: new rwlock | expand

Commit Message

Emanuele Giuseppe Esposito April 26, 2022, 8:51 a.m. UTC
The only problem here is ->attach and ->detach callbacks
could call bdrv_{un}apply_subtree_drain(), which itself
will use a rdlock to navigate through all nodes.
To avoid deadlocks, take the lock only outside the drains,
and if we need to both attach and detach, do it in a single
critical section.

Therefore change ->attach and ->detach to return true if they
are modifying the lock, so that we don't take it twice or release
temporarly.

Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
 block.c                          | 31 +++++++++++++++++++++++++++----
 block/block-backend.c            |  6 ++++--
 include/block/block_int-common.h |  8 ++++++--
 3 files changed, 37 insertions(+), 8 deletions(-)

Comments

Paolo Bonzini April 26, 2022, 3:07 p.m. UTC | #1
On 4/26/22 10:51, Emanuele Giuseppe Esposito wrote:
> The only problem here is ->attach and ->detach callbacks
> could call bdrv_{un}apply_subtree_drain(), which itself
> will use a rdlock to navigate through all nodes.
> To avoid deadlocks, take the lock only outside the drains,
> and if we need to both attach and detach, do it in a single
> critical section.
> 
> Therefore change ->attach and ->detach to return true if they
> are modifying the lock, so that we don't take it twice or release
> temporarly.

An alternative way to implement this could be

struct GraphLockState {
     bool taken;
}

void bdrv_graph_ensure_wrlock(GraphLockState *state)
{
     if (state->taken) {
         return;
     }
     bdrv_graph_wrlock();
     state->taken = true;
}

void bdrv_graph_ensure_wrunlock(GraphLockState *state)
{
     ...
}

and pass the GraphLockState* to ->attach and ->detach.

Paolo
Stefan Hajnoczi April 28, 2022, 1:55 p.m. UTC | #2
On Tue, Apr 26, 2022 at 04:51:11AM -0400, Emanuele Giuseppe Esposito wrote:
> The only problem here is ->attach and ->detach callbacks
> could call bdrv_{un}apply_subtree_drain(), which itself
> will use a rdlock to navigate through all nodes.
> To avoid deadlocks, take the lock only outside the drains,
> and if we need to both attach and detach, do it in a single
> critical section.
> 
> Therefore change ->attach and ->detach to return true if they
> are modifying the lock, so that we don't take it twice or release
> temporarly.
> 
> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
> ---
>  block.c                          | 31 +++++++++++++++++++++++++++----
>  block/block-backend.c            |  6 ++++--
>  include/block/block_int-common.h |  8 ++++++--
>  3 files changed, 37 insertions(+), 8 deletions(-)
> 
> diff --git a/block.c b/block.c
> index b2eb679abb..6cd87e8dd3 100644
> --- a/block.c
> +++ b/block.c
> @@ -1434,21 +1434,26 @@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
>      *child_flags = flags;
>  }
>  
> -static void bdrv_child_cb_attach(BdrvChild *child)
> +static bool bdrv_child_cb_attach(BdrvChild *child)
>  {
>      BlockDriverState *bs = child->opaque;
>  
>      assert_bdrv_graph_writable(bs);
>      QLIST_INSERT_HEAD(&bs->children, child, next);
>  
> +    /* Paired with bdrv_graph_wrlock() in bdrv_replace_child_noperm */
> +    bdrv_graph_wrunlock();
> +
>      if (child->role & BDRV_CHILD_COW) {
>          bdrv_backing_attach(child);
>      }
>  
>      bdrv_apply_subtree_drain(child, bs);
> +
> +    return true;
>  }
>  
> -static void bdrv_child_cb_detach(BdrvChild *child)
> +static bool bdrv_child_cb_detach(BdrvChild *child)
>  {
>      BlockDriverState *bs = child->opaque;
>  
> @@ -1458,8 +1463,13 @@ static void bdrv_child_cb_detach(BdrvChild *child)
>  
>      bdrv_unapply_subtree_drain(child, bs);
>  
> +    /* Paired with bdrv_graph_wrunlock() in bdrv_replace_child_noperm */
> +    bdrv_graph_wrlock();
> +
>      assert_bdrv_graph_writable(bs);
>      QLIST_REMOVE(child, next);
> +
> +    return true;
>  }
>  
>  static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
> @@ -2842,6 +2852,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
>      BlockDriverState *old_bs = child->bs;
>      int new_bs_quiesce_counter;
>      int drain_saldo;
> +    bool locked = false;
>  
>      assert(!child->frozen);
>      assert(old_bs != new_bs);
> @@ -2868,8 +2879,12 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
>           * are already gone and we only end the drain sections that came from
>           * elsewhere. */
>          if (child->klass->detach) {
> -            child->klass->detach(child);
> +            locked = child->klass->detach(child);
> +        }
> +        if (!locked) {
> +            bdrv_graph_wrlock();
>          }
> +        locked = true;
>          assert_bdrv_graph_writable(old_bs);
>          QLIST_REMOVE(child, next_parent);
>      }
> @@ -2880,6 +2895,10 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
>      }
>  
>      if (new_bs) {
> +        if (!locked) {
> +            bdrv_graph_wrlock();
> +            locked = true;
> +        }
>          assert_bdrv_graph_writable(new_bs);
>          QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
>  
> @@ -2896,10 +2915,14 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
>           * drain sections coming from @child don't get an extra .drained_begin
>           * callback. */
>          if (child->klass->attach) {
> -            child->klass->attach(child);
> +            locked = !(child->klass->attach(child));

O_O I don't understand what the return value of ->attach() means. It has
the opposite meaning to the return value of ->detach()?

>          }
>      }
>  
> +    if (locked) {
> +        bdrv_graph_wrunlock();
> +    }
> +
>      /*
>       * If the old child node was drained but the new one is not, allow
>       * requests to come in only after the new node has been attached.
> diff --git a/block/block-backend.c b/block/block-backend.c
> index e0e1aff4b1..5dbd9fceae 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -282,7 +282,7 @@ static int blk_root_inactivate(BdrvChild *child)
>      return 0;
>  }
>  
> -static void blk_root_attach(BdrvChild *child)
> +static bool blk_root_attach(BdrvChild *child)
>  {
>      BlockBackend *blk = child->opaque;
>      BlockBackendAioNotifier *notifier;
> @@ -295,9 +295,10 @@ static void blk_root_attach(BdrvChild *child)
>                  notifier->detach_aio_context,
>                  notifier->opaque);
>      }
> +    return false;
>  }
>  
> -static void blk_root_detach(BdrvChild *child)
> +static bool blk_root_detach(BdrvChild *child)
>  {
>      BlockBackend *blk = child->opaque;
>      BlockBackendAioNotifier *notifier;
> @@ -310,6 +311,7 @@ static void blk_root_detach(BdrvChild *child)
>                  notifier->detach_aio_context,
>                  notifier->opaque);
>      }
> +    return false;
>  }
>  
>  static AioContext *blk_root_get_parent_aio_context(BdrvChild *c)
> diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
> index 5a04c778e4..dd058c1fd8 100644
> --- a/include/block/block_int-common.h
> +++ b/include/block/block_int-common.h
> @@ -857,8 +857,12 @@ struct BdrvChildClass {
>      void (*activate)(BdrvChild *child, Error **errp);
>      int (*inactivate)(BdrvChild *child);
>  
> -    void (*attach)(BdrvChild *child);
> -    void (*detach)(BdrvChild *child);
> +    /*
> +     * Return true if the graph wrlock is taken/released,

What does "taken/released" mean? Does it mean released by attach and
taken by detach?

Also, please document which locks are held when these callbacks are
invoked.

> +     * false if the wrlock state is not changed.
> +     */
> +    bool (*attach)(BdrvChild *child);
> +    bool (*detach)(BdrvChild *child);
>  
>      /*
>       * Notifies the parent that the filename of its child has changed (e.g.
> -- 
> 2.31.1
>
Emanuele Giuseppe Esposito April 29, 2022, 8:41 a.m. UTC | #3
Am 28/04/2022 um 15:55 schrieb Stefan Hajnoczi:
> On Tue, Apr 26, 2022 at 04:51:11AM -0400, Emanuele Giuseppe Esposito wrote:
>> The only problem here is ->attach and ->detach callbacks
>> could call bdrv_{un}apply_subtree_drain(), which itself
>> will use a rdlock to navigate through all nodes.
>> To avoid deadlocks, take the lock only outside the drains,
>> and if we need to both attach and detach, do it in a single
>> critical section.
>>
>> Therefore change ->attach and ->detach to return true if they
>> are modifying the lock, so that we don't take it twice or release
>> temporarly.
>>
>> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
>> ---
>>  block.c                          | 31 +++++++++++++++++++++++++++----
>>  block/block-backend.c            |  6 ++++--
>>  include/block/block_int-common.h |  8 ++++++--
>>  3 files changed, 37 insertions(+), 8 deletions(-)
>>
>> diff --git a/block.c b/block.c
>> index b2eb679abb..6cd87e8dd3 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -1434,21 +1434,26 @@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
>>      *child_flags = flags;
>>  }
>>  
>> -static void bdrv_child_cb_attach(BdrvChild *child)
>> +static bool bdrv_child_cb_attach(BdrvChild *child)
>>  {
>>      BlockDriverState *bs = child->opaque;
>>  
>>      assert_bdrv_graph_writable(bs);
>>      QLIST_INSERT_HEAD(&bs->children, child, next);
>>  
>> +    /* Paired with bdrv_graph_wrlock() in bdrv_replace_child_noperm */
>> +    bdrv_graph_wrunlock();
>> +
>>      if (child->role & BDRV_CHILD_COW) {
>>          bdrv_backing_attach(child);
>>      }
>>  
>>      bdrv_apply_subtree_drain(child, bs);
>> +
>> +    return true;
>>  }
>>  
>> -static void bdrv_child_cb_detach(BdrvChild *child)
>> +static bool bdrv_child_cb_detach(BdrvChild *child)
>>  {
>>      BlockDriverState *bs = child->opaque;
>>  
>> @@ -1458,8 +1463,13 @@ static void bdrv_child_cb_detach(BdrvChild *child)
>>  
>>      bdrv_unapply_subtree_drain(child, bs);
>>  
>> +    /* Paired with bdrv_graph_wrunlock() in bdrv_replace_child_noperm */
>> +    bdrv_graph_wrlock();
>> +
>>      assert_bdrv_graph_writable(bs);
>>      QLIST_REMOVE(child, next);
>> +
>> +    return true;
>>  }
>>  
>>  static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
>> @@ -2842,6 +2852,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
>>      BlockDriverState *old_bs = child->bs;
>>      int new_bs_quiesce_counter;
>>      int drain_saldo;
>> +    bool locked = false;
>>  
>>      assert(!child->frozen);
>>      assert(old_bs != new_bs);
>> @@ -2868,8 +2879,12 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
>>           * are already gone and we only end the drain sections that came from
>>           * elsewhere. */
>>          if (child->klass->detach) {
>> -            child->klass->detach(child);
>> +            locked = child->klass->detach(child);
>> +        }
>> +        if (!locked) {
>> +            bdrv_graph_wrlock();
>>          }
>> +        locked = true;
>>          assert_bdrv_graph_writable(old_bs);
>>          QLIST_REMOVE(child, next_parent);
>>      }
>> @@ -2880,6 +2895,10 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
>>      }
>>  
>>      if (new_bs) {
>> +        if (!locked) {
>> +            bdrv_graph_wrlock();
>> +            locked = true;
>> +        }
>>          assert_bdrv_graph_writable(new_bs);
>>          QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
>>  
>> @@ -2896,10 +2915,14 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
>>           * drain sections coming from @child don't get an extra .drained_begin
>>           * callback. */
>>          if (child->klass->attach) {
>> -            child->klass->attach(child);
>> +            locked = !(child->klass->attach(child));
> 
> O_O I don't understand what the return value of ->attach() means. It has
> the opposite meaning to the return value of ->detach()?

It means "state of the lock changed". So for ->attach(), if it is
changed (went to unlock), we want locked = false.

I will probably switch to Paolo's suggestion, it's cleaner.
> 
>>          }
>>      }
>>  
>> +    if (locked) {
>> +        bdrv_graph_wrunlock();
>> +    }
>> +
>>      /*
>>       * If the old child node was drained but the new one is not, allow
>>       * requests to come in only after the new node has been attached.
>> diff --git a/block/block-backend.c b/block/block-backend.c
>> index e0e1aff4b1..5dbd9fceae 100644
>> --- a/block/block-backend.c
>> +++ b/block/block-backend.c
>> @@ -282,7 +282,7 @@ static int blk_root_inactivate(BdrvChild *child)
>>      return 0;
>>  }
>>  
>> -static void blk_root_attach(BdrvChild *child)
>> +static bool blk_root_attach(BdrvChild *child)
>>  {
>>      BlockBackend *blk = child->opaque;
>>      BlockBackendAioNotifier *notifier;
>> @@ -295,9 +295,10 @@ static void blk_root_attach(BdrvChild *child)
>>                  notifier->detach_aio_context,
>>                  notifier->opaque);
>>      }
>> +    return false;
>>  }
>>  
>> -static void blk_root_detach(BdrvChild *child)
>> +static bool blk_root_detach(BdrvChild *child)
>>  {
>>      BlockBackend *blk = child->opaque;
>>      BlockBackendAioNotifier *notifier;
>> @@ -310,6 +311,7 @@ static void blk_root_detach(BdrvChild *child)
>>                  notifier->detach_aio_context,
>>                  notifier->opaque);
>>      }
>> +    return false;
>>  }
>>  
>>  static AioContext *blk_root_get_parent_aio_context(BdrvChild *c)
>> diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
>> index 5a04c778e4..dd058c1fd8 100644
>> --- a/include/block/block_int-common.h
>> +++ b/include/block/block_int-common.h
>> @@ -857,8 +857,12 @@ struct BdrvChildClass {
>>      void (*activate)(BdrvChild *child, Error **errp);
>>      int (*inactivate)(BdrvChild *child);
>>  
>> -    void (*attach)(BdrvChild *child);
>> -    void (*detach)(BdrvChild *child);
>> +    /*
>> +     * Return true if the graph wrlock is taken/released,
> 
> What does "taken/released" mean? Does it mean released by attach and
> taken by detach?

Yes
> 
> Also, please document which locks are held when these callbacks are
> invoked.
> 
>> +     * false if the wrlock state is not changed.
>> +     */
>> +    bool (*attach)(BdrvChild *child);
>> +    bool (*detach)(BdrvChild *child);
>>  
>>      /*
>>       * Notifies the parent that the filename of its child has changed (e.g.
>> -- 
>> 2.31.1
>>
diff mbox series

Patch

diff --git a/block.c b/block.c
index b2eb679abb..6cd87e8dd3 100644
--- a/block.c
+++ b/block.c
@@ -1434,21 +1434,26 @@  static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
     *child_flags = flags;
 }
 
-static void bdrv_child_cb_attach(BdrvChild *child)
+static bool bdrv_child_cb_attach(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
 
     assert_bdrv_graph_writable(bs);
     QLIST_INSERT_HEAD(&bs->children, child, next);
 
+    /* Paired with bdrv_graph_wrlock() in bdrv_replace_child_noperm */
+    bdrv_graph_wrunlock();
+
     if (child->role & BDRV_CHILD_COW) {
         bdrv_backing_attach(child);
     }
 
     bdrv_apply_subtree_drain(child, bs);
+
+    return true;
 }
 
-static void bdrv_child_cb_detach(BdrvChild *child)
+static bool bdrv_child_cb_detach(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
 
@@ -1458,8 +1463,13 @@  static void bdrv_child_cb_detach(BdrvChild *child)
 
     bdrv_unapply_subtree_drain(child, bs);
 
+    /* Paired with bdrv_graph_wrunlock() in bdrv_replace_child_noperm */
+    bdrv_graph_wrlock();
+
     assert_bdrv_graph_writable(bs);
     QLIST_REMOVE(child, next);
+
+    return true;
 }
 
 static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
@@ -2842,6 +2852,7 @@  static void bdrv_replace_child_noperm(BdrvChild **childp,
     BlockDriverState *old_bs = child->bs;
     int new_bs_quiesce_counter;
     int drain_saldo;
+    bool locked = false;
 
     assert(!child->frozen);
     assert(old_bs != new_bs);
@@ -2868,8 +2879,12 @@  static void bdrv_replace_child_noperm(BdrvChild **childp,
          * are already gone and we only end the drain sections that came from
          * elsewhere. */
         if (child->klass->detach) {
-            child->klass->detach(child);
+            locked = child->klass->detach(child);
+        }
+        if (!locked) {
+            bdrv_graph_wrlock();
         }
+        locked = true;
         assert_bdrv_graph_writable(old_bs);
         QLIST_REMOVE(child, next_parent);
     }
@@ -2880,6 +2895,10 @@  static void bdrv_replace_child_noperm(BdrvChild **childp,
     }
 
     if (new_bs) {
+        if (!locked) {
+            bdrv_graph_wrlock();
+            locked = true;
+        }
         assert_bdrv_graph_writable(new_bs);
         QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
 
@@ -2896,10 +2915,14 @@  static void bdrv_replace_child_noperm(BdrvChild **childp,
          * drain sections coming from @child don't get an extra .drained_begin
          * callback. */
         if (child->klass->attach) {
-            child->klass->attach(child);
+            locked = !(child->klass->attach(child));
         }
     }
 
+    if (locked) {
+        bdrv_graph_wrunlock();
+    }
+
     /*
      * If the old child node was drained but the new one is not, allow
      * requests to come in only after the new node has been attached.
diff --git a/block/block-backend.c b/block/block-backend.c
index e0e1aff4b1..5dbd9fceae 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -282,7 +282,7 @@  static int blk_root_inactivate(BdrvChild *child)
     return 0;
 }
 
-static void blk_root_attach(BdrvChild *child)
+static bool blk_root_attach(BdrvChild *child)
 {
     BlockBackend *blk = child->opaque;
     BlockBackendAioNotifier *notifier;
@@ -295,9 +295,10 @@  static void blk_root_attach(BdrvChild *child)
                 notifier->detach_aio_context,
                 notifier->opaque);
     }
+    return false;
 }
 
-static void blk_root_detach(BdrvChild *child)
+static bool blk_root_detach(BdrvChild *child)
 {
     BlockBackend *blk = child->opaque;
     BlockBackendAioNotifier *notifier;
@@ -310,6 +311,7 @@  static void blk_root_detach(BdrvChild *child)
                 notifier->detach_aio_context,
                 notifier->opaque);
     }
+    return false;
 }
 
 static AioContext *blk_root_get_parent_aio_context(BdrvChild *c)
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 5a04c778e4..dd058c1fd8 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -857,8 +857,12 @@  struct BdrvChildClass {
     void (*activate)(BdrvChild *child, Error **errp);
     int (*inactivate)(BdrvChild *child);
 
-    void (*attach)(BdrvChild *child);
-    void (*detach)(BdrvChild *child);
+    /*
+     * Return true if the graph wrlock is taken/released,
+     * false if the wrlock state is not changed.
+     */
+    bool (*attach)(BdrvChild *child);
+    bool (*detach)(BdrvChild *child);
 
     /*
      * Notifies the parent that the filename of its child has changed (e.g.