Message ID | 20220426085114.199647-6-eesposit@redhat.com |
---|---|
State | New |
Headers | show |
Series | Removal of AioContext lock, bs->parents and ->children: new rwlock | expand |
On 4/26/22 10:51, Emanuele Giuseppe Esposito wrote: > The only problem here is ->attach and ->detach callbacks > could call bdrv_{un}apply_subtree_drain(), which itself > will use a rdlock to navigate through all nodes. > To avoid deadlocks, take the lock only outside the drains, > and if we need to both attach and detach, do it in a single > critical section. > > Therefore change ->attach and ->detach to return true if they > are modifying the lock, so that we don't take it twice or release > temporarly. An alternative way to implement this could be struct GraphLockState { bool taken; } void bdrv_graph_ensure_wrlock(GraphLockState *state) { if (state->taken) { return; } bdrv_graph_wrlock(); state->taken = true; } void bdrv_graph_ensure_wrunlock(GraphLockState *state) { ... } and pass the GraphLockState* to ->attach and ->detach. Paolo
On Tue, Apr 26, 2022 at 04:51:11AM -0400, Emanuele Giuseppe Esposito wrote: > The only problem here is ->attach and ->detach callbacks > could call bdrv_{un}apply_subtree_drain(), which itself > will use a rdlock to navigate through all nodes. > To avoid deadlocks, take the lock only outside the drains, > and if we need to both attach and detach, do it in a single > critical section. > > Therefore change ->attach and ->detach to return true if they > are modifying the lock, so that we don't take it twice or release > temporarly. > > Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com> > --- > block.c | 31 +++++++++++++++++++++++++++---- > block/block-backend.c | 6 ++++-- > include/block/block_int-common.h | 8 ++++++-- > 3 files changed, 37 insertions(+), 8 deletions(-) > > diff --git a/block.c b/block.c > index b2eb679abb..6cd87e8dd3 100644 > --- a/block.c > +++ b/block.c > @@ -1434,21 +1434,26 @@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, > *child_flags = flags; > } > > -static void bdrv_child_cb_attach(BdrvChild *child) > +static bool bdrv_child_cb_attach(BdrvChild *child) > { > BlockDriverState *bs = child->opaque; > > assert_bdrv_graph_writable(bs); > QLIST_INSERT_HEAD(&bs->children, child, next); > > + /* Paired with bdrv_graph_wrlock() in bdrv_replace_child_noperm */ > + bdrv_graph_wrunlock(); > + > if (child->role & BDRV_CHILD_COW) { > bdrv_backing_attach(child); > } > > bdrv_apply_subtree_drain(child, bs); > + > + return true; > } > > -static void bdrv_child_cb_detach(BdrvChild *child) > +static bool bdrv_child_cb_detach(BdrvChild *child) > { > BlockDriverState *bs = child->opaque; > > @@ -1458,8 +1463,13 @@ static void bdrv_child_cb_detach(BdrvChild *child) > > bdrv_unapply_subtree_drain(child, bs); > > + /* Paired with bdrv_graph_wrunlock() in bdrv_replace_child_noperm */ > + bdrv_graph_wrlock(); > + > assert_bdrv_graph_writable(bs); > QLIST_REMOVE(child, next); > + > + return true; > } > > static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, > @@ -2842,6 +2852,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, > BlockDriverState *old_bs = child->bs; > int new_bs_quiesce_counter; > int drain_saldo; > + bool locked = false; > > assert(!child->frozen); > assert(old_bs != new_bs); > @@ -2868,8 +2879,12 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, > * are already gone and we only end the drain sections that came from > * elsewhere. */ > if (child->klass->detach) { > - child->klass->detach(child); > + locked = child->klass->detach(child); > + } > + if (!locked) { > + bdrv_graph_wrlock(); > } > + locked = true; > assert_bdrv_graph_writable(old_bs); > QLIST_REMOVE(child, next_parent); > } > @@ -2880,6 +2895,10 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, > } > > if (new_bs) { > + if (!locked) { > + bdrv_graph_wrlock(); > + locked = true; > + } > assert_bdrv_graph_writable(new_bs); > QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); > > @@ -2896,10 +2915,14 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, > * drain sections coming from @child don't get an extra .drained_begin > * callback. */ > if (child->klass->attach) { > - child->klass->attach(child); > + locked = !(child->klass->attach(child)); O_O I don't understand what the return value of ->attach() means. It has the opposite meaning to the return value of ->detach()? > } > } > > + if (locked) { > + bdrv_graph_wrunlock(); > + } > + > /* > * If the old child node was drained but the new one is not, allow > * requests to come in only after the new node has been attached. > diff --git a/block/block-backend.c b/block/block-backend.c > index e0e1aff4b1..5dbd9fceae 100644 > --- a/block/block-backend.c > +++ b/block/block-backend.c > @@ -282,7 +282,7 @@ static int blk_root_inactivate(BdrvChild *child) > return 0; > } > > -static void blk_root_attach(BdrvChild *child) > +static bool blk_root_attach(BdrvChild *child) > { > BlockBackend *blk = child->opaque; > BlockBackendAioNotifier *notifier; > @@ -295,9 +295,10 @@ static void blk_root_attach(BdrvChild *child) > notifier->detach_aio_context, > notifier->opaque); > } > + return false; > } > > -static void blk_root_detach(BdrvChild *child) > +static bool blk_root_detach(BdrvChild *child) > { > BlockBackend *blk = child->opaque; > BlockBackendAioNotifier *notifier; > @@ -310,6 +311,7 @@ static void blk_root_detach(BdrvChild *child) > notifier->detach_aio_context, > notifier->opaque); > } > + return false; > } > > static AioContext *blk_root_get_parent_aio_context(BdrvChild *c) > diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h > index 5a04c778e4..dd058c1fd8 100644 > --- a/include/block/block_int-common.h > +++ b/include/block/block_int-common.h > @@ -857,8 +857,12 @@ struct BdrvChildClass { > void (*activate)(BdrvChild *child, Error **errp); > int (*inactivate)(BdrvChild *child); > > - void (*attach)(BdrvChild *child); > - void (*detach)(BdrvChild *child); > + /* > + * Return true if the graph wrlock is taken/released, What does "taken/released" mean? Does it mean released by attach and taken by detach? Also, please document which locks are held when these callbacks are invoked. > + * false if the wrlock state is not changed. > + */ > + bool (*attach)(BdrvChild *child); > + bool (*detach)(BdrvChild *child); > > /* > * Notifies the parent that the filename of its child has changed (e.g. > -- > 2.31.1 >
Am 28/04/2022 um 15:55 schrieb Stefan Hajnoczi: > On Tue, Apr 26, 2022 at 04:51:11AM -0400, Emanuele Giuseppe Esposito wrote: >> The only problem here is ->attach and ->detach callbacks >> could call bdrv_{un}apply_subtree_drain(), which itself >> will use a rdlock to navigate through all nodes. >> To avoid deadlocks, take the lock only outside the drains, >> and if we need to both attach and detach, do it in a single >> critical section. >> >> Therefore change ->attach and ->detach to return true if they >> are modifying the lock, so that we don't take it twice or release >> temporarly. >> >> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com> >> --- >> block.c | 31 +++++++++++++++++++++++++++---- >> block/block-backend.c | 6 ++++-- >> include/block/block_int-common.h | 8 ++++++-- >> 3 files changed, 37 insertions(+), 8 deletions(-) >> >> diff --git a/block.c b/block.c >> index b2eb679abb..6cd87e8dd3 100644 >> --- a/block.c >> +++ b/block.c >> @@ -1434,21 +1434,26 @@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, >> *child_flags = flags; >> } >> >> -static void bdrv_child_cb_attach(BdrvChild *child) >> +static bool bdrv_child_cb_attach(BdrvChild *child) >> { >> BlockDriverState *bs = child->opaque; >> >> assert_bdrv_graph_writable(bs); >> QLIST_INSERT_HEAD(&bs->children, child, next); >> >> + /* Paired with bdrv_graph_wrlock() in bdrv_replace_child_noperm */ >> + bdrv_graph_wrunlock(); >> + >> if (child->role & BDRV_CHILD_COW) { >> bdrv_backing_attach(child); >> } >> >> bdrv_apply_subtree_drain(child, bs); >> + >> + return true; >> } >> >> -static void bdrv_child_cb_detach(BdrvChild *child) >> +static bool bdrv_child_cb_detach(BdrvChild *child) >> { >> BlockDriverState *bs = child->opaque; >> >> @@ -1458,8 +1463,13 @@ static void bdrv_child_cb_detach(BdrvChild *child) >> >> bdrv_unapply_subtree_drain(child, bs); >> >> + /* Paired with bdrv_graph_wrunlock() in bdrv_replace_child_noperm */ >> + bdrv_graph_wrlock(); >> + >> assert_bdrv_graph_writable(bs); >> QLIST_REMOVE(child, next); >> + >> + return true; >> } >> >> static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, >> @@ -2842,6 +2852,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, >> BlockDriverState *old_bs = child->bs; >> int new_bs_quiesce_counter; >> int drain_saldo; >> + bool locked = false; >> >> assert(!child->frozen); >> assert(old_bs != new_bs); >> @@ -2868,8 +2879,12 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, >> * are already gone and we only end the drain sections that came from >> * elsewhere. */ >> if (child->klass->detach) { >> - child->klass->detach(child); >> + locked = child->klass->detach(child); >> + } >> + if (!locked) { >> + bdrv_graph_wrlock(); >> } >> + locked = true; >> assert_bdrv_graph_writable(old_bs); >> QLIST_REMOVE(child, next_parent); >> } >> @@ -2880,6 +2895,10 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, >> } >> >> if (new_bs) { >> + if (!locked) { >> + bdrv_graph_wrlock(); >> + locked = true; >> + } >> assert_bdrv_graph_writable(new_bs); >> QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); >> >> @@ -2896,10 +2915,14 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, >> * drain sections coming from @child don't get an extra .drained_begin >> * callback. */ >> if (child->klass->attach) { >> - child->klass->attach(child); >> + locked = !(child->klass->attach(child)); > > O_O I don't understand what the return value of ->attach() means. It has > the opposite meaning to the return value of ->detach()? It means "state of the lock changed". So for ->attach(), if it is changed (went to unlock), we want locked = false. I will probably switch to Paolo's suggestion, it's cleaner. > >> } >> } >> >> + if (locked) { >> + bdrv_graph_wrunlock(); >> + } >> + >> /* >> * If the old child node was drained but the new one is not, allow >> * requests to come in only after the new node has been attached. >> diff --git a/block/block-backend.c b/block/block-backend.c >> index e0e1aff4b1..5dbd9fceae 100644 >> --- a/block/block-backend.c >> +++ b/block/block-backend.c >> @@ -282,7 +282,7 @@ static int blk_root_inactivate(BdrvChild *child) >> return 0; >> } >> >> -static void blk_root_attach(BdrvChild *child) >> +static bool blk_root_attach(BdrvChild *child) >> { >> BlockBackend *blk = child->opaque; >> BlockBackendAioNotifier *notifier; >> @@ -295,9 +295,10 @@ static void blk_root_attach(BdrvChild *child) >> notifier->detach_aio_context, >> notifier->opaque); >> } >> + return false; >> } >> >> -static void blk_root_detach(BdrvChild *child) >> +static bool blk_root_detach(BdrvChild *child) >> { >> BlockBackend *blk = child->opaque; >> BlockBackendAioNotifier *notifier; >> @@ -310,6 +311,7 @@ static void blk_root_detach(BdrvChild *child) >> notifier->detach_aio_context, >> notifier->opaque); >> } >> + return false; >> } >> >> static AioContext *blk_root_get_parent_aio_context(BdrvChild *c) >> diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h >> index 5a04c778e4..dd058c1fd8 100644 >> --- a/include/block/block_int-common.h >> +++ b/include/block/block_int-common.h >> @@ -857,8 +857,12 @@ struct BdrvChildClass { >> void (*activate)(BdrvChild *child, Error **errp); >> int (*inactivate)(BdrvChild *child); >> >> - void (*attach)(BdrvChild *child); >> - void (*detach)(BdrvChild *child); >> + /* >> + * Return true if the graph wrlock is taken/released, > > What does "taken/released" mean? Does it mean released by attach and > taken by detach? Yes > > Also, please document which locks are held when these callbacks are > invoked. > >> + * false if the wrlock state is not changed. >> + */ >> + bool (*attach)(BdrvChild *child); >> + bool (*detach)(BdrvChild *child); >> >> /* >> * Notifies the parent that the filename of its child has changed (e.g. >> -- >> 2.31.1 >>
diff --git a/block.c b/block.c index b2eb679abb..6cd87e8dd3 100644 --- a/block.c +++ b/block.c @@ -1434,21 +1434,26 @@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, *child_flags = flags; } -static void bdrv_child_cb_attach(BdrvChild *child) +static bool bdrv_child_cb_attach(BdrvChild *child) { BlockDriverState *bs = child->opaque; assert_bdrv_graph_writable(bs); QLIST_INSERT_HEAD(&bs->children, child, next); + /* Paired with bdrv_graph_wrlock() in bdrv_replace_child_noperm */ + bdrv_graph_wrunlock(); + if (child->role & BDRV_CHILD_COW) { bdrv_backing_attach(child); } bdrv_apply_subtree_drain(child, bs); + + return true; } -static void bdrv_child_cb_detach(BdrvChild *child) +static bool bdrv_child_cb_detach(BdrvChild *child) { BlockDriverState *bs = child->opaque; @@ -1458,8 +1463,13 @@ static void bdrv_child_cb_detach(BdrvChild *child) bdrv_unapply_subtree_drain(child, bs); + /* Paired with bdrv_graph_wrunlock() in bdrv_replace_child_noperm */ + bdrv_graph_wrlock(); + assert_bdrv_graph_writable(bs); QLIST_REMOVE(child, next); + + return true; } static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, @@ -2842,6 +2852,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, BlockDriverState *old_bs = child->bs; int new_bs_quiesce_counter; int drain_saldo; + bool locked = false; assert(!child->frozen); assert(old_bs != new_bs); @@ -2868,8 +2879,12 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, * are already gone and we only end the drain sections that came from * elsewhere. */ if (child->klass->detach) { - child->klass->detach(child); + locked = child->klass->detach(child); + } + if (!locked) { + bdrv_graph_wrlock(); } + locked = true; assert_bdrv_graph_writable(old_bs); QLIST_REMOVE(child, next_parent); } @@ -2880,6 +2895,10 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, } if (new_bs) { + if (!locked) { + bdrv_graph_wrlock(); + locked = true; + } assert_bdrv_graph_writable(new_bs); QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); @@ -2896,10 +2915,14 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, * drain sections coming from @child don't get an extra .drained_begin * callback. */ if (child->klass->attach) { - child->klass->attach(child); + locked = !(child->klass->attach(child)); } } + if (locked) { + bdrv_graph_wrunlock(); + } + /* * If the old child node was drained but the new one is not, allow * requests to come in only after the new node has been attached. diff --git a/block/block-backend.c b/block/block-backend.c index e0e1aff4b1..5dbd9fceae 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -282,7 +282,7 @@ static int blk_root_inactivate(BdrvChild *child) return 0; } -static void blk_root_attach(BdrvChild *child) +static bool blk_root_attach(BdrvChild *child) { BlockBackend *blk = child->opaque; BlockBackendAioNotifier *notifier; @@ -295,9 +295,10 @@ static void blk_root_attach(BdrvChild *child) notifier->detach_aio_context, notifier->opaque); } + return false; } -static void blk_root_detach(BdrvChild *child) +static bool blk_root_detach(BdrvChild *child) { BlockBackend *blk = child->opaque; BlockBackendAioNotifier *notifier; @@ -310,6 +311,7 @@ static void blk_root_detach(BdrvChild *child) notifier->detach_aio_context, notifier->opaque); } + return false; } static AioContext *blk_root_get_parent_aio_context(BdrvChild *c) diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 5a04c778e4..dd058c1fd8 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -857,8 +857,12 @@ struct BdrvChildClass { void (*activate)(BdrvChild *child, Error **errp); int (*inactivate)(BdrvChild *child); - void (*attach)(BdrvChild *child); - void (*detach)(BdrvChild *child); + /* + * Return true if the graph wrlock is taken/released, + * false if the wrlock state is not changed. + */ + bool (*attach)(BdrvChild *child); + bool (*detach)(BdrvChild *child); /* * Notifies the parent that the filename of its child has changed (e.g.
The only problem here is ->attach and ->detach callbacks could call bdrv_{un}apply_subtree_drain(), which itself will use a rdlock to navigate through all nodes. To avoid deadlocks, take the lock only outside the drains, and if we need to both attach and detach, do it in a single critical section. Therefore change ->attach and ->detach to return true if they are modifying the lock, so that we don't take it twice or release temporarly. Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com> --- block.c | 31 +++++++++++++++++++++++++++---- block/block-backend.c | 6 ++++-- include/block/block_int-common.h | 8 ++++++-- 3 files changed, 37 insertions(+), 8 deletions(-)