Message ID | 20230803213711.689308-2-cascardo@canonical.com
---|---
State | New
Series | [SRU,Jammy,1/1] io_uring: ensure IOPOLL locks around deferred work
On 03.08.23 23:37, Thadeu Lima de Souza Cascardo wrote:
> From: Jens Axboe <axboe@kernel.dk>
>
> No direct upstream commit exists for this issue. It was fixed in
> 5.18 as part of a larger rework of the completion side.
>
> io_commit_cqring() writes the CQ ring tail to make it visible, but it
> also kicks off any deferred work we have. A ring setup with IOPOLL
> does not need any locking around the CQ ring updates, as we're always
> under the ctx uring_lock. But if we have deferred work that needs
> processing, then io_queue_deferred() assumes that the completion_lock
> is held, as it is for !IOPOLL.
>
> Add a lockdep assertion to check and document this fact, and have
> io_iopoll_complete() check if we have deferred work and run that
> separately with the appropriate lock grabbed.
>
> Cc: stable@vger.kernel.org # 5.10, 5.15
> Reported-by: dghost david <daviduniverse18@gmail.com>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> (cherry picked from commit fb348857e7b67eefe365052f1423427b66dedbf3)

I suspect we should add a linux-5.15.y to the line above...

> CVE-2023-21400
> Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
> ---
>  io_uring/io_uring.c | 25 +++++++++++++++++++++----
>  1 file changed, 21 insertions(+), 4 deletions(-)
>
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index cefa14592e9f..4cde8aee1c75 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -1523,6 +1523,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
>
>  static void io_queue_deferred(struct io_ring_ctx *ctx)
>  {
> +        lockdep_assert_held(&ctx->completion_lock);
> +
>          while (!list_empty(&ctx->defer_list)) {
>                  struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
>                                                  struct io_defer_entry, list);
> @@ -1574,14 +1576,24 @@ static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
>                  io_queue_deferred(ctx);
>  }
>
> -static inline void io_commit_cqring(struct io_ring_ctx *ctx)
> +static inline bool io_commit_needs_flush(struct io_ring_ctx *ctx)
> +{
> +        return ctx->off_timeout_used || ctx->drain_active;
> +}
> +
> +static inline void __io_commit_cqring(struct io_ring_ctx *ctx)
>  {
> -        if (unlikely(ctx->off_timeout_used || ctx->drain_active))
> -                __io_commit_cqring_flush(ctx);
>          /* order cqe stores with ring update */
>          smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
>  }
>
> +static inline void io_commit_cqring(struct io_ring_ctx *ctx)
> +{
> +        if (unlikely(io_commit_needs_flush(ctx)))
> +                __io_commit_cqring_flush(ctx);
> +        __io_commit_cqring(ctx);
> +}
> +
>  static inline bool io_sqring_full(struct io_ring_ctx *ctx)
>  {
>          struct io_rings *r = ctx->rings;
> @@ -2520,7 +2532,12 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
>                  io_req_free_batch(&rb, req, &ctx->submit_state);
>          }
>
> -        io_commit_cqring(ctx);
> +        if (io_commit_needs_flush(ctx)) {
> +                spin_lock(&ctx->completion_lock);
> +                __io_commit_cqring_flush(ctx);
> +                spin_unlock(&ctx->completion_lock);
> +        }
> +        __io_commit_cqring(ctx);
>          io_cqring_ev_posted_iopoll(ctx);
>          io_req_free_batch_finish(ctx, &rb);
>  }
On Fri, Aug 04, 2023 at 10:21:37AM +0200, Stefan Bader wrote:
> On 03.08.23 23:37, Thadeu Lima de Souza Cascardo wrote:
> > From: Jens Axboe <axboe@kernel.dk>
> >
> > No direct upstream commit exists for this issue. It was fixed in
> > 5.18 as part of a larger rework of the completion side.
> >
> > io_commit_cqring() writes the CQ ring tail to make it visible, but it
> > also kicks off any deferred work we have. A ring setup with IOPOLL
> > does not need any locking around the CQ ring updates, as we're always
> > under the ctx uring_lock. But if we have deferred work that needs
> > processing, then io_queue_deferred() assumes that the completion_lock
> > is held, as it is for !IOPOLL.
> >
> > Add a lockdep assertion to check and document this fact, and have
> > io_iopoll_complete() check if we have deferred work and run that
> > separately with the appropriate lock grabbed.
> >
> > Cc: stable@vger.kernel.org # 5.10, 5.15
> > Reported-by: dghost david <daviduniverse18@gmail.com>
> > Signed-off-by: Jens Axboe <axboe@kernel.dk>
> > Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> > (cherry picked from commit fb348857e7b67eefe365052f1423427b66dedbf3)
>
> I suspect we should add a linux-5.15.y to the line above...
>

That's correct. This has come from linux-5.15.y.

Cascardo.

> > CVE-2023-21400
> > Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
> > ---
> >  io_uring/io_uring.c | 25 +++++++++++++++++++++----
> >  1 file changed, 21 insertions(+), 4 deletions(-)
> >
> > diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> > index cefa14592e9f..4cde8aee1c75 100644
> > --- a/io_uring/io_uring.c
> > +++ b/io_uring/io_uring.c
> > @@ -1523,6 +1523,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
> >
> >  static void io_queue_deferred(struct io_ring_ctx *ctx)
> >  {
> > +        lockdep_assert_held(&ctx->completion_lock);
> > +
> >          while (!list_empty(&ctx->defer_list)) {
> >                  struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
> >                                                  struct io_defer_entry, list);
> > @@ -1574,14 +1576,24 @@ static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
> >                  io_queue_deferred(ctx);
> >  }
> >
> > -static inline void io_commit_cqring(struct io_ring_ctx *ctx)
> > +static inline bool io_commit_needs_flush(struct io_ring_ctx *ctx)
> > +{
> > +        return ctx->off_timeout_used || ctx->drain_active;
> > +}
> > +
> > +static inline void __io_commit_cqring(struct io_ring_ctx *ctx)
> >  {
> > -        if (unlikely(ctx->off_timeout_used || ctx->drain_active))
> > -                __io_commit_cqring_flush(ctx);
> >          /* order cqe stores with ring update */
> >          smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
> >  }
> >
> > +static inline void io_commit_cqring(struct io_ring_ctx *ctx)
> > +{
> > +        if (unlikely(io_commit_needs_flush(ctx)))
> > +                __io_commit_cqring_flush(ctx);
> > +        __io_commit_cqring(ctx);
> > +}
> > +
> >  static inline bool io_sqring_full(struct io_ring_ctx *ctx)
> >  {
> >          struct io_rings *r = ctx->rings;
> > @@ -2520,7 +2532,12 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
> >                  io_req_free_batch(&rb, req, &ctx->submit_state);
> >          }
> >
> > -        io_commit_cqring(ctx);
> > +        if (io_commit_needs_flush(ctx)) {
> > +                spin_lock(&ctx->completion_lock);
> > +                __io_commit_cqring_flush(ctx);
> > +                spin_unlock(&ctx->completion_lock);
> > +        }
> > +        __io_commit_cqring(ctx);
> >          io_cqring_ev_posted_iopoll(ctx);
> >          io_req_free_batch_finish(ctx, &rb);
> >  }
>
> --
> - Stefan
>
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index cefa14592e9f..4cde8aee1c75 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1523,6 +1523,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
 
 static void io_queue_deferred(struct io_ring_ctx *ctx)
 {
+        lockdep_assert_held(&ctx->completion_lock);
+
         while (!list_empty(&ctx->defer_list)) {
                 struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
                                                 struct io_defer_entry, list);
@@ -1574,14 +1576,24 @@ static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
                 io_queue_deferred(ctx);
 }
 
-static inline void io_commit_cqring(struct io_ring_ctx *ctx)
+static inline bool io_commit_needs_flush(struct io_ring_ctx *ctx)
+{
+        return ctx->off_timeout_used || ctx->drain_active;
+}
+
+static inline void __io_commit_cqring(struct io_ring_ctx *ctx)
 {
-        if (unlikely(ctx->off_timeout_used || ctx->drain_active))
-                __io_commit_cqring_flush(ctx);
         /* order cqe stores with ring update */
         smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
 }
 
+static inline void io_commit_cqring(struct io_ring_ctx *ctx)
+{
+        if (unlikely(io_commit_needs_flush(ctx)))
+                __io_commit_cqring_flush(ctx);
+        __io_commit_cqring(ctx);
+}
+
 static inline bool io_sqring_full(struct io_ring_ctx *ctx)
 {
         struct io_rings *r = ctx->rings;
@@ -2520,7 +2532,12 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
                 io_req_free_batch(&rb, req, &ctx->submit_state);
         }
 
-        io_commit_cqring(ctx);
+        if (io_commit_needs_flush(ctx)) {
+                spin_lock(&ctx->completion_lock);
+                __io_commit_cqring_flush(ctx);
+                spin_unlock(&ctx->completion_lock);
+        }
+        __io_commit_cqring(ctx);
         io_cqring_ev_posted_iopoll(ctx);
         io_req_free_batch_finish(ctx, &rb);
 }
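
For readers following the locking argument in the commit message: the shape of the fix is "publish the CQ tail lock-free, but take completion_lock only around the deferred-work flush, and only when a flush is actually needed". The sketch below is a minimal standalone userspace analogue of that pattern using C11 atomics and a pthread mutex. It is an illustration only, not kernel code; every name in it (struct ring, ring_commit_needs_flush(), ring_flush_deferred(), and so on) is invented for the example.

```c
/*
 * Userspace sketch (assumption: not kernel code) of the locking shape the
 * backport gives io_iopoll_complete(): the tail publish itself needs no
 * lock (a release store, paired with an acquire load on the consumer side),
 * while the deferred-work flush runs under the lock, and only when needed.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct ring {
        _Atomic unsigned int tail;      /* shared with consumers */
        unsigned int cached_tail;       /* producer-private */
        pthread_mutex_t completion_lock;
        int deferred;                   /* flushed under completion_lock */
};

static bool ring_commit_needs_flush(struct ring *r)
{
        return r->deferred != 0;
}

static void ring_flush_deferred(struct ring *r)
{
        /* caller must hold completion_lock, mirroring io_queue_deferred() */
        r->deferred = 0;
}

static void ring_commit(struct ring *r)
{
        /* order completion stores with the tail update */
        atomic_store_explicit(&r->tail, r->cached_tail, memory_order_release);
}

static void ring_iopoll_complete(struct ring *r, unsigned int completed)
{
        r->cached_tail += completed;

        /* take the lock only for the flush, never for the tail publish */
        if (ring_commit_needs_flush(r)) {
                pthread_mutex_lock(&r->completion_lock);
                ring_flush_deferred(r);
                pthread_mutex_unlock(&r->completion_lock);
        }
        ring_commit(r);
}

int main(void)
{
        struct ring r = { .completion_lock = PTHREAD_MUTEX_INITIALIZER };

        r.deferred = 1;
        ring_iopoll_complete(&r, 4);
        printf("tail=%u deferred=%d\n",
               atomic_load_explicit(&r.tail, memory_order_acquire), r.deferred);
        return 0;
}
```

Built with `cc -std=c11 -pthread`, this prints `tail=4 deferred=0`; the point is only that the release store publishing the tail never requires the mutex, while the flush path does, which is exactly the split the patch introduces for IOPOLL rings.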