| Message ID | 20190310070822.11564-4-viro@ZenIV.linux.org.uk |
|---|---|
| State | Not Applicable |
| Delegated to | David Miller |
| Series | [1/8] pin iocb through aio. |
Where do we put the second iocb reference in case we return from
vfs_poll without ever being woken?

Also it seems like the complete code would still benefit from a little
helper, something like:

```diff
diff --git a/fs/aio.c b/fs/aio.c
index b2a5c7b3a1fe..8415e5e484ce 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1611,6 +1611,13 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
 	return 0;
 }
 
+static void aio_poll_finish(struct aio_kiocb *iocb, __poll_t mask)
+{
+	list_del_init(&iocb->ki_list);
+	iocb->ki_res.res = mangle_poll(mask);
+	iocb->poll.done = true;
+}
+
 static void aio_poll_complete_work(struct work_struct *work)
 {
 	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
@@ -1635,9 +1642,7 @@ static void aio_poll_complete_work(struct work_struct *work)
 		spin_unlock_irq(&ctx->ctx_lock);
 		return;
 	}
-	list_del_init(&iocb->ki_list);
-	iocb->ki_res.res = mangle_poll(mask);
-	req->done = true;
+	aio_poll_finish(iocb, mask);
 	spin_unlock_irq(&ctx->ctx_lock);
 
 	iocb_put(iocb);
@@ -1674,24 +1679,20 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 
 	list_del_init(&req->wait.entry);
 
-	if (mask) {
+	if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
 		/*
 		 * Try to complete the iocb inline if we can. Use
 		 * irqsave/irqrestore because not all filesystems (e.g. fuse)
 		 * call this function with IRQs disabled and because IRQs
 		 * have to be disabled before ctx_lock is obtained.
 		 */
-		if (spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
-			list_del(&iocb->ki_list);
-			iocb->ki_res.res = mangle_poll(mask);
-			req->done = true;
-			spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
-			iocb_put(iocb);
-			return 1;
-		}
+		aio_poll_finish(iocb, mask);
+		spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
+		iocb_put(iocb);
+	} else {
+		schedule_work(&req->work);
 	}
 
-	schedule_work(&req->work);
 	return 1;
 }
```
On Mon, Mar 11, 2019 at 08:58:31PM +0100, Christoph Hellwig wrote:
> Where do we put the second iocb reference in case we return from
> vfs_poll without ever being woken?

Depends. If mask is non-zero (i.e. vfs_poll() has returned something
we care about) and it has never been woken, we steal it and drop the
reference ourselves. If it is zero and we see that ->poll() has tried
to put it on two queues, we steal it (again, assuming it's not on
waitqueue and _can_ be stolen) and return -EINVAL. In that case
__io_submit_one() (or, by the end of the series, io_submit_one())
will call iocb_destroy(). And in the normal waiting case (nothing
interesting reported and no errors) it will end up on the list of
cancellables. Then it either will get completed by later wakeup, which
will drop the reference, or it will get eventually cancelled, which will
hit the same aio_poll_complete_work() and drop the reference...

> Also it seems like the complete code would still benefit from a little
> helper, something like:

Umm... Not sure I like the name (something like aio_poll_done() seems
to be better), but other than that - no problem.
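The lifecycle Al describes boils down to a two-reference scheme: submission takes one reference for the submitter and one for the wakeup/cancel side, and whichever path "steals" the iocb becomes responsible for dropping the extra reference. A minimal userspace sketch of that ownership rule, assuming C11 atomics; `obj`, `obj_put`, and `submit_poll` are hypothetical names that only mirror the iocb_get()/iocb_put()/iocb_destroy() shape discussed above:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	atomic_int refs;	/* stands in for the iocb reference count */
};

static void obj_put(struct obj *o)
{
	/* the last put frees the object: the analogue of iocb_destroy() */
	if (atomic_fetch_sub(&o->refs, 1) == 1) {
		printf("destroyed\n");
		free(o);
	}
}

static struct obj *submit_poll(bool ready_at_submit)
{
	struct obj *o = malloc(sizeof(*o));

	/* one ref for the submitter, one for the wakeup/cancel side */
	atomic_init(&o->refs, 2);

	if (ready_at_submit) {
		/*
		 * vfs_poll() reported events before any wakeup fired:
		 * the submitter "steals" the waitqueue-side reference
		 * and drops it itself, since no wakeup will do so later.
		 */
		obj_put(o);
	}
	return o;	/* submitter's reference, dropped by the caller */
}

int main(void)
{
	struct obj *o = submit_poll(true);

	obj_put(o);	/* submitter's put; refs hit zero, object dies */
	return 0;
}
```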
On Mon, Mar 11, 2019 at 09:06:18PM +0000, Al Viro wrote:
> On Mon, Mar 11, 2019 at 08:58:31PM +0100, Christoph Hellwig wrote:
> > Where do we put the second iocb reference in case we return from
> > vfs_poll without ever being woken?
>
> Depends. If mask is non-zero (i.e. vfs_poll() has returned something
> we care about) and it has never been woken, we steal it and drop the
> reference ourselves. If it is zero and we see that ->poll() has tried
> to put it on two queues, we steal it (again, assuming it's not on
> waitqueue and _can_ be stolen) and return -EINVAL. In that case
> __io_submit_one() (or, by the end of the series, io_submit_one())
> will call iocb_destroy(). And in the normal waiting case (nothing
> interesting reported and no errors) it will end up on the list of
> cancellables. Then it either will get completed by later wakeup, which
> will drop the reference, or it will get eventually cancelled, which will
> hit the same aio_poll_complete_work() and drop the reference...

Ok, seems like the logic is sane. I was missing how the actual mask
logic worked in aio_poll().

> > Also it seems like the complete code would still benefit from a little
> > helper, something like:
>
> Umm... Not sure I like the name (something like aio_poll_done() seems
> to be better), but other than that - no problem.

I don't care about the name. Feel free to change it to whatever suits
you.
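The mask logic in question is the `mask = vfs_poll(req->file, &apt.pt) & req->events;` line in the patch below: whatever readiness the file reports is ANDed with the events the submitter asked for, and a non-zero result means the iocb can be completed inline instead of being left armed on the waitqueue. A userspace analogue of that gating, using poll(2) on a pipe; the helper name `ready_mask` is hypothetical:

```c
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

/* poll fd once without blocking, roughly like a vfs_poll() probe */
static short ready_mask(int fd, short requested)
{
	struct pollfd pfd = { .fd = fd, .events = requested };

	if (poll(&pfd, 1, 0) <= 0)
		return 0;
	return pfd.revents & requested;	/* the "& req->events" step */
}

int main(void)
{
	int p[2];
	short requested = POLLIN;	/* like the submitter's event mask */

	if (pipe(p))
		return 1;

	/* nothing written yet: mask is 0, so we would stay on the queue */
	printf("before write: mask=%#x\n",
	       (unsigned)ready_mask(p[0], requested));

	if (write(p[1], "x", 1) != 1)
		return 1;

	/* event already pending: mask is non-zero, complete inline */
	printf("after write:  mask=%#x\n",
	       (unsigned)ready_mask(p[0], requested));

	close(p[0]);
	close(p[1]);
	return 0;
}
```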
```diff
diff --git a/fs/aio.c b/fs/aio.c
index b9c4c1894020..f47a29f7f201 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -181,7 +181,7 @@ struct poll_iocb {
 	struct file		*file;
 	struct wait_queue_head	*head;
 	__poll_t		events;
-	bool			woken;
+	bool			done;
 	bool			cancelled;
 	struct wait_queue_entry	wait;
 	struct work_struct	work;
@@ -1606,12 +1606,6 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
 	return 0;
 }
 
-static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
-{
-	iocb->ki_res.res = mangle_poll(mask);
-	iocb_put(iocb);
-}
-
 static void aio_poll_complete_work(struct work_struct *work)
 {
 	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
@@ -1637,9 +1631,11 @@ static void aio_poll_complete_work(struct work_struct *work)
 		return;
 	}
 	list_del_init(&iocb->ki_list);
+	iocb->ki_res.res = mangle_poll(mask);
+	req->done = true;
 	spin_unlock_irq(&ctx->ctx_lock);
 
-	aio_poll_complete(iocb, mask);
+	iocb_put(iocb);
 }
 
 /* assumes we are called with irqs disabled */
@@ -1671,7 +1667,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	if (mask && !(mask & req->events))
 		return 0;
 
-	req->woken = true;
+	list_del_init(&req->wait.entry);
 
 	if (mask) {
 		/*
@@ -1682,15 +1678,14 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 		 */
 		if (spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
 			list_del(&iocb->ki_list);
+			iocb->ki_res.res = mangle_poll(mask);
+			req->done = true;
 			spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
-
-			list_del_init(&req->wait.entry);
-			aio_poll_complete(iocb, mask);
+			iocb_put(iocb);
 			return 1;
 		}
 	}
 
-	list_del_init(&req->wait.entry);
 	schedule_work(&req->work);
 	return 1;
 }
@@ -1723,6 +1718,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 	struct kioctx *ctx = aiocb->ki_ctx;
 	struct poll_iocb *req = &aiocb->poll;
 	struct aio_poll_table apt;
+	bool cancel = false;
 	__poll_t mask;
 
 	/* reject any unknown events outside the normal event mask. */
@@ -1736,7 +1732,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 	req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
 
 	req->head = NULL;
-	req->woken = false;
+	req->done = false;
 	req->cancelled = false;
 
 	apt.pt._qproc = aio_poll_queue_proc;
@@ -1749,36 +1745,33 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 	init_waitqueue_func_entry(&req->wait, aio_poll_wake);
 
 	mask = vfs_poll(req->file, &apt.pt) & req->events;
-	if (unlikely(!req->head)) {
-		/* we did not manage to set up a waitqueue, done */
-		goto out;
-	}
-
 	spin_lock_irq(&ctx->ctx_lock);
-	spin_lock(&req->head->lock);
-	if (req->woken) {
-		/* wake_up context handles the rest */
-		mask = 0;
+	if (likely(req->head)) {
+		spin_lock(&req->head->lock);
+		if (unlikely(list_empty(&req->wait.entry))) {
+			if (apt.error)
+				cancel = true;
+			apt.error = 0;
+			mask = 0;
+		}
+		if (mask || apt.error) {
+			list_del_init(&req->wait.entry);
+		} else if (cancel) {
+			WRITE_ONCE(req->cancelled, true);
+		} else if (!req->done) { /* actually waiting for an event */
+			list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+			aiocb->ki_cancel = aio_poll_cancel;
+		}
+		spin_unlock(&req->head->lock);
+	}
+	if (mask) { /* no async, we'd stolen it */
+		aiocb->ki_res.res = mangle_poll(mask);
 		apt.error = 0;
-	} else if (mask || apt.error) {
-		/* if we get an error or a mask we are done */
-		WARN_ON_ONCE(list_empty(&req->wait.entry));
-		list_del_init(&req->wait.entry);
-	} else {
-		/* actually waiting for an event */
-		list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
-		aiocb->ki_cancel = aio_poll_cancel;
 	}
-	spin_unlock(&req->head->lock);
 	spin_unlock_irq(&ctx->ctx_lock);
-
-out:
-	if (unlikely(apt.error))
-		return apt.error;
-	if (mask)
-		aio_poll_complete(aiocb, mask);
-	return 0;
+	iocb_put(aiocb);
+	return apt.error;
 }
 
 static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
```
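The wakeup-side discipline in this patch, completing inline only when ctx_lock can be taken with a trylock and otherwise punting to a workqueue, exists because aio_poll_wake() can run from IRQ context with the waitqueue lock already held, so it must never spin on ctx_lock. A minimal pthreads sketch of that fallback pattern; `complete_or_defer` and `do_complete` are hypothetical names, and the deferred path merely models what schedule_work() would hand off:

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;

static void do_complete(const char *path)
{
	/* stands in for the completion bookkeeping plus the final put */
	printf("completed via %s path\n", path);
}

/* called from the "wakeup" side, which must not block on ctx_lock */
static void complete_or_defer(void)
{
	if (pthread_mutex_trylock(&ctx_lock) == 0) {
		do_complete("inline");	/* fast path: the lock was free */
		pthread_mutex_unlock(&ctx_lock);
	} else {
		/*
		 * Lock contended: the kernel would schedule_work() here
		 * and let the worker take ctx_lock in process context.
		 */
		do_complete("deferred");
	}
}

int main(void)
{
	complete_or_defer();		/* lock free: completes inline */

	pthread_mutex_lock(&ctx_lock);	/* simulate contention */
	complete_or_defer();		/* lock held: falls back to deferral */
	pthread_mutex_unlock(&ctx_lock);
	return 0;
}
```

The trylock is the whole point of the design: the wakeup either wins the lock and finishes the iocb on the spot, or it leaves the heavy lifting to aio_poll_complete_work(), which can sleep on the lock safely.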