| Message ID | 1271279826-30294-3-git-send-email-jmoyer@redhat.com |
|---|---|
| State | New, archived |
On Wed, Apr 14, 2010 at 05:17:04PM -0400, Jeff Moyer wrote:
> This patch implements a blk_yield function to allow a process to
> voluntarily give up its I/O scheduler time slice. This is desirable
> for those processes which know that they will be blocked on I/O from
> another process, such as the file system journal thread. Following
> patches will put calls to blk_yield into jbd and jbd2.
>
> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
>
> [...]
>
> @@ -1887,6 +1891,9 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
>
>  	cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
>  	cfqq->nr_sectors += blk_rq_sectors(rq);
> +
> +	if (cfq_cfqq_yield(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
> +		cfq_yield_cfqq(cfqd, cfqq);

Jeff,

I am wondering if cfq_select_queue() will be a better place for yielding
the queue.

	if (cfq_cfqq_yield(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
		goto expire;

We can avoid one unnecessary __blk_run_queue().

Apart from the above minor nit, it looks good to me.
Acked-by: Vivek Goyal <vgoyal@redhat.com>

Thanks
Vivek
On Wed, Apr 14 2010, Vivek Goyal wrote:
> On Wed, Apr 14, 2010 at 05:17:04PM -0400, Jeff Moyer wrote:
> > [...]
> > @@ -1887,6 +1891,9 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
> >
> >  	cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
> >  	cfqq->nr_sectors += blk_rq_sectors(rq);
> > +
> > +	if (cfq_cfqq_yield(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
> > +		cfq_yield_cfqq(cfqd, cfqq);
>
> Jeff,
>
> I am wondering if cfq_select_queue() will be a better place for yielding
> the queue.
>
> 	if (cfq_cfqq_yield(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
> 		goto expire;
>
> We can avoid one unnecessary __blk_run_queue().

Agree, doing it on insert is not the right place.
Jens Axboe <jens.axboe@oracle.com> writes:

> On Wed, Apr 14 2010, Vivek Goyal wrote:
>> > @@ -1887,6 +1891,9 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
>> >
>> >  	cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
>> >  	cfqq->nr_sectors += blk_rq_sectors(rq);
>> > +
>> > +	if (cfq_cfqq_yield(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
>> > +		cfq_yield_cfqq(cfqd, cfqq);
>>
>> Jeff,
>>
>> I am wondering if cfq_select_queue() will be a better place for yielding
>> the queue.
>>
>> 	if (cfq_cfqq_yield(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
>> 		goto expire;
>>
>> We can avoid one unnecessary __blk_run_queue().
>
> Agree, doing it on insert is not the right place.

I see where you're coming from, but that makes things quite a bit
trickier.  I look forward to the review of *that* patch.  ;-)

Cheers,
Jeff
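To make the alternative concrete, here is a rough sketch (editorial, not from any posted patch) of where Vivek's check might sit inside cfq_select_queue(). The surrounding selection logic is elided, and the exact placement among the existing expiry checks is an assumption:

```c
/*
 * Sketch only: cfq_select_queue() with Vivek's suggested yield check.
 * Everything except the new check is abbreviated; the labels and the
 * cfq_slice_expired() call mirror the CFQ code of this era.
 */
static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
{
	struct cfq_queue *cfqq = cfqd->active_queue;

	if (!cfqq)
		goto new_queue;

	/*
	 * A yielding queue whose requests have all been dispatched
	 * expires here, during normal queue selection, so no extra
	 * __blk_run_queue() is needed.
	 */
	if (cfq_cfqq_yield(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
		goto expire;

	/* ... existing slice-used and preemption checks ... */

	return cfqq;

expire:
	cfq_slice_expired(cfqd, 0);
new_queue:
	/* ... choose and activate the next queue ... */
	return cfqq;
}
```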
```diff
diff --git a/block/blk-core.c b/block/blk-core.c
index 9fe174d..3e4e98c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -323,6 +323,12 @@ void blk_unplug(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_unplug);
 
+void blk_yield(struct request_queue *q)
+{
+	elv_yield(q);
+}
+EXPORT_SYMBOL(blk_yield);
+
 /**
  * blk_start_queue - restart a previously stopped queue
  * @q:    The &struct request_queue in question
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ef59ab3..8a300ab 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -292,6 +292,7 @@ struct cfq_data {
 };
 
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
+static void cfq_yield_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq);
 
 static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg,
 					    enum wl_prio_t prio,
@@ -320,6 +321,7 @@ enum cfqq_state_flags {
 	CFQ_CFQQ_FLAG_split_coop,	/* shared cfqq will be splitted */
 	CFQ_CFQQ_FLAG_deep,		/* sync cfqq experienced large depth */
 	CFQ_CFQQ_FLAG_wait_busy,	/* Waiting for next request */
+	CFQ_CFQQ_FLAG_yield,		/* Allow another cfqq to run */
 };
 
 #define CFQ_CFQQ_FNS(name)						\
@@ -349,6 +351,7 @@ CFQ_CFQQ_FNS(coop);
 CFQ_CFQQ_FNS(split_coop);
 CFQ_CFQQ_FNS(deep);
 CFQ_CFQQ_FNS(wait_busy);
+CFQ_CFQQ_FNS(yield);
 #undef CFQ_CFQQ_FNS
 
 #ifdef CONFIG_DEBUG_CFQ_IOSCHED
@@ -1566,6 +1569,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 
 	cfq_clear_cfqq_wait_request(cfqq);
 	cfq_clear_cfqq_wait_busy(cfqq);
+	cfq_clear_cfqq_yield(cfqq);
 
 	/*
 	 * If this cfqq is shared between multiple processes, check to
@@ -1887,6 +1891,9 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
 
 	cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
 	cfqq->nr_sectors += blk_rq_sectors(rq);
+
+	if (cfq_cfqq_yield(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
+		cfq_yield_cfqq(cfqd, cfqq);
 }
 
 /*
@@ -2191,6 +2198,68 @@ keep_queue:
 	return cfqq;
 }
 
+static void cfq_yield_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	__cfq_slice_expired(cfqd, cfqq, 1);
+	__blk_run_queue(cfqd->queue);
+}
+
+static void cfq_yield(struct request_queue *q)
+{
+	struct cfq_data *cfqd = q->elevator->elevator_data;
+	struct cfq_io_context *cic;
+	struct cfq_queue *cfqq;
+
+	cic = cfq_cic_lookup(cfqd, current->io_context);
+	if (!cic)
+		return;
+
+	spin_lock_irq(q->queue_lock);
+
+	/*
+	 * This is primarily called to ensure that the long synchronous
+	 * time slice does not prevent other I/O happening (like journal
+	 * commits) while we idle waiting for it.  Thus, check to see if the
+	 * current cfqq is the sync cfqq for this process.
+	 */
+	cfqq = cic_to_cfqq(cic, 1);
+	if (!cfqq)
+		goto out_unlock;
+
+	if (cfqd->active_queue != cfqq)
+		goto out_unlock;
+
+	/*
+	 * If we are currently servicing the SYNC_NOIDLE_WORKLOAD, and we
+	 * are idling on the last queue in that workload, *and* the average
+	 * think time is larger than the remaining slice time, go ahead
+	 * and yield the queue.  Otherwise, don't yield so that fsync-heavy
+	 * workloads don't starve out the sync-noidle workload.
+	 */
+	if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD &&
+	    (!sample_valid(cfqq->service_tree->ttime_samples) ||
+	     cfqq->slice_end - jiffies > cfqq->service_tree->ttime_mean))
+		goto out_unlock;
+
+	cfq_log_cfqq(cfqd, cfqq, "yielding queue");
+
+	/*
+	 * If there are other requests pending, just mark the queue as
+	 * yielding and give up our slice after the last request is
+	 * dispatched.
+	 */
+	if (!RB_EMPTY_ROOT(&cfqq->sort_list)) {
+		cfq_mark_cfqq_yield(cfqq);
+		goto out_unlock;
+	}
+
+	cfq_yield_cfqq(cfqd, cfqq);
+
+out_unlock:
+	spin_unlock_irq(q->queue_lock);
+}
+
 static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
 {
 	int dispatched = 0;
@@ -3911,6 +3980,7 @@ static struct elevator_type iosched_cfq = {
 		.elevator_deactivate_req_fn =	cfq_deactivate_request,
 		.elevator_queue_empty_fn =	cfq_queue_empty,
 		.elevator_completed_req_fn =	cfq_completed_request,
+		.elevator_yield_fn =		cfq_yield,
 		.elevator_former_req_fn =	elv_rb_former_request,
 		.elevator_latter_req_fn =	elv_rb_latter_request,
 		.elevator_set_req_fn =		cfq_set_request,
diff --git a/block/elevator.c b/block/elevator.c
index 76e3702..6b16421 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -855,6 +855,14 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 	}
 }
 
+void elv_yield(struct request_queue *q)
+{
+	struct elevator_queue *e = q->elevator;
+
+	if (e && e->ops->elevator_yield_fn)
+		e->ops->elevator_yield_fn(q);
+}
+
 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
 
 static ssize_t
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6690e8b..0e749e2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -833,6 +833,7 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *,
 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
 				  struct request *, int, rq_end_io_fn *);
 extern void blk_unplug(struct request_queue *q);
+extern void blk_yield(struct request_queue *q);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 {
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 1cb3372..9b4e2e9 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -20,6 +20,7 @@ typedef void (elevator_add_req_fn) (struct request_queue *, struct request *);
 typedef int (elevator_queue_empty_fn) (struct request_queue *);
 typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *);
 typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
+typedef void (elevator_yield_fn) (struct request_queue *);
 typedef int (elevator_may_queue_fn) (struct request_queue *, int);
 
 typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t);
@@ -44,6 +45,7 @@ struct elevator_ops
 
 	elevator_queue_empty_fn *elevator_queue_empty_fn;
 	elevator_completed_req_fn *elevator_completed_req_fn;
+	elevator_yield_fn *elevator_yield_fn;
 
 	elevator_request_list_fn *elevator_former_req_fn;
 	elevator_request_list_fn *elevator_latter_req_fn;
@@ -105,6 +107,7 @@ extern void elv_merge_requests(struct request_queue *, struct request *,
 extern void elv_merged_request(struct request_queue *, struct request *, int);
 extern void elv_requeue_request(struct request_queue *, struct request *);
 extern int elv_queue_empty(struct request_queue *);
+extern void elv_yield(struct request_queue *);
 extern struct request *elv_former_request(struct request_queue *, struct request *);
 extern struct request *elv_latter_request(struct request_queue *, struct request *);
 extern int elv_register_queue(struct request_queue *q);
```
This patch implements a blk_yield function to allow a process to
voluntarily give up its I/O scheduler time slice. This is desirable for
those processes which know that they will be blocked on I/O from
another process, such as the file system journal thread. Following
patches will put calls to blk_yield into jbd and jbd2.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
---
 block/blk-core.c         |    6 ++++
 block/cfq-iosched.c      |   70 ++++++++++++++++++++++++++++++++++++++++++++++
 block/elevator.c         |    8 +++++
 include/linux/blkdev.h   |    1 +
 include/linux/elevator.h |    3 ++
 5 files changed, 88 insertions(+), 0 deletions(-)
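The follow-up jbd/jbd2 patches mentioned above are not part of this message. As a hedged illustration only, a caller that knows it is about to block on I/O issued by another process might use the new hook roughly as below; the function name and the journal_bdev parameter are hypothetical stand-ins, not jbd code. Since elv_yield() checks elevator_yield_fn for NULL, the call is a no-op under schedulers that do not implement the hook.

```c
#include <linux/blkdev.h>

/*
 * Hypothetical caller -- not the actual jbd/jbd2 change.  A thread
 * about to sleep on work done by another process (e.g. a journal
 * commit) yields its CFQ time slice first, so the other process's
 * I/O is not delayed by slice idling on our behalf.
 */
static void example_wait_for_commit(struct block_device *journal_bdev)
{
	struct request_queue *q = bdev_get_queue(journal_bdev);

	if (q)
		blk_yield(q);	/* harmless no-op on non-CFQ schedulers */

	/* ... now block until the commit completes ... */
}
```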