===================================================================
@@ -75,6 +75,8 @@ static DEFINE_IDA(cic_index_ida);
#define sample_valid(samples) ((samples) > 80)
#define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node)
+static void cfq_put_request(struct request *rq);
+
/*
* Most of our rbtree usage is for sorting with min extraction, so
* if we cache the leftmost node we don't have to walk down the tree
@@ -148,6 +150,12 @@ struct cfq_queue {
struct cfq_group *cfqg;
/* Number of sectors dispatched from queue in single dispatch round */
unsigned long nr_sectors;
+
+ /*
+ * Further IO from this cfqq depends on IO queued in the cfqq
+ * pointed to by depends_on.
+ */
+ struct cfq_queue *depends_on;
};
/*
@@ -447,7 +455,7 @@ static inline int cfqg_busy_async_queues
+ cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count;
}
-static void cfq_dispatch_insert(struct request_queue *, struct request *);
+static void cfq_dispatch_insert(struct request_queue *, struct request *, bool);
static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
struct io_context *, gfp_t);
static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
@@ -2043,16 +2051,23 @@ static void cfq_arm_slice_timer(struct c
/*
* Move request from internal lists to the request queue dispatch list.
+ * @rq_dequeued: the rq being dispatched has already been removed from its
+ * associated cfqq. This is true when a request from a dependent queue is
+ * being dispatched in the current queue's context.
*/
-static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
+static void cfq_dispatch_insert(struct request_queue *q, struct request *rq,
+ bool rq_dequeued)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_queue *cfqq = RQ_CFQQ(rq);
cfq_log_cfqq(cfqd, cfqq, "dispatch_insert");
- cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq);
- cfq_remove_request(rq);
+ if (!rq_dequeued) {
+ cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq);
+ cfq_remove_request(rq);
+ }
+
cfqq->dispatched++;
(RQ_CFQG(rq))->dispatched++;
elv_dispatch_sort(q, rq);
@@ -2332,6 +2347,10 @@ static struct cfq_queue *cfq_select_queu
if (!RB_EMPTY_ROOT(&cfqq->sort_list))
goto keep_queue;
+ /* There are requests in the cfqq we depend on. Allow dispatch */
+ if (cfqq->depends_on && !RB_EMPTY_ROOT(&cfqq->depends_on->sort_list))
+ goto keep_queue;
+
/*
* If another queue has a request waiting within our mean seek
* distance, let it run. The expire code will check for close
@@ -2402,7 +2421,7 @@ static int __cfq_forced_dispatch_cfqq(st
int dispatched = 0;
while (cfqq->next_rq) {
- cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
+ cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq, false);
dispatched++;
}
@@ -2534,6 +2553,77 @@ static bool cfq_may_dispatch(struct cfq_
}
/*
+ * This queue was not active and we might expire it because its request got
+ * dispatched in some other queue's context and it is now empty
+ */
+static void
+cfq_expire_inactive_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+ /*
+ * If this cfqq is shared between multiple processes, check to
+ * make sure that those processes are still issuing I/Os within
+ * the mean seek distance. If not, it may be time to break the
+ * queues apart again.
+ */
+ if (cfq_cfqq_coop(cfqq) && CFQQ_SEEKY(cfqq))
+ cfq_mark_cfqq_split_coop(cfqq);
+
+ cfq_del_cfqq_rr(cfqd, cfqq);
+}
+
+static void cfq_set_dispatch_dependent_request(struct cfq_data *cfqd,
+ struct cfq_queue *cfqq)
+{
+ struct cfq_queue *dep_cfqq;
+ int rw;
+ struct request *rq;
+
+ dep_cfqq = cfqq->depends_on;
+
+ cfq_log_cfqq(cfqd, cfqq, "dispatch from dependent"
+ " queue pid=%d", dep_cfqq->pid);
+ /*
+ * Select a request from the queue we depend on. Dequeue the
+ * request from that queue, make it belong to this queue, and
+ * dispatch it in this queue's context.
+ */
+ rq = cfq_check_fifo(dep_cfqq);
+ if (!rq)
+ rq = dep_cfqq->next_rq;
+ cfq_remove_request(rq);
+ if (RB_EMPTY_ROOT(&dep_cfqq->sort_list))
+ cfq_expire_inactive_queue(cfqd, dep_cfqq);
+ /*
+ * Change the identity of the request so it belongs to the current
+ * cfqq, cfqg and cic. Drop references to the old cfqq, cfqg and cic.
+ */
+ cfqq->ref++;
+ rw = rq_data_dir(rq);
+ cfqq->allocated[rw]++;
+
+ /*
+ * If we are here, we are idling on the queue, so we must have
+ * dispatched at least one request, which must have set
+ * cfqd->active_cic. Use that.
+ */
+ BUG_ON(!cfqd->active_cic);
+ atomic_long_inc(&cfqd->active_cic->ioc->refcount);
+
+ cfq_put_request(rq);
+
+ rq->elevator_private[0] = cfqd->active_cic;
+ rq->elevator_private[1] = cfqq;
+ rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg);
+
+ if (cfq_cfqq_wait_request(cfqq))
+ cfq_del_timer(cfqd, cfqq);
+
+ cfq_clear_cfqq_wait_request(cfqq);
+
+ cfq_dispatch_insert(cfqd->queue, rq, true);
+}
+
+/*
* Dispatch a request from cfqq, moving them to the request queue
* dispatch list.
*/
@@ -2541,22 +2631,26 @@ static bool cfq_dispatch_request(struct
{
struct request *rq;
- BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
+ BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list) && !cfqq->depends_on);
if (!cfq_may_dispatch(cfqd, cfqq))
return false;
- /*
- * follow expired path, else get first next available
- */
- rq = cfq_check_fifo(cfqq);
- if (!rq)
- rq = cfqq->next_rq;
+ if (RB_EMPTY_ROOT(&cfqq->sort_list))
+ cfq_set_dispatch_dependent_request(cfqd, cfqq);
+ else {
+ /*
+ * follow expired path, else get first next available
+ */
+ rq = cfq_check_fifo(cfqq);
+ if (!rq)
+ rq = cfqq->next_rq;
- /*
- * insert request into driver dispatch list
- */
- cfq_dispatch_insert(cfqd->queue, rq);
+ /*
+ * insert request into driver dispatch list
+ */
+ cfq_dispatch_insert(cfqd->queue, rq, false);
+ }
if (!cfqd->active_cic) {
struct cfq_io_context *cic = RQ_CIC(rq);
@@ -2640,6 +2734,11 @@ static void cfq_put_queue(struct cfq_que
}
BUG_ON(cfq_cfqq_on_rr(cfqq));
+
+ /* This cfqq is going away. If there is a dependent queue, drop ref */
+ if (cfqq->depends_on)
+ cfq_put_queue(cfqq->depends_on);
+
kmem_cache_free(cfq_pool, cfqq);
cfq_put_cfqg(cfqg);
}
@@ -3670,6 +3769,111 @@ static int cfq_may_queue(struct request_
}
/*
+ * The calling task depends on "tsk" for further IO. It is the caller's
+ * responsibility to make sure the tsk pointer stays valid for the duration of the call.
+ */
+static void
+cfq_set_depends_on_task(struct request_queue *q, struct task_struct *tsk)
+{
+ struct cfq_io_context *cic, *tsk_cic;
+ struct cfq_data *cfqd = q->elevator->elevator_data;
+ struct cfq_queue *cfqq, *tsk_cfqq, *__cfqq;
+
+ if (unlikely(!tsk))
+ return;
+
+ if (!tsk->io_context || !current->io_context)
+ return;
+
+ /*
+ * If the two processes belong to the same cgroup, there is no need to
+ * do this, as both the journalling thread and the fsync process will be
+ * on the same service tree and can preempt each other.
+ */
+ rcu_read_lock();
+ if (task_blkio_cgroup(current) == task_blkio_cgroup(tsk)) {
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
+
+ cic = cfq_cic_lookup(cfqd, current->io_context);
+ if (!cic)
+ return;
+
+ task_lock(tsk);
+ spin_lock_irq(q->queue_lock);
+
+ cfqq = cic_to_cfqq(cic, 1);
+ if (!cfqq)
+ goto out_unlock;
+
+ /* If cfqq already has a dependent queue, ignore this new queue */
+ if (cfqq->depends_on) {
+ cfq_log_cfqq(cfqd, cfqq, "depends on queue already set"
+ " old_pid=%d", cfqq->depends_on->pid);
+ goto out_unlock;
+ }
+
+ if (!tsk->io_context)
+ goto out_unlock;
+
+ tsk_cic = cfq_cic_lookup(cfqd, tsk->io_context);
+ if (!tsk_cic)
+ goto out_unlock;
+
+ tsk_cfqq = cic_to_cfqq(tsk_cic, 1);
+
+ if (!tsk_cfqq)
+ goto out_unlock;
+
+ /* Don't allow circular dependency among a group of queues */
+ __cfqq = tsk_cfqq;
+
+ while ((__cfqq = __cfqq->depends_on)) {
+ if (__cfqq == cfqq)
+ goto out_unlock;
+ }
+
+ /* Take a reference on the task's cfqq */
+ tsk_cfqq->ref++;
+ cfqq->depends_on = tsk_cfqq;
+
+ cfq_log_cfqq(cfqd, cfqq, "set depends on queue pid=%d", tsk->pid);
+
+out_unlock:
+ spin_unlock_irq(q->queue_lock);
+ task_unlock(tsk);
+}
+
+static void cfq_reset_depends_on_task(struct request_queue *q)
+{
+ struct cfq_io_context *cic;
+ struct cfq_data *cfqd = q->elevator->elevator_data;
+ struct cfq_queue *cfqq;
+
+ cic = cfq_cic_lookup(cfqd, current->io_context);
+ if (!cic)
+ return;
+
+ spin_lock_irq(q->queue_lock);
+
+ cfqq = cic_to_cfqq(cic, 1);
+ if (!cfqq || !cfqq->depends_on) {
+ spin_unlock_irq(q->queue_lock);
+ return;
+ }
+
+ cfq_log_cfqq(cfqd, cfqq, "reset depends on queue");
+
+ /* Drop reference on the task's cfqq */
+ cfq_put_queue(cfqq->depends_on);
+ cfqq->depends_on = NULL;
+ spin_unlock_irq(q->queue_lock);
+
+}
+
+/*
* queue lock held here
*/
static void cfq_put_request(struct request *rq)
@@ -4206,6 +4410,8 @@ static struct elevator_type iosched_cfq
.elevator_set_req_fn = cfq_set_request,
.elevator_put_req_fn = cfq_put_request,
.elevator_may_queue_fn = cfq_may_queue,
+ .elevator_set_depends_on_task_fn = cfq_set_depends_on_task,
+ .elevator_reset_depends_on_task_fn = cfq_reset_depends_on_task,
.elevator_init_fn = cfq_init_queue,
.elevator_exit_fn = cfq_exit_queue,
.trim = cfq_free_io_context,
===================================================================
@@ -398,6 +398,19 @@ static int blk_init_free_list(struct req
return 0;
}
+static void
+generic_set_depends_on_task(struct request_queue *q, struct task_struct *tsk)
+{
+ if (q->elevator)
+ elv_set_depends_on_task(q, tsk);
+}
+
+static void generic_reset_depends_on_task(struct request_queue *q)
+{
+ if (q->elevator)
+ elv_reset_depends_on_task(q);
+}
+
struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
return blk_alloc_queue_node(gfp_mask, -1);
@@ -534,6 +547,8 @@ blk_init_allocated_queue_node(struct req
q->prep_rq_fn = NULL;
q->unprep_rq_fn = NULL;
q->queue_flags = QUEUE_FLAG_DEFAULT;
+ q->set_depends_on_task_fn = generic_set_depends_on_task;
+ q->reset_depends_on_task_fn = generic_reset_depends_on_task;
/* Override internal queue lock with supplied lock pointer */
if (lock)
@@ -2766,6 +2781,33 @@ void blk_finish_plug(struct blk_plug *pl
}
EXPORT_SYMBOL(blk_finish_plug);
+/*
+ * Give a hint to CFQ that the current task is dependent on IO coming from
+ * "tsk". In such cases, CFQ will allow dispatch from "tsk"'s queue within
+ * the time slice of the "current" process, which can cut down on
+ * unnecessary queue idling.
+ *
+ * In the CFQ case this takes task_lock()/task_unlock() on "tsk" and
+ * spin_lock_irq() on the queue lock, so do not call it with interrupts disabled.
+ */
+void blk_set_depends_on_task(struct request_queue *q, struct task_struct *tsk)
+{
+ if (q->set_depends_on_task_fn)
+ q->set_depends_on_task_fn(q, tsk);
+}
+EXPORT_SYMBOL(blk_set_depends_on_task);
+
+/*
+ * Tear down any dependent task mapping previously set up by the current
+ * task.
+ */
+void blk_reset_depends_on_task(struct request_queue *q)
+{
+ if (q->reset_depends_on_task_fn)
+ q->reset_depends_on_task_fn(q);
+}
+EXPORT_SYMBOL(blk_reset_depends_on_task);
+
int __init blk_dev_init(void)
{
BUILD_BUG_ON(__REQ_NR_BITS > 8 *
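
For context, a minimal caller sketch (not part of this patch) of how the hint is meant to be used: a filesystem's fsync path sets the dependency around the wait on its journalling thread and tears it down afterwards. example_fsync_hint() and example_wait_for_journal_commit() are hypothetical placeholders; with ext4/jbd2 the dependent task would be the journal thread and the wait would be something like jbd2_log_wait_commit(). The sketch assumes <linux/blkdev.h> for bdev_get_queue().

static int example_fsync_hint(struct super_block *sb,
			      struct task_struct *journal_task)
{
	struct request_queue *q = bdev_get_queue(sb->s_bdev);
	int ret;

	/* Hint: our sync queue depends on IO submitted by the journal thread */
	blk_set_depends_on_task(q, journal_task);

	/* Wait for the commit we care about (e.g. jbd2_log_wait_commit()) */
	ret = example_wait_for_journal_commit(sb);

	/* The dependency only matters across this wait; tear it down again */
	blk_reset_depends_on_task(q);

	return ret;
}
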
===================================================================
@@ -783,6 +783,22 @@ int elv_may_queue(struct request_queue *
return ELV_MQUEUE_MAY;
}
+void elv_set_depends_on_task(struct request_queue *q, struct task_struct *tsk)
+{
+ struct elevator_queue *e = q->elevator;
+
+ if (e->ops->elevator_set_depends_on_task_fn)
+ e->ops->elevator_set_depends_on_task_fn(q, tsk);
+}
+
+void elv_reset_depends_on_task(struct request_queue *q)
+{
+ struct elevator_queue *e = q->elevator;
+
+ if (e->ops->elevator_reset_depends_on_task_fn)
+ e->ops->elevator_reset_depends_on_task_fn(q);
+}
+
void elv_abort_queue(struct request_queue *q)
{
struct request *rq;
===================================================================
@@ -209,6 +209,8 @@ typedef int (merge_bvec_fn) (struct requ
typedef void (softirq_done_fn)(struct request *);
typedef int (dma_drain_needed_fn)(struct request *);
typedef int (lld_busy_fn) (struct request_queue *q);
+typedef void (set_depends_on_task_fn) (struct request_queue *q, struct task_struct *tsk);
+typedef void (reset_depends_on_task_fn) (struct request_queue *q);
enum blk_eh_timer_return {
BLK_EH_NOT_HANDLED,
@@ -283,7 +285,8 @@ struct request_queue
rq_timed_out_fn *rq_timed_out_fn;
dma_drain_needed_fn *dma_drain_needed;
lld_busy_fn *lld_busy_fn;
-
+ set_depends_on_task_fn *set_depends_on_task_fn;
+ reset_depends_on_task_fn *reset_depends_on_task_fn;
/*
* Dispatch queue sorting
*/
@@ -895,6 +898,9 @@ static inline bool blk_needs_flush_plug(
return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list));
}
+extern void blk_set_depends_on_task(struct request_queue *q, struct task_struct *tsk);
+extern void blk_reset_depends_on_task(struct request_queue *q);
+
/*
* tag stuff
*/
===================================================================
@@ -23,6 +23,8 @@ typedef void (elevator_add_req_fn) (stru
typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *);
typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
typedef int (elevator_may_queue_fn) (struct request_queue *, int);
+typedef void (elevator_set_depends_on_task_fn) (struct request_queue *, struct task_struct *);
+typedef void (elevator_reset_depends_on_task_fn) (struct request_queue *);
typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t);
typedef void (elevator_put_req_fn) (struct request *);
@@ -54,6 +56,8 @@ struct elevator_ops
elevator_put_req_fn *elevator_put_req_fn;
elevator_may_queue_fn *elevator_may_queue_fn;
+ elevator_set_depends_on_task_fn *elevator_set_depends_on_task_fn;
+ elevator_reset_depends_on_task_fn *elevator_reset_depends_on_task_fn;
elevator_init_fn *elevator_init_fn;
elevator_exit_fn *elevator_exit_fn;
@@ -114,6 +118,8 @@ extern struct request *elv_latter_reques
extern int elv_register_queue(struct request_queue *q);
extern void elv_unregister_queue(struct request_queue *q);
extern int elv_may_queue(struct request_queue *, int);
+extern void elv_set_depends_on_task(struct request_queue *q, struct task_struct *tsk);
+extern void elv_reset_depends_on_task(struct request_queue *q);
extern void elv_abort_queue(struct request_queue *);
extern void elv_completed_request(struct request_queue *, struct request *);
extern int elv_set_request(struct request_queue *, struct request *, gfp_t);
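
A closing note on the elevator_ops additions: both hooks are optional, since elv_set_depends_on_task() and elv_reset_depends_on_task() check the function pointers before calling them, so schedulers other than CFQ need no changes. Purely as an illustration (the example_* names are hypothetical), another elevator that wanted the hints would wire them up in its elevator_type like this:

static struct elevator_type iosched_example = {
	.ops = {
		/* ... the scheduler's usual hooks ... */
		.elevator_set_depends_on_task_fn = example_set_depends_on_task,
		.elevator_reset_depends_on_task_fn = example_reset_depends_on_task,
	},
	.elevator_name = "example",
	.elevator_owner = THIS_MODULE,
};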