summaryrefslogtreecommitdiff
path: root/sys-kernel/kogaion-sources/files/desktop/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7-for-3.10.0.patch
diff options
context:
space:
mode:
Diffstat (limited to 'sys-kernel/kogaion-sources/files/desktop/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7-for-3.10.0.patch')
-rw-r--r--sys-kernel/kogaion-sources/files/desktop/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7-for-3.10.0.patch1034
1 files changed, 1034 insertions, 0 deletions
diff --git a/sys-kernel/kogaion-sources/files/desktop/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7-for-3.10.0.patch b/sys-kernel/kogaion-sources/files/desktop/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7-for-3.10.0.patch
new file mode 100644
index 00000000..ea585f02
--- /dev/null
+++ b/sys-kernel/kogaion-sources/files/desktop/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7-for-3.10.0.patch
@@ -0,0 +1,1034 @@
+From efc499347ea3827417cf00718616bf61a090afec Mon Sep 17 00:00:00 2001
+From: Mauro Andreolini <mauro.andreolini@unimore.it>
+Date: Thu, 23 Jan 2014 16:54:44 +0100
+Subject: [PATCH 3/3] block, bfq: add Early Queue Merge (EQM) to BFQ-v7 for
+ 3.10.0
+
+A set of processes may happen to perform interleaved reads, i.e., requests
+whose union would give rise to a sequential read pattern. There are two
+typical cases: in the first case, processes read fixed-size chunks of
+data at a fixed distance from each other, while in the second case processes
+may read variable-size chunks at variable distances. The latter case occurs
+for example with KVM, which splits the I/O generated by the guest into
+multiple chunks, and lets these chunks be served by a pool of cooperating
+processes, iteratively assigning the next chunk of I/O to the first
+available process. CFQ uses actual queue merging for the first type of
+processes, whereas it uses preemption to get a sequential read pattern out
+of the read requests performed by the second type of processes. In the end
+it uses two different mechanisms to achieve the same goal: boosting the
+throughput with interleaved I/O.
+
+This patch introduces Early Queue Merge (EQM), a unified mechanism to get a
+sequential read pattern with both types of processes. The main idea is
+checking newly arrived requests against the next request of the active queue
+both in case of actual request insert and in case of request merge. By doing
+so, both types of processes can be handled by just merging their queues.
+EQM is then simpler and more compact than the pair of mechanisms used in
+CFQ.
+
+Finally, EQM also preserves the typical low-latency properties of BFQ, by
+properly restoring the weight-raising state of a queue when it gets back to
+a non-merged state.
+
+Signed-off-by: Mauro Andreolini <mauro.andreolini@unimore.it>
+Signed-off-by: Arianna Avanzini <avanzini.arianna@gmail.com>
+Reviewed-by: Paolo Valente <paolo.valente@unimore.it>
+---
+ block/bfq-iosched.c | 657 ++++++++++++++++++++++++++++++++++++----------------
+ block/bfq-sched.c | 28 ---
+ block/bfq.h | 16 ++
+ 3 files changed, 474 insertions(+), 227 deletions(-)
+
+diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
+index 96abb81..99083be6 100644
+--- a/block/bfq-iosched.c
++++ b/block/bfq-iosched.c
+@@ -445,6 +445,46 @@ static inline unsigned int bfq_wrais_duration(struct bfq_data *bfqd)
+ return dur;
+ }
+
++static inline void
++bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
++{
++ if (bic->saved_idle_window)
++ bfq_mark_bfqq_idle_window(bfqq);
++ else
++ bfq_clear_bfqq_idle_window(bfqq);
++ if (bic->raising_time_left && bfqq->bfqd->low_latency) {
++ /*
++ * Start a weight raising period with the duration given by
++ * the raising_time_left snapshot.
++ */
++ if (bfq_bfqq_busy(bfqq))
++ bfqq->bfqd->raised_busy_queues++;
++ bfqq->raising_coeff = bfqq->bfqd->bfq_raising_coeff;
++ bfqq->raising_cur_max_time = bic->raising_time_left;
++ bfqq->last_rais_start_finish = jiffies;
++ bfqq->entity.ioprio_changed = 1;
++ }
++ /*
++ * Clear raising_time_left to prevent bfq_bfqq_save_state() from
++ * getting confused about the queue's need of a weight-raising
++ * period.
++ */
++ bic->raising_time_left = 0;
++}
++
++/*
++ * Must be called with the queue_lock held.
++ */
++static int bfqq_process_refs(struct bfq_queue *bfqq)
++{
++ int process_refs, io_refs;
++
++ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
++ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
++ BUG_ON(process_refs < 0);
++ return process_refs;
++}
++
+ static void bfq_add_rq_rb(struct request *rq)
+ {
+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
+@@ -486,12 +526,20 @@ static void bfq_add_rq_rb(struct request *rq)
+ if (!bfqd->low_latency)
+ goto add_bfqq_busy;
+
++ if (bfq_bfqq_just_split(bfqq))
++ goto set_ioprio_changed;
++
+ /*
+- * If the queue is not being boosted and has been idle
+- * for enough time, start a weight-raising period
++ * If the queue:
++ * - is not being boosted,
++ * - has been idle for enough time,
++ * - is not a sync queue or is linked to a bfq_io_cq (it is
++ * shared "for its nature" or it is not shared and its
++ * requests have not been redirected to a shared queue)
++ * start a weight-raising period.
+ */
+- if (old_raising_coeff == 1 &&
+- (idle_for_long_time || soft_rt)) {
++ if (old_raising_coeff == 1 && (idle_for_long_time || soft_rt) &&
++ (!bfq_bfqq_sync(bfqq) || bfqq->bic != NULL)) {
+ bfqq->raising_coeff = bfqd->bfq_raising_coeff;
+ if (idle_for_long_time)
+ bfqq->raising_cur_max_time =
+@@ -572,6 +620,7 @@ static void bfq_add_rq_rb(struct request *rq)
+ bfqd->bfq_raising_rt_max_time;
+ }
+ }
++set_ioprio_changed:
+ if (old_raising_coeff != bfqq->raising_coeff)
+ entity->ioprio_changed = 1;
+ add_bfqq_busy:
+@@ -754,90 +803,35 @@ static void bfq_end_raising(struct bfq_data *bfqd)
+ spin_unlock_irq(bfqd->queue->queue_lock);
+ }
+
+-static int bfq_allow_merge(struct request_queue *q, struct request *rq,
+- struct bio *bio)
+-{
+- struct bfq_data *bfqd = q->elevator->elevator_data;
+- struct bfq_io_cq *bic;
+- struct bfq_queue *bfqq;
+-
+- /*
+- * Disallow merge of a sync bio into an async request.
+- */
+- if (bfq_bio_sync(bio) && !rq_is_sync(rq))
+- return 0;
+-
+- /*
+- * Lookup the bfqq that this bio will be queued with. Allow
+- * merge only if rq is queued there.
+- * Queue lock is held here.
+- */
+- bic = bfq_bic_lookup(bfqd, current->io_context);
+- if (bic == NULL)
+- return 0;
+-
+- bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
+- return bfqq == RQ_BFQQ(rq);
+-}
+-
+-static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
+- struct bfq_queue *bfqq)
+-{
+- if (bfqq != NULL) {
+- bfq_mark_bfqq_must_alloc(bfqq);
+- bfq_mark_bfqq_budget_new(bfqq);
+- bfq_clear_bfqq_fifo_expire(bfqq);
+-
+- bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
+-
+- bfq_log_bfqq(bfqd, bfqq,
+- "set_in_service_queue, cur-budget = %lu",
+- bfqq->entity.budget);
+- }
+-
+- bfqd->in_service_queue = bfqq;
+-}
+-
+-/*
+- * Get and set a new queue for service.
+- */
+-static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd,
+- struct bfq_queue *bfqq)
++static inline sector_t bfq_io_struct_pos(void *io_struct, bool request)
+ {
+- if (!bfqq)
+- bfqq = bfq_get_next_queue(bfqd);
++ if (request)
++ return blk_rq_pos(io_struct);
+ else
+- bfq_get_next_queue_forced(bfqd, bfqq);
+-
+- __bfq_set_in_service_queue(bfqd, bfqq);
+- return bfqq;
++ return ((struct bio *)io_struct)->bi_sector;
+ }
+
+-static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd,
+- struct request *rq)
++static inline sector_t bfq_dist_from(sector_t pos1,
++ sector_t pos2)
+ {
+- if (blk_rq_pos(rq) >= bfqd->last_position)
+- return blk_rq_pos(rq) - bfqd->last_position;
++ if (pos1 >= pos2)
++ return pos1 - pos2;
+ else
+- return bfqd->last_position - blk_rq_pos(rq);
++ return pos2 - pos1;
+ }
+
+-/*
+- * Return true if bfqq has no request pending and rq is close enough to
+- * bfqd->last_position, or if rq is closer to bfqd->last_position than
+- * bfqq->next_rq
+- */
+-static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq)
++static inline int bfq_rq_close_to_sector(void *io_struct, bool request,
++ sector_t sector)
+ {
+- return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR;
++ return bfq_dist_from(bfq_io_struct_pos(io_struct, request), sector) <=
++ BFQQ_SEEK_THR;
+ }
+
+-static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
++static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector)
+ {
+ struct rb_root *root = &bfqd->rq_pos_tree;
+ struct rb_node *parent, *node;
+ struct bfq_queue *__bfqq;
+- sector_t sector = bfqd->last_position;
+
+ if (RB_EMPTY_ROOT(root))
+ return NULL;
+@@ -856,7 +850,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
+ * position).
+ */
+ __bfqq = rb_entry(parent, struct bfq_queue, pos_node);
+- if (bfq_rq_close(bfqd, __bfqq->next_rq))
++ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
+ return __bfqq;
+
+ if (blk_rq_pos(__bfqq->next_rq) < sector)
+@@ -867,7 +861,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
+ return NULL;
+
+ __bfqq = rb_entry(node, struct bfq_queue, pos_node);
+- if (bfq_rq_close(bfqd, __bfqq->next_rq))
++ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector))
+ return __bfqq;
+
+ return NULL;
+@@ -876,14 +870,12 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd)
+ /*
+ * bfqd - obvious
+ * cur_bfqq - passed in so that we don't decide that the current queue
+- * is closely cooperating with itself.
+- *
+- * We are assuming that cur_bfqq has dispatched at least one request,
+- * and that bfqd->last_position reflects a position on the disk associated
+- * with the I/O issued by cur_bfqq.
++ * is closely cooperating with itself
++ * sector - used as a reference point to search for a close queue
+ */
+ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
+- struct bfq_queue *cur_bfqq)
++ struct bfq_queue *cur_bfqq,
++ sector_t sector)
+ {
+ struct bfq_queue *bfqq;
+
+@@ -903,7 +895,7 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
+ * working closely on the same area of the disk. In that case,
+ * we can group them together and don't waste time idling.
+ */
+- bfqq = bfqq_close(bfqd);
++ bfqq = bfqq_close(bfqd, sector);
+ if (bfqq == NULL || bfqq == cur_bfqq)
+ return NULL;
+
+@@ -930,6 +922,282 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
+ return bfqq;
+ }
+
++static struct bfq_queue *
++bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
++{
++ int process_refs, new_process_refs;
++ struct bfq_queue *__bfqq;
++
++ /*
++ * If there are no process references on the new_bfqq, then it is
++ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
++ * may have dropped their last reference (not just their last process
++ * reference).
++ */
++ if (!bfqq_process_refs(new_bfqq))
++ return NULL;
++
++ /* Avoid a circular list and skip interim queue merges. */
++ while ((__bfqq = new_bfqq->new_bfqq)) {
++ if (__bfqq == bfqq)
++ return NULL;
++ new_bfqq = __bfqq;
++ }
++
++ process_refs = bfqq_process_refs(bfqq);
++ new_process_refs = bfqq_process_refs(new_bfqq);
++ /*
++ * If the process for the bfqq has gone away, there is no
++ * sense in merging the queues.
++ */
++ if (process_refs == 0 || new_process_refs == 0)
++ return NULL;
++
++ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
++ new_bfqq->pid);
++
++ /*
++ * Merging is just a redirection: the requests of the process owning
++ * one of the two queues are redirected to the other queue. The latter
++ * queue, in its turn, is set as shared if this is the first time that
++ * the requests of some process are redirected to it.
++ *
++ * We redirect bfqq to new_bfqq and not the opposite, because we
++ * are in the context of the process owning bfqq, hence we have the
++ * io_cq of this process. So we can immediately configure this io_cq
++ * to redirect the requests of the process to new_bfqq.
++ *
++ * NOTE, even if new_bfqq coincides with the in-service queue, the
++ * io_cq of new_bfqq is not available, because, if the in-service queue
++ * is shared, bfqd->in_service_bic may not point to the io_cq of the
++ * in-service queue.
++ * Redirecting the requests of the process owning bfqq to the currently
++ * in-service queue is in any case the best option, as we feed the
++ * in-service queue with new requests close to the last request served
++ * and, by doing so, hopefully increase the throughput.
++ */
++ bfqq->new_bfqq = new_bfqq;
++ atomic_add(process_refs, &new_bfqq->ref);
++ return new_bfqq;
++}
++
++/*
++ * Attempt to schedule a merge of bfqq with the currently in-service queue or
++ * with a close queue among the scheduled queues.
++ * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue
++ * structure otherwise.
++ */
++static struct bfq_queue *
++bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
++ void *io_struct, bool request)
++{
++ struct bfq_queue *in_service_bfqq, *new_bfqq;
++
++ if (bfqq->new_bfqq)
++ return bfqq->new_bfqq;
++
++ if (!io_struct)
++ return NULL;
++
++ in_service_bfqq = bfqd->in_service_queue;
++
++ if (in_service_bfqq == NULL || in_service_bfqq == bfqq ||
++ !bfqd->in_service_bic)
++ goto check_scheduled;
++
++ if (bfq_class_idle(in_service_bfqq) || bfq_class_idle(bfqq))
++ goto check_scheduled;
++
++ if (bfq_class_rt(in_service_bfqq) != bfq_class_rt(bfqq))
++ goto check_scheduled;
++
++ if (in_service_bfqq->entity.parent != bfqq->entity.parent)
++ goto check_scheduled;
++
++ if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
++ bfq_bfqq_sync(in_service_bfqq) && bfq_bfqq_sync(bfqq)) {
++ new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq);
++ if (new_bfqq != NULL)
++ return new_bfqq; /* Merge with the in-service queue */
++ }
++
++ /*
++ * Check whether there is a cooperator among currently scheduled
++ * queues. The only thing we need is that the bio/request is not
++ * NULL, as we need it to establish whether a cooperator exists.
++ */
++check_scheduled:
++ new_bfqq = bfq_close_cooperator(bfqd, bfqq,
++ bfq_io_struct_pos(io_struct, request));
++ if (new_bfqq)
++ return bfq_setup_merge(bfqq, new_bfqq);
++
++ return NULL;
++}
++
++static inline void
++bfq_bfqq_save_state(struct bfq_queue *bfqq)
++{
++ /*
++ * If bfqq->bic == NULL, the queue is already shared or its requests
++ * have already been redirected to a shared queue; both idle window
++ * and weight raising state have already been saved. Do nothing.
++ */
++ if (bfqq->bic == NULL)
++ return;
++ if (bfqq->bic->raising_time_left)
++ /*
++ * This is the queue of a just-started process, and would
++ * deserve weight raising: we set raising_time_left to the full
++ * weight-raising duration to trigger weight-raising when and
++ * if the queue is split and the first request of the queue
++ * is enqueued.
++ */
++ bfqq->bic->raising_time_left = bfq_wrais_duration(bfqq->bfqd);
++ else if (bfqq->raising_coeff > 1) {
++ unsigned long wrais_duration =
++ jiffies - bfqq->last_rais_start_finish;
++ /*
++ * It may happen that a queue's weight raising period lasts
++ * longer than its raising_cur_max_time, as weight raising is
++ * handled only when a request is enqueued or dispatched (it
++ * does not use any timer). If the weight raising period is
++ * about to end, don't save it.
++ */
++ if (bfqq->raising_cur_max_time <= wrais_duration)
++ bfqq->bic->raising_time_left = 0;
++ else
++ bfqq->bic->raising_time_left =
++ bfqq->raising_cur_max_time - wrais_duration;
++ /*
++ * The bfq_queue is becoming shared or the requests of the
++ * process owning the queue are being redirected to a shared
++ * queue. Stop the weight raising period of the queue, as in
++ * both cases it should not be owned by an interactive or soft
++ * real-time application.
++ */
++ bfq_bfqq_end_raising(bfqq);
++ } else
++ bfqq->bic->raising_time_left = 0;
++ bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
++}
++
++static inline void
++bfq_get_bic_reference(struct bfq_queue *bfqq)
++{
++ /*
++ * If bfqq->bic has a non-NULL value, the bic to which it belongs
++ * is about to begin using a shared bfq_queue.
++ */
++ if (bfqq->bic)
++ atomic_long_inc(&bfqq->bic->icq.ioc->refcount);
++}
++
++static void
++bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
++ struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
++{
++ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
++ (long unsigned)new_bfqq->pid);
++ /* Save weight raising and idle window of the merged queues */
++ bfq_bfqq_save_state(bfqq);
++ bfq_bfqq_save_state(new_bfqq);
++ /*
++ * Grab a reference to the bic, to prevent it from being destroyed
++ * before being possibly touched by a bfq_split_bfqq().
++ */
++ bfq_get_bic_reference(bfqq);
++ bfq_get_bic_reference(new_bfqq);
++ /* Merge queues (that is, let bic redirect its requests to new_bfqq) */
++ bic_set_bfqq(bic, new_bfqq, 1);
++ bfq_mark_bfqq_coop(new_bfqq);
++ /*
++ * new_bfqq now belongs to at least two bics (it is a shared queue): set
++ * new_bfqq->bic to NULL. bfqq either:
++ * - does not belong to any bic any more, and hence bfqq->bic must
++ * be set to NULL, or
++ * - is a queue whose owning bics have already been redirected to a
++ * different queue, hence the queue is destined to not belong to any
++ * bic soon and bfqq->bic is already NULL (therefore the next
++ * assignment causes no harm).
++ */
++ new_bfqq->bic = NULL;
++ bfqq->bic = NULL;
++ bfq_put_queue(bfqq);
++}
++
++static int bfq_allow_merge(struct request_queue *q, struct request *rq,
++ struct bio *bio)
++{
++ struct bfq_data *bfqd = q->elevator->elevator_data;
++ struct bfq_io_cq *bic;
++ struct bfq_queue *bfqq, *new_bfqq;
++
++ /*
++ * Disallow merge of a sync bio into an async request.
++ */
++ if (bfq_bio_sync(bio) && !rq_is_sync(rq))
++ return 0;
++
++ /*
++ * Lookup the bfqq that this bio will be queued with. Allow
++ * merge only if rq is queued there.
++ * Queue lock is held here.
++ */
++ bic = bfq_bic_lookup(bfqd, current->io_context);
++ if (bic == NULL)
++ return 0;
++
++ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
++ /*
++ * We take advantage of this function to perform an early merge
++ * of the queues of possible cooperating processes.
++ */
++ if (bfqq != NULL) {
++ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false);
++ if (new_bfqq != NULL) {
++ bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
++ /*
++ * If we get here, the bio will be queued in the shared queue,
++ * i.e., new_bfqq, so use new_bfqq to decide whether bio and
++ * rq can be merged.
++ */
++ bfqq = new_bfqq;
++ }
++ }
++
++ return bfqq == RQ_BFQQ(rq);
++}
++
++static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
++ struct bfq_queue *bfqq)
++{
++ if (bfqq != NULL) {
++ bfq_mark_bfqq_must_alloc(bfqq);
++ bfq_mark_bfqq_budget_new(bfqq);
++ bfq_clear_bfqq_fifo_expire(bfqq);
++
++ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
++
++ bfq_log_bfqq(bfqd, bfqq,
++ "set_in_service_queue, cur-budget = %lu",
++ bfqq->entity.budget);
++ }
++
++ bfqd->in_service_queue = bfqq;
++}
++
++/*
++ * Get and set a new queue for service.
++ */
++static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
++{
++ struct bfq_queue *bfqq = bfq_get_next_queue(bfqd);
++
++ __bfq_set_in_service_queue(bfqd, bfqq);
++ return bfqq;
++}
++
+ /*
+ * If enough samples have been computed, return the current max budget
+ * stored in bfqd, which is dynamically updated according to the
+@@ -1077,63 +1345,6 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
+ return rq;
+ }
+
+-/*
+- * Must be called with the queue_lock held.
+- */
+-static int bfqq_process_refs(struct bfq_queue *bfqq)
+-{
+- int process_refs, io_refs;
+-
+- io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
+- process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
+- BUG_ON(process_refs < 0);
+- return process_refs;
+-}
+-
+-static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+-{
+- int process_refs, new_process_refs;
+- struct bfq_queue *__bfqq;
+-
+- /*
+- * If there are no process references on the new_bfqq, then it is
+- * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain
+- * may have dropped their last reference (not just their last process
+- * reference).
+- */
+- if (!bfqq_process_refs(new_bfqq))
+- return;
+-
+- /* Avoid a circular list and skip interim queue merges. */
+- while ((__bfqq = new_bfqq->new_bfqq)) {
+- if (__bfqq == bfqq)
+- return;
+- new_bfqq = __bfqq;
+- }
+-
+- process_refs = bfqq_process_refs(bfqq);
+- new_process_refs = bfqq_process_refs(new_bfqq);
+- /*
+- * If the process for the bfqq has gone away, there is no
+- * sense in merging the queues.
+- */
+- if (process_refs == 0 || new_process_refs == 0)
+- return;
+-
+- /*
+- * Merge in the direction of the lesser amount of work.
+- */
+- if (new_process_refs >= process_refs) {
+- bfqq->new_bfqq = new_bfqq;
+- atomic_add(process_refs, &new_bfqq->ref);
+- } else {
+- new_bfqq->new_bfqq = bfqq;
+- atomic_add(new_process_refs, &bfqq->ref);
+- }
+- bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
+- new_bfqq->pid);
+-}
+-
+ static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq)
+ {
+ struct bfq_entity *entity = &bfqq->entity;
+@@ -1703,7 +1914,7 @@ static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
+ */
+ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
+ {
+- struct bfq_queue *bfqq, *new_bfqq = NULL;
++ struct bfq_queue *bfqq;
+ struct request *next_rq;
+ enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT;
+
+@@ -1713,17 +1924,6 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
+
+ bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue");
+
+- /*
+- * If another queue has a request waiting within our mean seek
+- * distance, let it run. The expire code will check for close
+- * cooperators and put the close queue at the front of the
+- * service tree. If possible, merge the expiring queue with the
+- * new bfqq.
+- */
+- new_bfqq = bfq_close_cooperator(bfqd, bfqq);
+- if (new_bfqq != NULL && bfqq->new_bfqq == NULL)
+- bfq_setup_merge(bfqq, new_bfqq);
+-
+ if (bfq_may_expire_for_budg_timeout(bfqq) &&
+ !timer_pending(&bfqd->idle_slice_timer) &&
+ !bfq_bfqq_must_idle(bfqq))
+@@ -1760,36 +1960,26 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
+ bfq_clear_bfqq_wait_request(bfqq);
+ del_timer(&bfqd->idle_slice_timer);
+ }
+- if (new_bfqq == NULL)
+- goto keep_queue;
+- else
+- goto expire;
++ goto keep_queue;
+ }
+ }
+
+ /*
+- * No requests pending. If the in-service queue has no cooperator and
+- * still has requests in flight (possibly waiting for a completion)
+- * or is idling for a new request, then keep it.
++ * No requests pending. If the in-service queue still has requests in
++ * flight (possibly waiting for a completion) or is idling for a new
++ * request, then keep it.
+ */
+- if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) ||
+- (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq)))) {
++ if (timer_pending(&bfqd->idle_slice_timer) ||
++ (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq))) {
+ bfqq = NULL;
+ goto keep_queue;
+- } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) {
+- /*
+- * Expiring the queue because there is a close cooperator,
+- * cancel timer.
+- */
+- bfq_clear_bfqq_wait_request(bfqq);
+- del_timer(&bfqd->idle_slice_timer);
+ }
+
+ reason = BFQ_BFQQ_NO_MORE_REQUESTS;
+ expire:
+ bfq_bfqq_expire(bfqd, bfqq, 0, reason);
+ new_queue:
+- bfqq = bfq_set_in_service_queue(bfqd, new_bfqq);
++ bfqq = bfq_set_in_service_queue(bfqd);
+ bfq_log(bfqd, "select_queue: new queue %d returned",
+ bfqq != NULL ? bfqq->pid : 0);
+ keep_queue:
+@@ -1799,9 +1989,8 @@ keep_queue:
+ static void bfq_update_raising_data(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq)
+ {
++ struct bfq_entity *entity = &bfqq->entity;
+ if (bfqq->raising_coeff > 1) { /* queue is being boosted */
+- struct bfq_entity *entity = &bfqq->entity;
+-
+ bfq_log_bfqq(bfqd, bfqq,
+ "raising period dur %u/%u msec, "
+ "old raising coeff %u, w %d(%d)",
+@@ -1818,7 +2007,7 @@ static void bfq_update_raising_data(struct bfq_data *bfqd,
+ "WARN: pending prio change");
+ /*
+ * If too much time has elapsed from the beginning
+- * of this weight-raising, stop it.
++ * of this weight-raising period, stop it.
+ */
+ if (jiffies - bfqq->last_rais_start_finish >
+ bfqq->raising_cur_max_time) {
+@@ -1830,11 +2019,13 @@ static void bfq_update_raising_data(struct bfq_data *bfqd,
+ jiffies_to_msecs(bfqq->
+ raising_cur_max_time));
+ bfq_bfqq_end_raising(bfqq);
+- __bfq_entity_update_weight_prio(
+- bfq_entity_service_tree(entity),
+- entity);
+ }
+ }
++ /* Update weight both if it must be raised and if it must be lowered */
++ if ((entity->weight > entity->orig_weight) != (bfqq->raising_coeff > 1))
++ __bfq_entity_update_weight_prio(
++ bfq_entity_service_tree(entity),
++ entity);
+ }
+
+ /*
+@@ -2075,6 +2266,25 @@ static void bfq_init_icq(struct io_cq *icq)
+ struct bfq_io_cq *bic = icq_to_bic(icq);
+
+ bic->ttime.last_end_request = jiffies;
++ /*
++ * A newly created bic indicates that the process has just
++ * started doing I/O, and is probably mapping into memory its
++ * executable and libraries: it definitely needs weight raising.
++ * There is however the possibility that the process performs,
++ * for a while, I/O close to some other process. EQM intercepts
++ * this behavior and may merge the queue corresponding to the
++ * process with some other queue, BEFORE the weight of the queue
++ * is raised. Merged queues are not weight-raised (they are assumed
++ * to belong to processes that benefit only from high throughput).
++ * If the merge is basically the consequence of an accident, then
++ * the queue will be split soon and will get back its old weight.
++ * It is then important to write down somewhere that this queue
++ * does need weight raising, even if it did not make it to get its
++ * weight raised before being merged. To this purpose, we overload
++ * the field raising_time_left and assign 1 to it, to mark the queue
++ * as needing weight raising.
++ */
++ bic->raising_time_left = 1;
+ }
+
+ static void bfq_exit_icq(struct io_cq *icq)
+@@ -2088,6 +2298,13 @@ static void bfq_exit_icq(struct io_cq *icq)
+ }
+
+ if (bic->bfqq[BLK_RW_SYNC]) {
++ /*
++ * If the bic is using a shared queue, put the reference
++ * taken on the io_context when the bic started using a
++ * shared bfq_queue.
++ */
++ if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC]))
++ put_io_context(icq->ioc);
+ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]);
+ bic->bfqq[BLK_RW_SYNC] = NULL;
+ }
+@@ -2375,6 +2592,10 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
+ if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
+ return;
+
++ /* Idle window just restored, statistics are meaningless. */
++ if (bfq_bfqq_just_split(bfqq))
++ return;
++
+ enable_idle = bfq_bfqq_idle_window(bfqq);
+
+ if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
+@@ -2415,6 +2636,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
+ !BFQQ_SEEKY(bfqq))
+ bfq_update_idle_window(bfqd, bfqq, bic);
++ bfq_clear_bfqq_just_split(bfqq);
+
+ bfq_log_bfqq(bfqd, bfqq,
+ "rq_enqueued: idle_window=%d (seeky %d, mean %llu)",
+@@ -2475,13 +2697,48 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ static void bfq_insert_request(struct request_queue *q, struct request *rq)
+ {
+ struct bfq_data *bfqd = q->elevator->elevator_data;
+- struct bfq_queue *bfqq = RQ_BFQQ(rq);
++ struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq;
+
+ assert_spin_locked(bfqd->queue->queue_lock);
++
++ /*
++ * An unplug may trigger a requeue of a request from the device
++ * driver: make sure we are in process context while trying to
++ * merge two bfq_queues.
++ */
++ if (!in_interrupt()) {
++ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
++ if (new_bfqq != NULL) {
++ if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
++ new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
++ /*
++ * Release the request's reference to the old bfqq
++ * and make sure one is taken to the shared queue.
++ */
++ new_bfqq->allocated[rq_data_dir(rq)]++;
++ bfqq->allocated[rq_data_dir(rq)]--;
++ atomic_inc(&new_bfqq->ref);
++ bfq_put_queue(bfqq);
++ if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
++ bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
++ bfqq, new_bfqq);
++ rq->elv.priv[1] = new_bfqq;
++ bfqq = new_bfqq;
++ }
++ }
++
+ bfq_init_prio_data(bfqq, RQ_BIC(rq));
+
+ bfq_add_rq_rb(rq);
+
++ /*
++ * Here a newly-created bfq_queue has already started a weight-raising
++ * period: clear raising_time_left to prevent bfq_bfqq_save_state()
++ * from assigning it a full weight-raising period. See the detailed
++ * comments about this field in bfq_init_icq().
++ */
++ if (bfqq->bic != NULL)
++ bfqq->bic->raising_time_left = 0;
+ rq_set_fifo_time(rq, jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]);
+ list_add_tail(&rq->queuelist, &bfqq->fifo);
+
+@@ -2629,18 +2886,6 @@ static void bfq_put_request(struct request *rq)
+ }
+ }
+
+-static struct bfq_queue *
+-bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
+- struct bfq_queue *bfqq)
+-{
+- bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
+- (long unsigned)bfqq->new_bfqq->pid);
+- bic_set_bfqq(bic, bfqq->new_bfqq, 1);
+- bfq_mark_bfqq_coop(bfqq->new_bfqq);
+- bfq_put_queue(bfqq);
+- return bic_to_bfqq(bic, 1);
+-}
+-
+ /*
+ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this
+ * was the last process referring to said bfqq.
+@@ -2649,6 +2894,9 @@ static struct bfq_queue *
+ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
+ {
+ bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
++
++ put_io_context(bic->icq.ioc);
++
+ if (bfqq_process_refs(bfqq) == 1) {
+ bfqq->pid = current->pid;
+ bfq_clear_bfqq_coop(bfqq);
+@@ -2677,6 +2925,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
+ struct bfq_queue *bfqq;
+ struct bfq_group *bfqg;
+ unsigned long flags;
++ bool split = false;
+
+ might_sleep_if(gfp_mask & __GFP_WAIT);
+
+@@ -2695,24 +2944,14 @@ new_queue:
+ bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask);
+ bic_set_bfqq(bic, bfqq, is_sync);
+ } else {
+- /*
+- * If the queue was seeky for too long, break it apart.
+- */
++ /* If the queue was seeky for too long, break it apart. */
+ if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
+ bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
+ bfqq = bfq_split_bfqq(bic, bfqq);
++ split = true;
+ if (!bfqq)
+ goto new_queue;
+ }
+-
+- /*
+- * Check to see if this queue is scheduled to merge with
+- * another closely cooperating queue. The merging of queues
+- * happens here as it must be done in process context.
+- * The reference on new_bfqq was taken in merge_bfqqs.
+- */
+- if (bfqq->new_bfqq != NULL)
+- bfqq = bfq_merge_bfqqs(bfqd, bic, bfqq);
+ }
+
+ bfqq->allocated[rw]++;
+@@ -2723,6 +2962,26 @@ new_queue:
+ rq->elv.priv[0] = bic;
+ rq->elv.priv[1] = bfqq;
+
++ /*
++ * If a bfq_queue has only one process reference, it is owned
++ * by only one bfq_io_cq: we can set the bic field of the
++ * bfq_queue to the address of that structure. Also, if the
++ * queue has just been split, mark a flag so that the
++ * information is available to the other scheduler hooks.
++ */
++ if (bfqq_process_refs(bfqq) == 1) {
++ bfqq->bic = bic;
++ if (split) {
++ bfq_mark_bfqq_just_split(bfqq);
++ /*
++ * If the queue has just been split from a shared queue,
++ * restore the idle window and the possible weight
++ * raising period.
++ */
++ bfq_bfqq_resume_state(bfqq, bic);
++ }
++ }
++
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
+ return 0;
+diff --git a/block/bfq-sched.c b/block/bfq-sched.c
+index 30df81c..47e66a8 100644
+--- a/block/bfq-sched.c
++++ b/block/bfq-sched.c
+@@ -979,34 +979,6 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
+ return bfqq;
+ }
+
+-/*
+- * Forced extraction of the given queue.
+- */
+-static void bfq_get_next_queue_forced(struct bfq_data *bfqd,
+- struct bfq_queue *bfqq)
+-{
+- struct bfq_entity *entity;
+- struct bfq_sched_data *sd;
+-
+- BUG_ON(bfqd->in_service_queue != NULL);
+-
+- entity = &bfqq->entity;
+- /*
+- * Bubble up extraction/update from the leaf to the root.
+- */
+- for_each_entity(entity) {
+- sd = entity->sched_data;
+- bfq_update_budget(entity);
+- bfq_update_vtime(bfq_entity_service_tree(entity));
+- bfq_active_extract(bfq_entity_service_tree(entity), entity);
+- sd->active_entity = entity;
+- sd->next_active = NULL;
+- entity->service = 0;
+- }
+-
+- return;
+-}
+-
+ static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
+ {
+ if (bfqd->in_service_bic != NULL) {
+diff --git a/block/bfq.h b/block/bfq.h
+index 78da7d2..b6ebc1d 100644
+--- a/block/bfq.h
++++ b/block/bfq.h
+@@ -192,6 +192,8 @@ struct bfq_group;
+ * idle to backlogged
+ * @service_from_backlogged: cumulative service received from the @bfq_queue
+ * since the last transition from idle to backlogged
++ * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the
++ * queue is shared
+ *
+ * A bfq_queue is a leaf request queue; it can be associated to an io_context
+ * or more (if it is an async one). @cgroup holds a reference to the
+@@ -235,6 +237,7 @@ struct bfq_queue {
+ sector_t last_request_pos;
+
+ pid_t pid;
++ struct bfq_io_cq *bic;
+
+ /* weight-raising fields */
+ unsigned int raising_cur_max_time;
+@@ -264,12 +267,23 @@ struct bfq_ttime {
+ * @icq: associated io_cq structure
+ * @bfqq: array of two process queues, the sync and the async
+ * @ttime: associated @bfq_ttime struct
++ * @raising_time_left: snapshot of the time left before weight raising ends
++ * for the sync queue associated to this process; this
++ * snapshot is taken to remember this value while the weight
++ * raising is suspended because the queue is merged with a
++ * shared queue, and is used to set @raising_cur_max_time
++ * when the queue is split from the shared queue and its
++ * weight is raised again
++ * @saved_idle_window: same purpose as the previous field for the idle window
+ */
+ struct bfq_io_cq {
+ struct io_cq icq; /* must be the first member */
+ struct bfq_queue *bfqq[2];
+ struct bfq_ttime ttime;
+ int ioprio;
++
++ unsigned int raising_time_left;
++ unsigned int saved_idle_window;
+ };
+
+ /**
+@@ -411,6 +425,7 @@ enum bfqq_state_flags {
+ BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */
+ BFQ_BFQQ_FLAG_coop, /* bfqq is shared */
+ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */
++ BFQ_BFQQ_FLAG_just_split, /* queue has just been split */
+ BFQ_BFQQ_FLAG_softrt_update, /* needs softrt-next-start update */
+ };
+
+@@ -438,6 +453,7 @@ BFQ_BFQQ_FNS(sync);
+ BFQ_BFQQ_FNS(budget_new);
+ BFQ_BFQQ_FNS(coop);
+ BFQ_BFQQ_FNS(split_coop);
++BFQ_BFQQ_FNS(just_split);
+ BFQ_BFQQ_FNS(softrt_update);
+ #undef BFQ_BFQQ_FNS
+
+--
+1.8.5.2
+