diff --git a/block/blk-core.c b/block/blk-core.c index eb0c705f3dfd..326584363d55 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1728,6 +1728,11 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) if (sync) rw_flags |= REQ_SYNC; + /* + * Pass the bio's META/PRIO flags, if set, on to the IO scheduler + */ + rw_flags |= (bio->bi_rw & (REQ_META | REQ_PRIO)); + /* * Grab a free request. This is might sleep but can not fail. * Returns with the queue unlocked. diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 250cc6a8a236..82f8978c3403 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -36,9 +36,13 @@ static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */ static const int cfq_hist_divisor = 4; /* - * offset from end of service tree + * offset from end of queue service tree for idle class */ #define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5) +/* offset from end of group service tree under time slice mode */ +#define CFQ_SLICE_MODE_GROUP_DELAY (NSEC_PER_SEC / 5) +/* offset from end of group service tree under IOPS mode */ +#define CFQ_IOPS_MODE_GROUP_DELAY (HZ / 5) /* * below this threshold, we consider thinktime immediate @@ -136,7 +140,7 @@ struct cfq_queue { /* io prio of this group */ unsigned short ioprio, org_ioprio; - unsigned short ioprio_class; + unsigned short ioprio_class, org_ioprio_class; pid_t pid; @@ -977,15 +981,6 @@ static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) return min_vdisktime; } -static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime) -{ - s64 delta = (s64)(vdisktime - min_vdisktime); - if (delta < 0) - min_vdisktime = vdisktime; - - return min_vdisktime; -} - static void update_min_vdisktime(struct cfq_rb_root *st) { struct cfq_group *cfqg; @@ -1361,6 +1356,14 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) cfqg->vfraction = max_t(unsigned, vfr, 1); } +static inline u64 cfq_get_cfqg_vdisktime_delay(struct cfq_data *cfqd) +{ + if 
(!iops_mode(cfqd)) + return CFQ_SLICE_MODE_GROUP_DELAY; + else + return CFQ_IOPS_MODE_GROUP_DELAY; +} + static void cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) { @@ -1380,7 +1383,8 @@ cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) n = rb_last(&st->rb); if (n) { __cfqg = rb_entry_cfqg(n); - cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY; + cfqg->vdisktime = __cfqg->vdisktime + + cfq_get_cfqg_vdisktime_delay(cfqd); } else cfqg->vdisktime = st->min_vdisktime; cfq_group_service_tree_add(st, cfqg); @@ -2566,9 +2570,11 @@ static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd) if (!cfqg) return NULL; - for_each_cfqg_st(cfqg, i, j, st) - if ((cfqq = cfq_rb_first(st)) != NULL) + for_each_cfqg_st(cfqg, i, j, st) { + cfqq = cfq_rb_first(st); + if (cfqq) return cfqq; + } return NULL; } @@ -2737,6 +2743,7 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) static void cfq_arm_slice_timer(struct cfq_data *cfqd) { struct cfq_queue *cfqq = cfqd->active_queue; + struct cfq_rb_root *st = cfqq->service_tree; struct cfq_io_cq *cic; u64 sl, group_idle = 0; u64 now = ktime_get_ns(); @@ -2746,7 +2753,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) * for devices that support queuing, otherwise we still have a problem * with sync vs async workloads. */ - if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag) + if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag && + !cfqd->cfq_group_idle) return; WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list)); @@ -2788,8 +2796,13 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) return; } - /* There are other queues in the group, don't do group idle */ - if (group_idle && cfqq->cfqg->nr_cfqq > 1) + /* + * There are other queues in the group, or this is the only group and + * its thinktime is too big: don't do group idle. 
+ */ + if (group_idle && + (cfqq->cfqg->nr_cfqq > 1 || + cfq_io_thinktime_big(cfqd, &st->ttime, true))) return; cfq_mark_cfqq_wait_request(cfqq); @@ -3518,6 +3531,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct cfq_io_cq *cic) * elevate the priority of this queue */ cfqq->org_ioprio = cfqq->ioprio; + cfqq->org_ioprio_class = cfqq->ioprio_class; cfq_clear_cfqq_prio_changed(cfqq); } @@ -4159,6 +4173,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfq_schedule_dispatch(cfqd); } +static void cfqq_boost_on_prio(struct cfq_queue *cfqq, int rw) +{ + /* + * If REQ_PRIO is set, boost the class and prio level if they are below + * BE/NORM. If REQ_PRIO is not set, restore the potentially boosted + * class/prio level from the saved org_ioprio_class/org_ioprio. + */ + if (!(rw & REQ_PRIO)) { + cfqq->ioprio_class = cfqq->org_ioprio_class; + cfqq->ioprio = cfqq->org_ioprio; + } else { + if (cfq_class_idle(cfqq)) + cfqq->ioprio_class = IOPRIO_CLASS_BE; + if (cfqq->ioprio > IOPRIO_NORM) + cfqq->ioprio = IOPRIO_NORM; + } +} + static inline int __cfq_may_queue(struct cfq_queue *cfqq) { if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) { @@ -4189,6 +4221,7 @@ static int cfq_may_queue(struct request_queue *q, int rw) cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); if (cfqq) { cfq_init_prio_data(cfqq, cic); + cfqq_boost_on_prio(cfqq, rw); return __cfq_may_queue(cfqq); }