diff options
Diffstat (limited to 'block/elevator.c')
-rw-r--r-- | block/elevator.c | 397 |
1 files changed, 228 insertions, 169 deletions
diff --git a/block/elevator.c b/block/elevator.c index 2f17d66d0e61..5ff093cb3cf8 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -26,7 +26,6 @@ #include <linux/kernel.h> #include <linux/fs.h> #include <linux/blkdev.h> -#include <linux/elevator.h> #include <linux/bio.h> #include <linux/module.h> #include <linux/slab.h> @@ -36,14 +35,15 @@ #include <linux/hash.h> #include <linux/uaccess.h> #include <linux/pm_runtime.h> -#include <linux/blk-cgroup.h> #include <trace/events/block.h> +#include "elevator.h" #include "blk.h" #include "blk-mq-sched.h" #include "blk-pm.h" #include "blk-wbt.h" +#include "blk-cgroup.h" static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); @@ -57,7 +57,7 @@ static LIST_HEAD(elv_list); * Query io scheduler to see if the current process issuing bio may be * merged with rq. */ -static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio) +static bool elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio) { struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; @@ -65,7 +65,7 @@ static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio) if (e->type->ops.allow_merge) return e->type->ops.allow_merge(q, rq, bio); - return 1; + return true; } /* @@ -83,73 +83,50 @@ bool elv_bio_merge_ok(struct request *rq, struct bio *bio) } EXPORT_SYMBOL(elv_bio_merge_ok); -static bool elevator_match(const struct elevator_type *e, const char *name) +static inline bool elv_support_features(struct request_queue *q, + const struct elevator_type *e) { - if (!strcmp(e->elevator_name, name)) - return true; - if (e->elevator_alias && !strcmp(e->elevator_alias, name)) - return true; - - return false; + return (q->required_elevator_features & e->elevator_features) == + q->required_elevator_features; } -/* - * Return scheduler with name 'name' +/** + * elevator_match - Check whether @e's name or alias matches @name + * @e: Scheduler to test + * @name: Elevator name to test + * + * Return true if the elevator @e's name or alias matches @name. */ -static struct elevator_type *elevator_find(const char *name) +static bool elevator_match(const struct elevator_type *e, const char *name) +{ + return !strcmp(e->elevator_name, name) || + (e->elevator_alias && !strcmp(e->elevator_alias, name)); +} + +static struct elevator_type *__elevator_find(const char *name) { struct elevator_type *e; - list_for_each_entry(e, &elv_list, list) { + list_for_each_entry(e, &elv_list, list) if (elevator_match(e, name)) return e; - } - return NULL; } -static void elevator_put(struct elevator_type *e) -{ - module_put(e->elevator_owner); -} - -static struct elevator_type *elevator_get(struct request_queue *q, - const char *name, bool try_loading) +static struct elevator_type *elevator_find_get(struct request_queue *q, + const char *name) { struct elevator_type *e; spin_lock(&elv_list_lock); - - e = elevator_find(name); - if (!e && try_loading) { - spin_unlock(&elv_list_lock); - request_module("%s-iosched", name); - spin_lock(&elv_list_lock); - e = elevator_find(name); - } - - if (e && !try_module_get(e->elevator_owner)) + e = __elevator_find(name); + if (e && (!elv_support_features(q, e) || !elevator_tryget(e))) e = NULL; - spin_unlock(&elv_list_lock); return e; } -static char chosen_elevator[ELV_NAME_MAX]; - -static int __init elevator_setup(char *str) -{ - /* - * Be backwards-compatible with previous kernels, so users - * won't get the wrong elevator. - */ - strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1); - return 1; -} - -__setup("elevator=", elevator_setup); - -static struct kobj_type elv_ktype; +static const struct kobj_type elv_ktype; struct elevator_queue *elevator_alloc(struct request_queue *q, struct elevator_type *e) @@ -160,6 +137,7 @@ struct elevator_queue *elevator_alloc(struct request_queue *q, if (unlikely(!eq)) return NULL; + __elevator_get(e); eq->type = e; kobject_init(&eq->kobj, &elv_ktype); mutex_init(&eq->sysfs_lock); @@ -178,11 +156,15 @@ static void elevator_release(struct kobject *kobj) kfree(e); } -void __elevator_exit(struct request_queue *q, struct elevator_queue *e) +void elevator_exit(struct request_queue *q) { + struct elevator_queue *e = q->elevator; + + ioc_clear_queue(q); + blk_mq_sched_free_rqs(q); + mutex_lock(&e->sysfs_lock); - if (e->type->ops.exit_sched) - blk_mq_exit_sched(q, e); + blk_mq_exit_sched(q, e); mutex_unlock(&e->sysfs_lock); kobject_put(&e->kobj); @@ -327,6 +309,9 @@ enum elv_merge elv_merge(struct request_queue *q, struct request **req, __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector); if (__rq && elv_bio_merge_ok(__rq, bio)) { *req = __rq; + + if (blk_discard_mergable(__rq)) + return ELEVATOR_DISCARD_MERGE; return ELEVATOR_BACK_MERGE; } @@ -341,9 +326,11 @@ enum elv_merge elv_merge(struct request_queue *q, struct request **req, * we can append 'rq' to an existing request, so we can throw 'rq' away * afterwards. * - * Returns true if we merged, false otherwise + * Returns true if we merged, false otherwise. 'free' will contain all + * requests that need to be freed. */ -bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq) +bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq, + struct list_head *free) { struct request *__rq; bool ret; @@ -354,8 +341,10 @@ bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq) /* * First try one-hit cache. */ - if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq)) + if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq)) { + list_add(&rq->queuelist, free); return true; + } if (blk_queue_noxmerges(q)) return false; @@ -369,6 +358,7 @@ bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq) if (!__rq || !blk_attempt_req_merge(q, __rq, rq)) break; + list_add(&rq->queuelist, free); /* The merged request could be merged with others, try again */ ret = true; rq = __rq; @@ -465,19 +455,19 @@ static const struct sysfs_ops elv_sysfs_ops = { .store = elv_attr_store, }; -static struct kobj_type elv_ktype = { +static const struct kobj_type elv_ktype = { .sysfs_ops = &elv_sysfs_ops, .release = elevator_release, }; -int elv_register_queue(struct request_queue *q) +int elv_register_queue(struct request_queue *q, bool uevent) { struct elevator_queue *e = q->elevator; int error; lockdep_assert_held(&q->sysfs_lock); - error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched"); + error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched"); if (!error) { struct elv_fs_entry *attr = e->type->elevator_attrs; if (attr) { @@ -487,29 +477,35 @@ int elv_register_queue(struct request_queue *q) attr++; } } - kobject_uevent(&e->kobj, KOBJ_ADD); - e->registered = 1; + if (uevent) + kobject_uevent(&e->kobj, KOBJ_ADD); + + set_bit(ELEVATOR_FLAG_REGISTERED, &e->flags); } return error; } void elv_unregister_queue(struct request_queue *q) { - lockdep_assert_held(&q->sysfs_lock); + struct elevator_queue *e = q->elevator; - if (q) { - struct elevator_queue *e = q->elevator; + lockdep_assert_held(&q->sysfs_lock); + if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) { kobject_uevent(&e->kobj, KOBJ_REMOVE); kobject_del(&e->kobj); - e->registered = 0; - /* Re-enable throttling in case elevator disabled it */ - wbt_enable_default(q); } } int elv_register(struct elevator_type *e) { + /* finish request is mandatory */ + if (WARN_ON_ONCE(!e->ops.finish_request)) + return -EINVAL; + /* insert_requests and dispatch_request are mandatory */ + if (WARN_ON_ONCE(!e->ops.insert_requests || !e->ops.dispatch_request)) + return -EINVAL; + /* create icq_cache if requested */ if (e->icq_size) { if (WARN_ON(e->icq_size < sizeof(struct io_cq)) || @@ -526,7 +522,7 @@ int elv_register(struct elevator_type *e) /* register, don't allow duplicate names */ spin_lock(&elv_list_lock); - if (elevator_find(e->elevator_name)) { + if (__elevator_find(e->elevator_name)) { spin_unlock(&elv_list_lock); kmem_cache_destroy(e->icq_cache); return -EBUSY; @@ -559,184 +555,238 @@ void elv_unregister(struct elevator_type *e) } EXPORT_SYMBOL_GPL(elv_unregister); -int elevator_switch_mq(struct request_queue *q, - struct elevator_type *new_e) +static inline bool elv_support_iosched(struct request_queue *q) { - int ret; + if (!queue_is_mq(q) || + (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))) + return false; + return true; +} - lockdep_assert_held(&q->sysfs_lock); +/* + * For single queue devices, default to using mq-deadline. If we have multiple + * queues or mq-deadline is not available, default to "none". + */ +static struct elevator_type *elevator_get_default(struct request_queue *q) +{ + if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT) + return NULL; - if (q->elevator) { - if (q->elevator->registered) - elv_unregister_queue(q); - ioc_clear_queue(q); - elevator_exit(q, q->elevator); - } + if (q->nr_hw_queues != 1 && + !blk_mq_is_shared_tags(q->tag_set->flags)) + return NULL; - ret = blk_mq_init_sched(q, new_e); - if (ret) - goto out; + return elevator_find_get(q, "mq-deadline"); +} + +/* + * Get the first elevator providing the features required by the request queue. + * Default to "none" if no matching elevator is found. + */ +static struct elevator_type *elevator_get_by_features(struct request_queue *q) +{ + struct elevator_type *e, *found = NULL; + + spin_lock(&elv_list_lock); - if (new_e) { - ret = elv_register_queue(q); - if (ret) { - elevator_exit(q, q->elevator); - goto out; + list_for_each_entry(e, &elv_list, list) { + if (elv_support_features(q, e)) { + found = e; + break; } } - if (new_e) - blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); - else - blk_add_trace_msg(q, "elv switch: none"); + if (found && !elevator_tryget(found)) + found = NULL; -out: - return ret; + spin_unlock(&elv_list_lock); + return found; } /* - * For blk-mq devices, we default to using mq-deadline, if available, for single - * queue devices. If deadline isn't available OR we have multiple queues, - * default to "none". + * For a device queue that has no required features, use the default elevator + * settings. Otherwise, use the first elevator available matching the required + * features. If no suitable elevator is find or if the chosen elevator + * initialization fails, fall back to the "none" elevator (no elevator). */ -int elevator_init_mq(struct request_queue *q) +void elevator_init_mq(struct request_queue *q) { struct elevator_type *e; - int err = 0; + int err; - if (q->nr_hw_queues != 1) - return 0; + if (!elv_support_iosched(q)) + return; + + WARN_ON_ONCE(blk_queue_registered(q)); - /* - * q->sysfs_lock must be held to provide mutual exclusion between - * elevator_switch() and here. - */ - mutex_lock(&q->sysfs_lock); if (unlikely(q->elevator)) - goto out_unlock; + return; - e = elevator_get(q, "mq-deadline", false); + if (!q->required_elevator_features) + e = elevator_get_default(q); + else + e = elevator_get_by_features(q); if (!e) - goto out_unlock; + return; + + /* + * We are called before adding disk, when there isn't any FS I/O, + * so freezing queue plus canceling dispatch work is enough to + * drain any dispatch activities originated from passthrough + * requests, then no need to quiesce queue which may add long boot + * latency, especially when lots of disks are involved. + */ + blk_mq_freeze_queue(q); + blk_mq_cancel_work_sync(q); err = blk_mq_init_sched(q, e); - if (err) - elevator_put(e); -out_unlock: - mutex_unlock(&q->sysfs_lock); - return err; -} + blk_mq_unfreeze_queue(q); + + if (err) { + pr_warn("\"%s\" elevator initialization failed, " + "falling back to \"none\"\n", e->elevator_name); + } + + elevator_put(e); +} /* - * switch to new_e io scheduler. be careful not to introduce deadlocks - - * we don't free the old io scheduler, before we have allocated what we - * need for the new one. this way we have a chance of going back to the old - * one, if the new one fails init for some reason. + * Switch to new_e io scheduler. + * + * If switching fails, we are most likely running out of memory and not able + * to restore the old io scheduler, so leaving the io scheduler being none. */ -static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) +int elevator_switch(struct request_queue *q, struct elevator_type *new_e) { - int err; + int ret; lockdep_assert_held(&q->sysfs_lock); blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); - err = elevator_switch_mq(q, new_e); + if (q->elevator) { + elv_unregister_queue(q); + elevator_exit(q); + } + ret = blk_mq_init_sched(q, new_e); + if (ret) + goto out_unfreeze; + + ret = elv_register_queue(q, true); + if (ret) { + elevator_exit(q); + goto out_unfreeze; + } + blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); + +out_unfreeze: blk_mq_unquiesce_queue(q); blk_mq_unfreeze_queue(q); - return err; + if (ret) { + pr_warn("elv: switch to \"%s\" failed, falling back to \"none\"\n", + new_e->elevator_name); + } + + return ret; +} + +void elevator_disable(struct request_queue *q) +{ + lockdep_assert_held(&q->sysfs_lock); + + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); + + elv_unregister_queue(q); + elevator_exit(q); + blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q); + q->elevator = NULL; + q->nr_requests = q->tag_set->queue_depth; + blk_add_trace_msg(q, "elv switch: none"); + + blk_mq_unquiesce_queue(q); + blk_mq_unfreeze_queue(q); } /* * Switch this queue to the given IO scheduler. */ -static int __elevator_change(struct request_queue *q, const char *name) +static int elevator_change(struct request_queue *q, const char *elevator_name) { - char elevator_name[ELV_NAME_MAX]; struct elevator_type *e; + int ret; /* Make sure queue is not in the middle of being removed */ - if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags)) + if (!blk_queue_registered(q)) return -ENOENT; - /* - * Special case for mq, turn off scheduling - */ - if (!strncmp(name, "none", 4)) { - if (!q->elevator) - return 0; - return elevator_switch(q, NULL); - } - - strlcpy(elevator_name, name, sizeof(elevator_name)); - e = elevator_get(q, strstrip(elevator_name), true); - if (!e) - return -EINVAL; - - if (q->elevator && elevator_match(q->elevator->type, elevator_name)) { - elevator_put(e); + if (!strncmp(elevator_name, "none", 4)) { + if (q->elevator) + elevator_disable(q); return 0; } - return elevator_switch(q, e); -} + if (q->elevator && elevator_match(q->elevator->type, elevator_name)) + return 0; -static inline bool elv_support_iosched(struct request_queue *q) -{ - if (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED)) - return false; - return true; + e = elevator_find_get(q, elevator_name); + if (!e) { + request_module("%s-iosched", elevator_name); + e = elevator_find_get(q, elevator_name); + if (!e) + return -EINVAL; + } + ret = elevator_switch(q, e); + elevator_put(e); + return ret; } -ssize_t elv_iosched_store(struct request_queue *q, const char *name, +ssize_t elv_iosched_store(struct request_queue *q, const char *buf, size_t count) { + char elevator_name[ELV_NAME_MAX]; int ret; - if (!queue_is_mq(q) || !elv_support_iosched(q)) + if (!elv_support_iosched(q)) return count; - ret = __elevator_change(q, name); + strscpy(elevator_name, buf, sizeof(elevator_name)); + ret = elevator_change(q, strstrip(elevator_name)); if (!ret) return count; - return ret; } ssize_t elv_iosched_show(struct request_queue *q, char *name) { - struct elevator_queue *e = q->elevator; - struct elevator_type *elv = NULL; - struct elevator_type *__e; + struct elevator_queue *eq = q->elevator; + struct elevator_type *cur = NULL, *e; int len = 0; - if (!queue_is_mq(q)) + if (!elv_support_iosched(q)) return sprintf(name, "none\n"); - if (!q->elevator) + if (!q->elevator) { len += sprintf(name+len, "[none] "); - else - elv = e->type; + } else { + len += sprintf(name+len, "none "); + cur = eq->type; + } spin_lock(&elv_list_lock); - list_for_each_entry(__e, &elv_list, list) { - if (elv && elevator_match(elv, __e->elevator_name)) { - len += sprintf(name+len, "[%s] ", elv->elevator_name); - continue; - } - if (elv_support_iosched(q)) - len += sprintf(name+len, "%s ", __e->elevator_name); + list_for_each_entry(e, &elv_list, list) { + if (e == cur) + len += sprintf(name+len, "[%s] ", e->elevator_name); + else if (elv_support_features(q, e)) + len += sprintf(name+len, "%s ", e->elevator_name); } spin_unlock(&elv_list_lock); - if (q->elevator) - len += sprintf(name+len, "none"); - - len += sprintf(len+name, "\n"); + len += sprintf(name+len, "\n"); return len; } @@ -763,3 +813,12 @@ struct request *elv_rb_latter_request(struct request_queue *q, return NULL; } EXPORT_SYMBOL(elv_rb_latter_request); + +static int __init elevator_setup(char *str) +{ + pr_warn("Kernel parameter elevator= does not have any effect anymore.\n" + "Please use sysfs to set IO scheduler for individual devices.\n"); + return 1; +} + +__setup("elevator=", elevator_setup); |