aboutsummaryrefslogtreecommitdiffstats
path: root/block/blk-core.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/blk-core.c')
-rw-r--r--block/blk-core.c115
1 files changed, 66 insertions, 49 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index ee33590f54eb..eb8b52241453 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -42,7 +42,7 @@
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
-#include "blk-wbt.h"
+#include "blk-rq-qos.h"
#ifdef CONFIG_DEBUG_FS
struct dentry *blk_debugfs_root;
@@ -715,6 +715,35 @@ void blk_set_queue_dying(struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blk_set_queue_dying);
+/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */
+void blk_exit_queue(struct request_queue *q)
+{
+ /*
+ * Since the I/O scheduler exit code may access cgroup information,
+ * perform I/O scheduler exit before disassociating from the block
+ * cgroup controller.
+ */
+ if (q->elevator) {
+ ioc_clear_queue(q);
+ elevator_exit(q, q->elevator);
+ q->elevator = NULL;
+ }
+
+ /*
+ * Remove all references to @q from the block cgroup controller before
+ * restoring @q->queue_lock to avoid that restoring this pointer causes
+ * e.g. blkcg_print_blkgs() to crash.
+ */
+ blkcg_exit_queue(q);
+
+ /*
+ * Since the cgroup code may dereference the @q->backing_dev_info
+ * pointer, only decrease its reference count after having removed the
+ * association with the block cgroup controller.
+ */
+ bdi_put(q->backing_dev_info);
+}
+
/**
* blk_cleanup_queue - shutdown a request queue
* @q: request queue to shutdown
@@ -762,9 +791,12 @@ void blk_cleanup_queue(struct request_queue *q)
* make sure all in-progress dispatch are completed because
* blk_freeze_queue() can only complete all requests, and
* dispatch may still be in-progress since we dispatch requests
- * from more than one contexts
+ * from more than one contexts.
+ *
+ * We rely on driver to deal with the race in case that queue
+ * initialization isn't done.
*/
- if (q->mq_ops)
+ if (q->mq_ops && blk_queue_init_done(q))
blk_mq_quiesce_queue(q);
/* for synchronous bio-based driver finish in-flight integrity i/o */
@@ -780,30 +812,7 @@ void blk_cleanup_queue(struct request_queue *q)
*/
WARN_ON_ONCE(q->kobj.state_in_sysfs);
- /*
- * Since the I/O scheduler exit code may access cgroup information,
- * perform I/O scheduler exit before disassociating from the block
- * cgroup controller.
- */
- if (q->elevator) {
- ioc_clear_queue(q);
- elevator_exit(q, q->elevator);
- q->elevator = NULL;
- }
-
- /*
- * Remove all references to @q from the block cgroup controller before
- * restoring @q->queue_lock to avoid that restoring this pointer causes
- * e.g. blkcg_print_blkgs() to crash.
- */
- blkcg_exit_queue(q);
-
- /*
- * Since the cgroup code may dereference the @q->backing_dev_info
- * pointer, only decrease its reference count after having removed the
- * association with the block cgroup controller.
- */
- bdi_put(q->backing_dev_info);
+ blk_exit_queue(q);
if (q->mq_ops)
blk_mq_free_queue(q);
@@ -1026,7 +1035,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
laptop_mode_timer_fn, 0);
timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
INIT_WORK(&q->timeout_work, NULL);
- INIT_LIST_HEAD(&q->queue_head);
INIT_LIST_HEAD(&q->timeout_list);
INIT_LIST_HEAD(&q->icq_list);
#ifdef CONFIG_BLK_CGROUP
@@ -1180,6 +1188,7 @@ out_exit_flush_rq:
q->exit_rq_fn(q, q->fq->flush_rq);
out_free_flush_queue:
blk_free_flush_queue(q->fq);
+ q->fq = NULL;
return -ENOMEM;
}
EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -1641,7 +1650,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
blk_delete_timer(rq);
blk_clear_rq_complete(rq);
trace_block_rq_requeue(q, rq);
- wbt_requeue(q->rq_wb, rq);
+ rq_qos_requeue(q, rq);
if (rq->rq_flags & RQF_QUEUED)
blk_queue_end_tag(q, rq);
@@ -1748,7 +1757,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
/* this is a bio leak */
WARN_ON(req->bio != NULL);
- wbt_done(q->rq_wb, req);
+ rq_qos_done(q, req);
/*
* Request may not have originated from ll_rw_blk. if not,
@@ -1982,7 +1991,6 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
int where = ELEVATOR_INSERT_SORT;
struct request *req, *free;
unsigned int request_count = 0;
- unsigned int wb_acct;
/*
* low level driver can indicate that it wants pages above a
@@ -2040,7 +2048,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
}
get_rq:
- wb_acct = wbt_wait(q->rq_wb, bio, q->queue_lock);
+ rq_qos_throttle(q, bio, q->queue_lock);
/*
* Grab a free request. This is might sleep but can not fail.
@@ -2050,7 +2058,7 @@ get_rq:
req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO);
if (IS_ERR(req)) {
blk_queue_exit(q);
- __wbt_done(q->rq_wb, wb_acct);
+ rq_qos_cleanup(q, bio);
if (PTR_ERR(req) == -ENOMEM)
bio->bi_status = BLK_STS_RESOURCE;
else
@@ -2059,7 +2067,7 @@ get_rq:
goto out_unlock;
}
- wbt_track(req, wb_acct);
+ rq_qos_track(q, req, bio);
/*
* After dropping the lock and possibly sleeping here, our request
@@ -2152,9 +2160,14 @@ static inline bool should_fail_request(struct hd_struct *part,
static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
{
- if (part->policy && op_is_write(bio_op(bio))) {
+ const int op = bio_op(bio);
+
+ if (part->policy && op_is_write(op)) {
char b[BDEVNAME_SIZE];
+ if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
+ return false;
+
WARN_ONCE(1,
"generic_make_request: Trying to write "
"to read-only block-device %s (partno %d)\n",
@@ -2700,13 +2713,13 @@ EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
void blk_account_io_completion(struct request *req, unsigned int bytes)
{
if (blk_do_io_stat(req)) {
- const int rw = rq_data_dir(req);
+ const int sgrp = op_stat_group(req_op(req));
struct hd_struct *part;
int cpu;
cpu = part_stat_lock();
part = req->part;
- part_stat_add(cpu, part, sectors[rw], bytes >> 9);
+ part_stat_add(cpu, part, sectors[sgrp], bytes >> 9);
part_stat_unlock();
}
}
@@ -2719,19 +2732,17 @@ void blk_account_io_done(struct request *req, u64 now)
* containing request is enough.
*/
if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
- unsigned long duration;
- const int rw = rq_data_dir(req);
+ const int sgrp = op_stat_group(req_op(req));
struct hd_struct *part;
int cpu;
- duration = nsecs_to_jiffies(now - req->start_time_ns);
cpu = part_stat_lock();
part = req->part;
- part_stat_inc(cpu, part, ios[rw]);
- part_stat_add(cpu, part, ticks[rw], duration);
+ part_stat_inc(cpu, part, ios[sgrp]);
+ part_stat_add(cpu, part, nsecs[sgrp], now - req->start_time_ns);
part_round_stats(req->q, cpu, part);
- part_dec_in_flight(req->q, part, rw);
+ part_dec_in_flight(req->q, part, rq_data_dir(req));
hd_struct_put(part);
part_stat_unlock();
@@ -2751,9 +2762,9 @@ static bool blk_pm_allow_request(struct request *rq)
return rq->rq_flags & RQF_PM;
case RPM_SUSPENDED:
return false;
+ default:
+ return true;
}
-
- return true;
}
#else
static bool blk_pm_allow_request(struct request *rq)
@@ -2980,7 +2991,7 @@ void blk_start_request(struct request *req)
req->throtl_size = blk_rq_sectors(req);
#endif
req->rq_flags |= RQF_STATS;
- wbt_issue(req->q->rq_wb, req);
+ rq_qos_issue(req->q, req);
}
BUG_ON(blk_rq_is_complete(req));
@@ -3053,6 +3064,10 @@ EXPORT_SYMBOL_GPL(blk_steal_bios);
* Passing the result of blk_rq_bytes() as @nr_bytes guarantees
* %false return from this function.
*
+ * Note:
+ * The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in both
+ * blk_rq_bytes() and in blk_update_request().
+ *
* Return:
* %false - this request doesn't have any more data
* %true - this request has more data
@@ -3200,7 +3215,7 @@ void blk_finish_request(struct request *req, blk_status_t error)
blk_account_io_done(req, now);
if (req->end_io) {
- wbt_done(req->q->rq_wb, req);
+ rq_qos_done(q, req);
req->end_io(req, error);
} else {
if (blk_bidi_rq(req))
@@ -3763,9 +3778,11 @@ EXPORT_SYMBOL(blk_finish_plug);
*/
void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
{
- /* not support for RQF_PM and ->rpm_status in blk-mq yet */
- if (q->mq_ops)
+ /* Don't enable runtime PM for blk-mq until it is ready */
+ if (q->mq_ops) {
+ pm_runtime_disable(dev);
return;
+ }
q->dev = dev;
q->rpm_status = RPM_ACTIVE;