aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/bfq-iosched.c22
-rw-r--r--block/bio-integrity.c8
-rw-r--r--block/bio.c28
-rw-r--r--block/blk-cgroup.c8
-rw-r--r--block/blk-core.c1
-rw-r--r--block/blk-iolatency.c51
-rw-r--r--block/blk-mq.c2
-rw-r--r--block/blk-rq-qos.c6
-rw-r--r--block/blk-sysfs.c3
-rw-r--r--block/blk-throttle.c9
-rw-r--r--block/blk-zoned.c48
11 files changed, 117 insertions, 69 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index e5db3856b194..b528710364e9 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2085,9 +2085,14 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
blk_rq_pos(container_of(rb_prev(&req->rb_node),
struct request, rb_node))) {
struct bfq_queue *bfqq = bfq_init_rq(req);
- struct bfq_data *bfqd = bfqq->bfqd;
+ struct bfq_data *bfqd;
struct request *prev, *next_rq;
+ if (!bfqq)
+ return;
+
+ bfqd = bfqq->bfqd;
+
/* Reposition request in its sort_list */
elv_rb_del(&bfqq->sort_list, req);
elv_rb_add(&bfqq->sort_list, req);
@@ -2134,6 +2139,9 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
struct bfq_queue *bfqq = bfq_init_rq(rq),
*next_bfqq = bfq_init_rq(next);
+ if (!bfqq)
+ return;
+
/*
* If next and rq belong to the same bfq_queue and next is older
* than rq, then reposition rq in the fifo (by substituting next
@@ -5061,12 +5069,12 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
spin_lock_irq(&bfqd->lock);
bfqq = bfq_init_rq(rq);
- if (at_head || blk_rq_is_passthrough(rq)) {
+ if (!bfqq || at_head || blk_rq_is_passthrough(rq)) {
if (at_head)
list_add(&rq->queuelist, &bfqd->dispatch);
else
list_add_tail(&rq->queuelist, &bfqd->dispatch);
- } else { /* bfqq is assumed to be non null here */
+ } else {
idle_timer_disabled = __bfq_insert_request(bfqd, rq);
/*
* Update bfqq, because, if a queue merge has occurred
@@ -5398,8 +5406,14 @@ static void bfq_update_inject_limit(struct bfq_data *bfqd,
* total service time, and there seem to be the right
* conditions to do it, or we can lower the last base value
* computed.
+ *
+ * NOTE: (bfqd->rq_in_driver == 1) means that there is no I/O
+ * request in flight, because this function is in the code
+ * path that handles the completion of a request of bfqq, and,
+ * in particular, this function is executed before
+ * bfqd->rq_in_driver is decremented in such a code path.
*/
- if ((bfqq->last_serv_time_ns == 0 && bfqd->rq_in_driver == 0) ||
+ if ((bfqq->last_serv_time_ns == 0 && bfqd->rq_in_driver == 1) ||
tot_time_ns < bfqq->last_serv_time_ns) {
bfqq->last_serv_time_ns = tot_time_ns;
/*
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 4db620849515..fb95dbb21dd8 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -276,8 +276,12 @@ bool bio_integrity_prep(struct bio *bio)
ret = bio_integrity_add_page(bio, virt_to_page(buf),
bytes, offset);
- if (ret == 0)
- return false;
+ if (ret == 0) {
+ printk(KERN_ERR "could not attach integrity payload\n");
+ kfree(buf);
+ status = BLK_STS_RESOURCE;
+ goto err_end_io;
+ }
if (ret < bytes)
break;
diff --git a/block/bio.c b/block/bio.c
index 67bba12d273b..121caeea3e00 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -16,6 +16,7 @@
#include <linux/workqueue.h>
#include <linux/cgroup.h>
#include <linux/blk-cgroup.h>
+#include <linux/highmem.h>
#include <trace/events/block.h>
#include "blk.h"
@@ -1479,8 +1480,22 @@ void bio_unmap_user(struct bio *bio)
bio_put(bio);
}
+static void bio_invalidate_vmalloc_pages(struct bio *bio)
+{
+#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
+ if (bio->bi_private && !op_is_write(bio_op(bio))) {
+ unsigned long i, len = 0;
+
+ for (i = 0; i < bio->bi_vcnt; i++)
+ len += bio->bi_io_vec[i].bv_len;
+ invalidate_kernel_vmap_range(bio->bi_private, len);
+ }
+#endif
+}
+
static void bio_map_kern_endio(struct bio *bio)
{
+ bio_invalidate_vmalloc_pages(bio);
bio_put(bio);
}
@@ -1501,6 +1516,8 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
unsigned long start = kaddr >> PAGE_SHIFT;
const int nr_pages = end - start;
+ bool is_vmalloc = is_vmalloc_addr(data);
+ struct page *page;
int offset, i;
struct bio *bio;
@@ -1508,6 +1525,11 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
if (!bio)
return ERR_PTR(-ENOMEM);
+ if (is_vmalloc) {
+ flush_kernel_vmap_range(data, len);
+ bio->bi_private = data;
+ }
+
offset = offset_in_page(kaddr);
for (i = 0; i < nr_pages; i++) {
unsigned int bytes = PAGE_SIZE - offset;
@@ -1518,7 +1540,11 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
if (bytes > len)
bytes = len;
- if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
+ if (!is_vmalloc)
+ page = virt_to_page(data);
+ else
+ page = vmalloc_to_page(data);
+ if (bio_add_pc_page(q, bio, page, bytes,
offset) < bytes) {
/* we don't support partial mappings */
bio_put(bio);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 1f7127b03490..e4715b35d42c 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1006,8 +1006,12 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
}
next:
if (has_stats) {
- off += scnprintf(buf+off, size-off, "\n");
- seq_commit(sf, off);
+ if (off < size - 1) {
+ off += scnprintf(buf+off, size-off, "\n");
+ seq_commit(sf, off);
+ } else {
+ seq_commit(sf, -1);
+ }
}
}
diff --git a/block/blk-core.c b/block/blk-core.c
index 8340f69670d8..5183fca0818a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -117,6 +117,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->internal_tag = -1;
rq->start_time_ns = ktime_get_ns();
rq->part = NULL;
+ refcount_set(&rq->ref, 1);
}
EXPORT_SYMBOL(blk_rq_init);
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index d22e61bced86..d973c38ee4fd 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -618,44 +618,26 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
inflight = atomic_dec_return(&rqw->inflight);
WARN_ON_ONCE(inflight < 0);
- if (iolat->min_lat_nsec == 0)
- goto next;
- iolatency_record_time(iolat, &bio->bi_issue, now,
- issue_as_root);
- window_start = atomic64_read(&iolat->window_start);
- if (now > window_start &&
- (now - window_start) >= iolat->cur_win_nsec) {
- if (atomic64_cmpxchg(&iolat->window_start,
- window_start, now) == window_start)
- iolatency_check_latencies(iolat, now);
+ /*
+ * If bi_status is BLK_STS_AGAIN, the bio wasn't actually
+ * submitted, so do not account for it.
+ */
+ if (iolat->min_lat_nsec && bio->bi_status != BLK_STS_AGAIN) {
+ iolatency_record_time(iolat, &bio->bi_issue, now,
+ issue_as_root);
+ window_start = atomic64_read(&iolat->window_start);
+ if (now > window_start &&
+ (now - window_start) >= iolat->cur_win_nsec) {
+ if (atomic64_cmpxchg(&iolat->window_start,
+ window_start, now) == window_start)
+ iolatency_check_latencies(iolat, now);
+ }
}
-next:
wake_up(&rqw->wait);
blkg = blkg->parent;
}
}
-static void blkcg_iolatency_cleanup(struct rq_qos *rqos, struct bio *bio)
-{
- struct blkcg_gq *blkg;
-
- blkg = bio->bi_blkg;
- while (blkg && blkg->parent) {
- struct rq_wait *rqw;
- struct iolatency_grp *iolat;
-
- iolat = blkg_to_lat(blkg);
- if (!iolat)
- goto next;
-
- rqw = &iolat->rq_wait;
- atomic_dec(&rqw->inflight);
- wake_up(&rqw->wait);
-next:
- blkg = blkg->parent;
- }
-}
-
static void blkcg_iolatency_exit(struct rq_qos *rqos)
{
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
@@ -667,7 +649,6 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos)
static struct rq_qos_ops blkcg_iolatency_ops = {
.throttle = blkcg_iolatency_throttle,
- .cleanup = blkcg_iolatency_cleanup,
.done_bio = blkcg_iolatency_done_bio,
.exit = blkcg_iolatency_exit,
};
@@ -778,8 +759,10 @@ static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
if (!oldval && val)
return 1;
- if (oldval && !val)
+ if (oldval && !val) {
+ blkcg_clear_delay(blkg);
return -1;
+ }
return 0;
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ce0f5f4ede70..68106a41f90d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2674,8 +2674,6 @@ void blk_mq_release(struct request_queue *q)
struct blk_mq_hw_ctx *hctx, *next;
int i;
- cancel_delayed_work_sync(&q->requeue_work);
-
queue_for_each_hw_ctx(q, hctx, i)
WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list));
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 659ccb8b693f..06d024204f50 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -202,6 +202,7 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr,
return -1;
data->got_token = true;
+ smp_wmb();
list_del_init(&curr->entry);
wake_up_process(data->task);
return 1;
@@ -245,6 +246,7 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);
do {
+ /* The memory barrier in set_task_state saves us here. */
if (data.got_token)
break;
if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
@@ -255,12 +257,14 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
* which means we now have two. Put our local token
* and wake anyone else potentially waiting for one.
*/
+ smp_rmb();
if (data.got_token)
cleanup_cb(rqw, private_data);
break;
}
io_schedule();
- has_sleeper = false;
+ has_sleeper = true;
+ set_current_state(TASK_UNINTERRUPTIBLE);
} while (1);
finish_wait(&rqw->wait, &data.wq);
}
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 977c659dcd18..9bfa3ea4ed63 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -892,6 +892,9 @@ static void __blk_release_queue(struct work_struct *work)
blk_free_queue_stats(q->stats);
+ if (queue_is_mq(q))
+ cancel_delayed_work_sync(&q->requeue_work);
+
blk_exit_queue(q);
blk_queue_free_zone_bitmaps(q);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 9ea7c0ecad10..8ab6c8153223 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -881,13 +881,10 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
u64 tmp;
- jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
-
- /* Slice has just started. Consider one slice interval */
- if (!jiffy_elapsed)
- jiffy_elapsed_rnd = tg->td->throtl_slice;
+ jiffy_elapsed = jiffies - tg->slice_start[rw];
- jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
+ /* Round up to the next throttle slice, wait time must be nonzero */
+ jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);
/*
* jiffy_elapsed_rnd should not be a big value as minimum iops can be
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index ae7e91bd0618..0434e2846028 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -14,6 +14,9 @@
#include <linux/rbtree.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/sched/mm.h>
#include "blk.h"
@@ -70,7 +73,7 @@ EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
static inline unsigned int __blkdev_nr_zones(struct request_queue *q,
sector_t nr_sectors)
{
- unsigned long zone_sectors = blk_queue_zone_sectors(q);
+ sector_t zone_sectors = blk_queue_zone_sectors(q);
return (nr_sectors + zone_sectors - 1) >> ilog2(zone_sectors);
}
@@ -373,22 +376,25 @@ static inline unsigned long *blk_alloc_zone_bitmap(int node,
* Allocate an array of struct blk_zone to get nr_zones zone information.
* The allocated array may be smaller than nr_zones.
*/
-static struct blk_zone *blk_alloc_zones(int node, unsigned int *nr_zones)
+static struct blk_zone *blk_alloc_zones(unsigned int *nr_zones)
{
- size_t size = *nr_zones * sizeof(struct blk_zone);
- struct page *page;
- int order;
-
- for (order = get_order(size); order >= 0; order--) {
- page = alloc_pages_node(node, GFP_NOIO | __GFP_ZERO, order);
- if (page) {
- *nr_zones = min_t(unsigned int, *nr_zones,
- (PAGE_SIZE << order) / sizeof(struct blk_zone));
- return page_address(page);
- }
+ struct blk_zone *zones;
+ size_t nrz = min(*nr_zones, BLK_ZONED_REPORT_MAX_ZONES);
+
+ /*
+ * GFP_KERNEL here is meaningless as the caller task context has
+ * the PF_MEMALLOC_NOIO flag set in blk_revalidate_disk_zones()
+ * with memalloc_noio_save().
+ */
+ zones = kvcalloc(nrz, sizeof(struct blk_zone), GFP_KERNEL);
+ if (!zones) {
+ *nr_zones = 0;
+ return NULL;
}
- return NULL;
+ *nr_zones = nrz;
+
+ return zones;
}
void blk_queue_free_zone_bitmaps(struct request_queue *q)
@@ -415,6 +421,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
unsigned int i, rep_nr_zones = 0, z = 0, nrz;
struct blk_zone *zones = NULL;
+ unsigned int noio_flag;
sector_t sector = 0;
int ret = 0;
@@ -427,6 +434,12 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
return 0;
}
+ /*
+ * Ensure that all memory allocations in this context are done as
+ * if GFP_NOIO was specified.
+ */
+ noio_flag = memalloc_noio_save();
+
if (!blk_queue_is_zoned(q) || !nr_zones) {
nr_zones = 0;
goto update;
@@ -443,7 +456,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
/* Get zone information and initialize seq_zones_bitmap */
rep_nr_zones = nr_zones;
- zones = blk_alloc_zones(q->node, &rep_nr_zones);
+ zones = blk_alloc_zones(&rep_nr_zones);
if (!zones)
goto out;
@@ -480,8 +493,9 @@ update:
blk_mq_unfreeze_queue(q);
out:
- free_pages((unsigned long)zones,
- get_order(rep_nr_zones * sizeof(struct blk_zone)));
+ memalloc_noio_restore(noio_flag);
+
+ kvfree(zones);
kfree(seq_zones_wlock);
kfree(seq_zones_bitmap);