aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/bfq-cgroup.c115
-rw-r--r--block/bfq-iosched.c50
-rw-r--r--block/bfq-iosched.h6
-rw-r--r--block/bio-integrity.c1
-rw-r--r--block/bio.c11
-rw-r--r--block/blk-cgroup.c4
-rw-r--r--block/blk-core.c5
-rw-r--r--block/blk-iocost.c21
-rw-r--r--block/blk-iolatency.c122
-rw-r--r--block/blk-merge.c2
-rw-r--r--block/blk-mq-debugfs.c3
-rw-r--r--block/blk-mq-sched.c71
-rw-r--r--block/blk-mq-sysfs.c11
-rw-r--r--block/blk-mq.c22
-rw-r--r--block/blk-stat.c2
-rw-r--r--block/blk-throttle.c2
-rw-r--r--block/opal_proto.h1
-rw-r--r--block/partition-generic.c7
-rw-r--r--block/partitions/amiga.c104
-rw-r--r--block/sed-opal.c38
20 files changed, 411 insertions, 187 deletions
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index c17eb794f0ae..59fd1b10b5f3 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -536,6 +536,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
*/
bfqg->bfqd = bfqd;
bfqg->active_entities = 0;
+ bfqg->online = true;
bfqg->rq_pos_tree = RB_ROOT;
}
@@ -564,28 +565,11 @@ static void bfq_group_set_parent(struct bfq_group *bfqg,
entity->sched_data = &parent->sched_data;
}
-static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd,
- struct blkcg *blkcg)
+static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg)
{
- struct blkcg_gq *blkg;
-
- blkg = blkg_lookup(blkcg, bfqd->queue);
- if (likely(blkg))
- return blkg_to_bfqg(blkg);
- return NULL;
-}
-
-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
- struct blkcg *blkcg)
-{
- struct bfq_group *bfqg, *parent;
+ struct bfq_group *parent;
struct bfq_entity *entity;
- bfqg = bfq_lookup_bfqg(bfqd, blkcg);
-
- if (unlikely(!bfqg))
- return NULL;
-
/*
* Update chain of bfq_groups as we might be handling a leaf group
* which, along with some of its relatives, has not been hooked yet
@@ -602,8 +586,28 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
bfq_group_set_parent(curr_bfqg, parent);
}
}
+}
- return bfqg;
+struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
+{
+ struct blkcg_gq *blkg = bio->bi_blkg;
+ struct bfq_group *bfqg;
+
+ while (blkg) {
+ if (!blkg->online) {
+ blkg = blkg->parent;
+ continue;
+ }
+ bfqg = blkg_to_bfqg(blkg);
+ if (bfqg->online) {
+ bio_associate_blkg_from_css(bio, &blkg->blkcg->css);
+ return bfqg;
+ }
+ blkg = blkg->parent;
+ }
+ bio_associate_blkg_from_css(bio,
+ &bfqg_to_blkg(bfqd->root_group)->blkcg->css);
+ return bfqd->root_group;
}
/**
@@ -679,25 +683,15 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* Move bic to blkcg, assuming that bfqd->lock is held; which makes
* sure that the reference to cgroup is valid across the call (see
* comments in bfq_bic_update_cgroup on this issue)
- *
- * NOTE: an alternative approach might have been to store the current
- * cgroup in bfqq and getting a reference to it, reducing the lookup
- * time here, at the price of slightly more complex code.
*/
-static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
- struct bfq_io_cq *bic,
- struct blkcg *blkcg)
+static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+ struct bfq_io_cq *bic,
+ struct bfq_group *bfqg)
{
struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
- struct bfq_group *bfqg;
struct bfq_entity *entity;
- bfqg = bfq_find_set_group(bfqd, blkcg);
-
- if (unlikely(!bfqg))
- bfqg = bfqd->root_group;
-
if (async_bfqq) {
entity = &async_bfqq->entity;
@@ -708,9 +702,39 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
}
if (sync_bfqq) {
- entity = &sync_bfqq->entity;
- if (entity->sched_data != &bfqg->sched_data)
- bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+ if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
+ /* We are the only user of this bfqq, just move it */
+ if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
+ bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+ } else {
+ struct bfq_queue *bfqq;
+
+ /*
+ * The queue was merged to a different queue. Check
+ * that the merge chain still belongs to the same
+ * cgroup.
+ */
+ for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
+ if (bfqq->entity.sched_data !=
+ &bfqg->sched_data)
+ break;
+ if (bfqq) {
+ /*
+ * Some queue changed cgroup so the merge is
+ * not valid anymore. We cannot easily just
+ * cancel the merge (by clearing new_bfqq) as
+ * there may be other processes using this
+ * queue and holding refs to all queues below
+ * sync_bfqq->new_bfqq. Similarly if the merge
+ * already happened, we need to detach from
+ * bfqq now so that we cannot merge bio to a
+ * request from the old cgroup.
+ */
+ bfq_put_cooperator(sync_bfqq);
+ bfq_release_process_ref(bfqd, sync_bfqq);
+ bic_set_bfqq(bic, NULL, 1);
+ }
+ }
}
return bfqg;
@@ -719,20 +743,24 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
{
struct bfq_data *bfqd = bic_to_bfqd(bic);
- struct bfq_group *bfqg = NULL;
+ struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio);
uint64_t serial_nr;
- rcu_read_lock();
- serial_nr = __bio_blkcg(bio)->css.serial_nr;
+ serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr;
/*
* Check whether blkcg has changed. The condition may trigger
* spuriously on a newly created cic but there's no harm.
*/
if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
- goto out;
+ return;
- bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
+ /*
+ * New cgroup for this process. Make sure it is linked to bfq internal
+ * cgroup hierarchy.
+ */
+ bfq_link_bfqg(bfqd, bfqg);
+ __bfq_bic_change_cgroup(bfqd, bic, bfqg);
/*
* Update blkg_path for bfq_log_* functions. We cache this
* path, and update it here, for the following
@@ -785,8 +813,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
*/
blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
bic->blkcg_serial_nr = serial_nr;
-out:
- rcu_read_unlock();
}
/**
@@ -914,6 +940,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
put_async_queues:
bfq_put_async_queues(bfqd, bfqg);
+ bfqg->online = false;
spin_unlock_irqrestore(&bfqd->lock, flags);
/*
@@ -1402,7 +1429,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd)
bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}
-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg)
+struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
{
return bfqd->root_group;
}
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index d46806182b05..c73c8b0f5e40 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -420,6 +420,8 @@ static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd,
*/
void bfq_schedule_dispatch(struct bfq_data *bfqd)
{
+ lockdep_assert_held(&bfqd->lock);
+
if (bfqd->queued != 0) {
bfq_log(bfqd, "schedule dispatch");
blk_mq_run_hw_queues(bfqd->queue, true);
@@ -2227,10 +2229,17 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
spin_lock_irq(&bfqd->lock);
- if (bic)
+ if (bic) {
+ /*
+ * Make sure cgroup info is uptodate for current process before
+ * considering the merge.
+ */
+ bfq_bic_update_cgroup(bic, bio);
+
bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf));
- else
+ } else {
bfqd->bio_bfqq = NULL;
+ }
bfqd->bio_bic = bic;
ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
@@ -2260,8 +2269,6 @@ static int bfq_request_merge(struct request_queue *q, struct request **req,
return ELEVATOR_NO_MERGE;
}
-static struct bfq_queue *bfq_init_rq(struct request *rq);
-
static void bfq_request_merged(struct request_queue *q, struct request *req,
enum elv_merge type)
{
@@ -2270,7 +2277,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
blk_rq_pos(req) <
blk_rq_pos(container_of(rb_prev(&req->rb_node),
struct request, rb_node))) {
- struct bfq_queue *bfqq = bfq_init_rq(req);
+ struct bfq_queue *bfqq = RQ_BFQQ(req);
struct bfq_data *bfqd;
struct request *prev, *next_rq;
@@ -2322,8 +2329,8 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
static void bfq_requests_merged(struct request_queue *q, struct request *rq,
struct request *next)
{
- struct bfq_queue *bfqq = bfq_init_rq(rq),
- *next_bfqq = bfq_init_rq(next);
+ struct bfq_queue *bfqq = RQ_BFQQ(rq),
+ *next_bfqq = RQ_BFQQ(next);
if (!bfqq)
return;
@@ -2502,6 +2509,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
if (process_refs == 0 || new_process_refs == 0)
return NULL;
+ /*
+ * Make sure merged queues belong to the same parent. Parents could
+ * have changed since the time we decided the two queues are suitable
+ * for merging.
+ */
+ if (new_bfqq->entity.parent != bfqq->entity.parent)
+ return NULL;
+
bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
new_bfqq->pid);
@@ -4914,7 +4929,7 @@ void bfq_put_queue(struct bfq_queue *bfqq)
bfqg_and_blkg_put(bfqg);
}
-static void bfq_put_cooperator(struct bfq_queue *bfqq)
+void bfq_put_cooperator(struct bfq_queue *bfqq)
{
struct bfq_queue *__bfqq, *next;
@@ -5145,14 +5160,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
struct bfq_queue *bfqq;
struct bfq_group *bfqg;
- rcu_read_lock();
-
- bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
- if (!bfqg) {
- bfqq = &bfqd->oom_bfqq;
- goto out;
- }
-
+ bfqg = bfq_bio_bfqg(bfqd, bio);
if (!is_sync) {
async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
ioprio);
@@ -5196,7 +5204,6 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
out:
bfqq->ref++; /* get a process reference to this queue */
bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, bfqq->ref);
- rcu_read_unlock();
return bfqq;
}
@@ -5499,6 +5506,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
unsigned int cmd_flags) {}
#endif /* CONFIG_BFQ_CGROUP_DEBUG */
+static struct bfq_queue *bfq_init_rq(struct request *rq);
+
static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head)
{
@@ -5509,17 +5518,14 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
unsigned int cmd_flags;
spin_lock_irq(&bfqd->lock);
+ bfqq = bfq_init_rq(rq);
if (blk_mq_sched_try_insert_merge(q, rq)) {
spin_unlock_irq(&bfqd->lock);
return;
}
- spin_unlock_irq(&bfqd->lock);
-
blk_mq_sched_request_inserted(rq);
- spin_lock_irq(&bfqd->lock);
- bfqq = bfq_init_rq(rq);
if (!bfqq || at_head || blk_rq_is_passthrough(rq)) {
if (at_head)
list_add(&rq->queuelist, &bfqd->dispatch);
@@ -6253,8 +6259,8 @@ bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_bfqq_expire(bfqd, bfqq, true, reason);
schedule_dispatch:
- spin_unlock_irqrestore(&bfqd->lock, flags);
bfq_schedule_dispatch(bfqd);
+ spin_unlock_irqrestore(&bfqd->lock, flags);
}
/*
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index de98fdfe9ea1..f6cc2b418008 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -896,6 +896,8 @@ struct bfq_group {
/* reference counter (see comments in bfq_bic_update_cgroup) */
int ref;
+ /* Is bfq_group still online? */
+ bool online;
struct bfq_entity entity;
struct bfq_sched_data sched_data;
@@ -949,6 +951,7 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bool compensate, enum bfqq_expiration reason);
void bfq_put_queue(struct bfq_queue *bfqq);
+void bfq_put_cooperator(struct bfq_queue *bfqq);
void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq);
void bfq_schedule_dispatch(struct bfq_data *bfqd);
@@ -975,8 +978,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg);
void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio);
void bfq_end_wr_async(struct bfq_data *bfqd);
-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
- struct blkcg *blkcg);
+struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio);
struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index ec295be93ca0..247f7c480e66 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -424,6 +424,7 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
bip->bip_vcnt = bip_src->bip_vcnt;
bip->bip_iter = bip_src->bip_iter;
+ bip->bip_flags = bip_src->bip_flags & ~BIP_BLOCK_INTEGRITY;
return 0;
}
diff --git a/block/bio.c b/block/bio.c
index 40004a3631a8..e3d3e75c97e0 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -710,7 +710,7 @@ static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio,
if ((addr1 | mask) != (addr2 | mask))
return false;
- if (bv->bv_len + len > queue_max_segment_size(q))
+ if (len > queue_max_segment_size(q) - bv->bv_len)
return false;
return __bio_try_merge_page(bio, page, len, offset, same_page);
}
@@ -884,7 +884,7 @@ void bio_release_pages(struct bio *bio, bool mark_dirty)
return;
bio_for_each_segment_all(bvec, bio, iter_all) {
- if (mark_dirty && !PageCompound(bvec->bv_page))
+ if (mark_dirty)
set_page_dirty_lock(bvec->bv_page);
put_page(bvec->bv_page);
}
@@ -1691,8 +1691,7 @@ void bio_set_pages_dirty(struct bio *bio)
struct bvec_iter_all iter_all;
bio_for_each_segment_all(bvec, bio, iter_all) {
- if (!PageCompound(bvec->bv_page))
- set_page_dirty_lock(bvec->bv_page);
+ set_page_dirty_lock(bvec->bv_page);
}
}
@@ -1740,7 +1739,7 @@ void bio_check_pages_dirty(struct bio *bio)
struct bvec_iter_all iter_all;
bio_for_each_segment_all(bvec, bio, iter_all) {
- if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
+ if (!PageDirty(bvec->bv_page))
goto defer;
}
@@ -2179,7 +2178,7 @@ void bio_clone_blkg_association(struct bio *dst, struct bio *src)
rcu_read_lock();
if (src->bi_blkg)
- __bio_associate_blkg(dst, src->bi_blkg);
+ bio_associate_blkg_from_css(dst, &bio_blkcg(src)->css);
rcu_read_unlock();
}
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index dde8d0acfb34..cd085a0e5e4a 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1445,6 +1445,10 @@ retry:
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
pol->pd_init_fn(blkg->pd[pol->plid]);
+ if (pol->pd_online_fn)
+ list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
+ pol->pd_online_fn(blkg->pd[pol->plid]);
+
__set_bit(pol->plid, q->blkcg_pols);
ret = 0;
diff --git a/block/blk-core.c b/block/blk-core.c
index 5808baa950c3..030de4fdf9b1 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -793,10 +793,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
return false;
-
- WARN_ONCE(1,
- "generic_make_request: Trying to write "
- "to read-only block-device %s (partno %d)\n",
+ pr_warn("Trying to write to read-only block-device %s (partno %d)\n",
bio_devname(bio, b), part->partno);
/* Older lvm-tools actually trigger this */
return false;
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index ef287c33d6d9..e7d5aafa5e99 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -248,7 +248,9 @@ enum {
/* 1/64k is granular enough and can easily be handled w/ u32 */
HWEIGHT_WHOLE = 1 << 16,
+};
+enum {
/*
* As vtime is used to calculate the cost of each IO, it needs to
* be fairly high precision. For example, it should be able to
@@ -271,6 +273,11 @@ enum {
VRATE_MIN = VTIME_PER_USEC * VRATE_MIN_PPM / MILLION,
VRATE_CLAMP_ADJ_PCT = 4,
+ /* switch iff the conditions are met for longer than this */
+ AUTOP_CYCLE_NSEC = 10LLU * NSEC_PER_SEC,
+};
+
+enum {
/* if IOs end up waiting for requests, issue less */
RQ_WAIT_BUSY_PCT = 5,
@@ -288,9 +295,6 @@ enum {
SURPLUS_SCALE_ABS = HWEIGHT_WHOLE / 50, /* + 2% */
SURPLUS_MIN_ADJ_DELTA = HWEIGHT_WHOLE / 33, /* 3% */
- /* switch iff the conditions are met for longer than this */
- AUTOP_CYCLE_NSEC = 10LLU * NSEC_PER_SEC,
-
/*
* Count IO size in 4k pages. The 12bit shift helps keeping
* size-proportional components of cost calculation in closer
@@ -785,9 +789,14 @@ static void calc_lcoefs(u64 bps, u64 seqiops, u64 randiops,
*page = *seqio = *randio = 0;
- if (bps)
- *page = DIV64_U64_ROUND_UP(VTIME_PER_SEC,
- DIV_ROUND_UP_ULL(bps, IOC_PAGE_SIZE));
+ if (bps) {
+ u64 bps_pages = DIV_ROUND_UP_ULL(bps, IOC_PAGE_SIZE);
+
+ if (bps_pages)
+ *page = DIV64_U64_ROUND_UP(VTIME_PER_SEC, bps_pages);
+ else
+ *page = 1;
+ }
if (seqiops) {
v = DIV64_U64_ROUND_UP(VTIME_PER_SEC, seqiops);
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 71a82528d4bf..a4156b3b33c3 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -86,7 +86,17 @@ struct iolatency_grp;
struct blk_iolatency {
struct rq_qos rqos;
struct timer_list timer;
- atomic_t enabled;
+
+ /*
+ * ->enabled is the master enable switch gating the throttling logic and
+ * inflight tracking. The number of cgroups which have iolat enabled is
+ * tracked in ->enable_cnt, and ->enable is flipped on/off accordingly
+ * from ->enable_work with the request_queue frozen. For details, See
+ * blkiolatency_enable_work_fn().
+ */
+ bool enabled;
+ atomic_t enable_cnt;
+ struct work_struct enable_work;
};
static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
@@ -94,11 +104,6 @@ static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
return container_of(rqos, struct blk_iolatency, rqos);
}
-static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat)
-{
- return atomic_read(&blkiolat->enabled) > 0;
-}
-
struct child_latency_info {
spinlock_t lock;
@@ -463,7 +468,7 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio)
struct blkcg_gq *blkg = bio->bi_blkg;
bool issue_as_root = bio_issue_as_root_blkg(bio);
- if (!blk_iolatency_enabled(blkiolat))
+ if (!blkiolat->enabled)
return;
while (blkg && blkg->parent) {
@@ -593,7 +598,6 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
u64 window_start;
u64 now = ktime_to_ns(ktime_get());
bool issue_as_root = bio_issue_as_root_blkg(bio);
- bool enabled = false;
int inflight = 0;
blkg = bio->bi_blkg;
@@ -604,8 +608,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
if (!iolat)
return;
- enabled = blk_iolatency_enabled(iolat->blkiolat);
- if (!enabled)
+ if (!iolat->blkiolat->enabled)
return;
while (blkg && blkg->parent) {
@@ -643,6 +646,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos)
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
del_timer_sync(&blkiolat->timer);
+ flush_work(&blkiolat->enable_work);
blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
kfree(blkiolat);
}
@@ -714,6 +718,44 @@ next:
rcu_read_unlock();
}
+/**
+ * blkiolatency_enable_work_fn - Enable or disable iolatency on the device
+ * @work: enable_work of the blk_iolatency of interest
+ *
+ * iolatency needs to keep track of the number of in-flight IOs per cgroup. This
+ * is relatively expensive as it involves walking up the hierarchy twice for
+ * every IO. Thus, if iolatency is not enabled in any cgroup for the device, we
+ * want to disable the in-flight tracking.
+ *
+ * We have to make sure that the counting is balanced - we don't want to leak
+ * the in-flight counts by disabling accounting in the completion path while IOs
+ * are in flight. This is achieved by ensuring that no IO is in flight by
+ * freezing the queue while flipping ->enabled. As this requires a sleepable
+ * context, ->enabled flipping is punted to this work function.
+ */
+static void blkiolatency_enable_work_fn(struct work_struct *work)
+{
+ struct blk_iolatency *blkiolat = container_of(work, struct blk_iolatency,
+ enable_work);
+ bool enabled;
+
+ /*
+ * There can only be one instance of this function running for @blkiolat
+ * and it's guaranteed to be executed at least once after the latest
+ * ->enabled_cnt modification. Acting on the latest ->enable_cnt is
+ * sufficient.
+ *
+ * Also, we know @blkiolat is safe to access as ->enable_work is flushed
+ * in blkcg_iolatency_exit().
+ */
+ enabled = atomic_read(&blkiolat->enable_cnt);
+ if (enabled != blkiolat->enabled) {
+ blk_mq_freeze_queue(blkiolat->rqos.q);
+ blkiolat->enabled = enabled;
+ blk_mq_unfreeze_queue(blkiolat->rqos.q);
+ }
+}
+
int blk_iolatency_init(struct request_queue *q)
{
struct blk_iolatency *blkiolat;
@@ -739,17 +781,15 @@ int blk_iolatency_init(struct request_queue *q)
}
timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
+ INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn);
return 0;
}
-/*
- * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise
- * return 0.
- */
-static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
+static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
{
struct iolatency_grp *iolat = blkg_to_lat(blkg);
+ struct blk_iolatency *blkiolat = iolat->blkiolat;
u64 oldval = iolat->min_lat_nsec;
iolat->min_lat_nsec = val;
@@ -757,13 +797,15 @@ static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec,
BLKIOLATENCY_MAX_WIN_SIZE);
- if (!oldval && val)
- return 1;
+ if (!oldval && val) {
+ if (atomic_inc_return(&blkiolat->enable_cnt) == 1)
+ schedule_work(&blkiolat->enable_work);
+ }
if (oldval && !val) {
blkcg_clear_delay(blkg);
- return -1;
+ if (atomic_dec_return(&blkiolat->enable_cnt) == 0)
+ schedule_work(&blkiolat->enable_work);
}
- return 0;
}
static void iolatency_clear_scaling(struct blkcg_gq *blkg)
@@ -795,7 +837,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
u64 lat_val = 0;
u64 oldval;
int ret;
- int enable = 0;
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
if (ret)
@@ -830,41 +871,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
blkg = ctx.blkg;
oldval = iolat->min_lat_nsec;
- enable = iolatency_set_min_lat_nsec(blkg, lat_val);
- if (enable) {
- if (!blk_get_queue(blkg->q)) {
- ret = -ENODEV;
- goto out;
- }
-
- blkg_get(blkg);
- }
-
- if (oldval != iolat->min_lat_nsec) {
+ iolatency_set_min_lat_nsec(blkg, lat_val);
+ if (oldval != iolat->min_lat_nsec)
iolatency_clear_scaling(blkg);
- }
-
ret = 0;
out:
blkg_conf_finish(&ctx);
- if (ret == 0 && enable) {
- struct iolatency_grp *tmp = blkg_to_lat(blkg);
- struct blk_iolatency *blkiolat = tmp->blkiolat;
-
- blk_mq_freeze_queue(blkg->q);
-
- if (enable == 1)
- atomic_inc(&blkiolat->enabled);
- else if (enable == -1)
- atomic_dec(&blkiolat->enabled);
- else
- WARN_ON_ONCE(1);
-
- blk_mq_unfreeze_queue(blkg->q);
-
- blkg_put(blkg);
- blk_put_queue(blkg->q);
- }
return ret ?: nbytes;
}
@@ -1005,14 +1017,8 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd)
{
struct iolatency_grp *iolat = pd_to_lat(pd);
struct blkcg_gq *blkg = lat_to_blkg(iolat);
- struct blk_iolatency *blkiolat = iolat->blkiolat;
- int ret;
- ret = iolatency_set_min_lat_nsec(blkg, 0);
- if (ret == 1)
- atomic_inc(&blkiolat->enabled);
- if (ret == -1)
- atomic_dec(&blkiolat->enabled);
+ iolatency_set_min_lat_nsec(blkg, 0);
iolatency_clear_scaling(blkg);
}
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 5219064cd72b..711fa0fcd211 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -477,7 +477,7 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
struct scatterlist *sglist,
struct scatterlist **sg)
{
- struct bio_vec uninitialized_var(bvec), bvprv = { NULL };
+ struct bio_vec bvec, bvprv = { NULL };
struct bvec_iter iter;
int nsegs = 0;
bool new_bio = false;
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 121f4c1e0697..772a6c6b1634 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -883,6 +883,9 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q,
char name[20];
int i;
+ if (!q->debugfs_dir)
+ return;
+
snprintf(name, sizeof(name), "hctx%u", hctx->queue_num);
hctx->debugfs_dir = debugfs_create_dir(name, q->debugfs_dir);
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index f422c7feea7e..126a9a635594 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -59,8 +59,7 @@ void blk_mq_sched_assign_ioc(struct request *rq)
}
/*
- * Mark a hardware queue as needing a restart. For shared queues, maintain
- * a count of how many hardware queues are marked for restart.
+ * Mark a hardware queue as needing a restart.
*/
void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
@@ -92,13 +91,17 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
/*
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
* its queue by itself in its completion handler, so we don't need to
- * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
+ * restart queue if .get_budget() fails to get the budget.
+ *
+ * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
+ * be run again. This is necessary to avoid starving flushes.
*/
-static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
+static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct elevator_queue *e = q->elevator;
LIST_HEAD(rq_list);
+ int ret = 0;
do {
struct request *rq;
@@ -106,6 +109,11 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
break;
+ if (!list_empty_careful(&hctx->dispatch)) {
+ ret = -EAGAIN;
+ break;
+ }
+
if (!blk_mq_get_dispatch_budget(hctx))
break;
@@ -122,6 +130,8 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
*/
list_add(&rq->queuelist, &rq_list);
} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
+
+ return ret;
}
static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
@@ -138,17 +148,26 @@ static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
/*
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
* its queue by itself in its completion handler, so we don't need to
- * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
+ * restart queue if .get_budget() fails to get the budget.
+ *
+ * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
+ * to be run again. This is necessary to avoid starving flushes.
*/
-static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
+static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
LIST_HEAD(rq_list);
struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
+ int ret = 0;
do {
struct request *rq;
+ if (!list_empty_careful(&hctx->dispatch)) {
+ ret = -EAGAIN;
+ break;
+ }
+
if (!sbitmap_any_bit_set(&hctx->ctx_map))
break;
@@ -174,21 +193,17 @@ static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
WRITE_ONCE(hctx->dispatch_from, ctx);
+ return ret;
}
-void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
+int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct elevator_queue *e = q->elevator;
const bool has_sched_dispatch = e && e->type->ops.dispatch_request;
+ int ret = 0;
LIST_HEAD(rq_list);
- /* RCU or SRCU read lock is needed before checking quiesced flag */
- if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
- return;
-
- hctx->run++;
-
/*
* If we have previous entries on our dispatch list, grab them first for
* more fair dispatch.
@@ -217,19 +232,41 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
blk_mq_sched_mark_restart_hctx(hctx);
if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
if (has_sched_dispatch)
- blk_mq_do_dispatch_sched(hctx);
+ ret = blk_mq_do_dispatch_sched(hctx);
else
- blk_mq_do_dispatch_ctx(hctx);
+ ret = blk_mq_do_dispatch_ctx(hctx);
}
} else if (has_sched_dispatch) {
- blk_mq_do_dispatch_sched(hctx);
+ ret = blk_mq_do_dispatch_sched(hctx);
} else if (hctx->dispatch_busy) {
/* dequeue request one by one from sw queue if queue is busy */
- blk_mq_do_dispatch_ctx(hctx);
+ ret = blk_mq_do_dispatch_ctx(hctx);
} else {
blk_mq_flush_busy_ctxs(hctx, &rq_list);
blk_mq_dispatch_rq_list(q, &rq_list, false);
}
+
+ return ret;
+}
+
+void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
+{
+ struct request_queue *q = hctx->queue;
+
+ /* RCU or SRCU read lock is needed before checking quiesced flag */
+ if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
+ return;
+
+ hctx->run++;
+
+ /*
+ * A return of -EAGAIN is an indication that hctx->dispatch is not
+ * empty and we must run again in order to avoid starving flushes.
+ */
+ if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) {
+ if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN)
+ blk_mq_run_hw_queue(hctx, true);
+ }
}
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 5dafd7a8ec91..7abd66d1228a 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -250,7 +250,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct blk_mq_ctx *ctx;
- int i, ret;
+ int i, j, ret;
if (!hctx->nr_ctx)
return 0;
@@ -262,9 +262,16 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
hctx_for_each_ctx(hctx, ctx, i) {
ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu);
if (ret)
- break;
+ goto out;
}
+ return 0;
+out:
+ hctx_for_each_ctx(hctx, ctx, j) {
+ if (j < i)
+ kobject_del(&ctx->kobj);
+ }
+ kobject_del(&hctx->kobj);
return ret;
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 84798d09ca46..03f4eb37dfc7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1112,7 +1112,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
- struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags;
+ struct sbitmap_queue *sbq;
struct wait_queue_head *wq;
wait_queue_entry_t *wait;
bool ret;
@@ -1135,6 +1135,10 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
if (!list_empty_careful(&wait->entry))
return false;
+ if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag))
+ sbq = &hctx->tags->breserved_tags;
+ else
+ sbq = &hctx->tags->bitmap_tags;
wq = &bt_wait_ptr(sbq, hctx)->wait;
spin_lock_irq(&wq->lock);
@@ -1150,6 +1154,22 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
__add_wait_queue(wq, wait);
/*
+ * Add one explicit barrier since blk_mq_get_driver_tag() may
+ * not imply barrier in case of failure.
+ *
+ * Order adding us to wait queue and allocating driver tag.
+ *
+ * The pair is the one implied in sbitmap_queue_wake_up() which
+ * orders clearing sbitmap tag bits and waitqueue_active() in
+ * __sbitmap_queue_wake_up(), since waitqueue_active() is lockless
+ *
+ * Otherwise, re-order of adding wait queue and getting driver tag
+ * may cause __sbitmap_queue_wake_up() to wake up nothing because
+ * the waitqueue_active() may not observe us in wait queue.
+ */
+ smp_mb();
+
+ /*
* It's possible that a tag was freed in the window between the
* allocation failure and adding the hardware queue to the wait
* queue.
diff --git a/block/blk-stat.c b/block/blk-stat.c
index 940f15d600f8..af482d8f9f7a 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -28,7 +28,7 @@ void blk_rq_stat_init(struct blk_rq_stat *stat)
/* src is a per-cpu stat, mean isn't initialized */
void blk_rq_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
{
- if (!src->nr_samples)
+ if (dst->nr_samples + src->nr_samples <= dst->nr_samples)
return;
dst->min = min(dst->min, src->min);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index bd870f9ae458..43444934b985 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1374,6 +1374,7 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global)
tg_bps_limit(tg, READ), tg_bps_limit(tg, WRITE),
tg_iops_limit(tg, READ), tg_iops_limit(tg, WRITE));
+ rcu_read_lock();
/*
* Update has_rules[] flags for the updated tg's subtree. A tg is
* considered to have rules if either the tg itself or any of its
@@ -1401,6 +1402,7 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global)
this_tg->latency_target = max(this_tg->latency_target,
parent_tg->latency_target);
}
+ rcu_read_unlock();
/*
* We're already holding queue_lock and know @tg is valid. Let's
diff --git a/block/opal_proto.h b/block/opal_proto.h
index 5532412d567c..354b4e93bf63 100644
--- a/block/opal_proto.h
+++ b/block/opal_proto.h
@@ -65,6 +65,7 @@ enum opal_response_token {
#define SHORT_ATOM_BYTE 0xBF
#define MEDIUM_ATOM_BYTE 0xDF
#define LONG_ATOM_BYTE 0xE3
+#define EMPTY_ATOM_BYTE 0xFF
#define OPAL_INVAL_PARAM 12
#define OPAL_MANUFACTURED_INACTIVE 0x08
diff --git a/block/partition-generic.c b/block/partition-generic.c
index aee643ce13d1..e69452c1f5ad 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -272,6 +272,7 @@ void delete_partition(struct gendisk *disk, int partno)
struct disk_part_tbl *ptbl =
rcu_dereference_protected(disk->part_tbl, 1);
struct hd_struct *part;
+ struct block_device *bdev;
if (partno >= ptbl->len)
return;
@@ -292,6 +293,12 @@ void delete_partition(struct gendisk *disk, int partno)
* "in-use" until we really free the gendisk.
*/
blk_invalidate_devt(part_devt(part));
+
+ bdev = bdget(part_devt(part));
+ if (bdev) {
+ remove_inode_hash(bdev->bd_inode);
+ bdput(bdev);
+ }
hd_struct_kill(part);
}
diff --git a/block/partitions/amiga.c b/block/partitions/amiga.c
index 560936617d9c..484a51bce39b 100644
--- a/block/partitions/amiga.c
+++ b/block/partitions/amiga.c
@@ -11,11 +11,19 @@
#define pr_fmt(fmt) fmt
#include <linux/types.h>
+#include <linux/mm_types.h>
+#include <linux/overflow.h>
#include <linux/affs_hardblocks.h>
#include "check.h"
#include "amiga.h"
+/* magic offsets in partition DosEnvVec */
+#define NR_HD 3
+#define NR_SECT 5
+#define LO_CYL 9
+#define HI_CYL 10
+
static __inline__ u32
checksum_block(__be32 *m, int size)
{
@@ -32,8 +40,12 @@ int amiga_partition(struct parsed_partitions *state)
unsigned char *data;
struct RigidDiskBlock *rdb;
struct PartitionBlock *pb;
- int start_sect, nr_sects, blk, part, res = 0;
- int blksize = 1; /* Multiplier for disk block size */
+ u64 start_sect, nr_sects;
+ sector_t blk, end_sect;
+ u32 cylblk; /* rdb_CylBlocks = nr_heads*sect_per_track */
+ u32 nr_hd, nr_sect, lo_cyl, hi_cyl;
+ int part, res = 0;
+ unsigned int blksize = 1; /* Multiplier for disk block size */
int slot = 1;
char b[BDEVNAME_SIZE];
@@ -43,7 +55,7 @@ int amiga_partition(struct parsed_partitions *state)
data = read_part_sector(state, blk, &sect);
if (!data) {
if (warn_no_part)
- pr_err("Dev %s: unable to read RDB block %d\n",
+ pr_err("Dev %s: unable to read RDB block %llu\n",
bdevname(state->bdev, b), blk);
res = -1;
goto rdb_done;
@@ -60,12 +72,12 @@ int amiga_partition(struct parsed_partitions *state)
*(__be32 *)(data+0xdc) = 0;
if (checksum_block((__be32 *)data,
be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F)==0) {
- pr_err("Trashed word at 0xd0 in block %d ignored in checksum calculation\n",
+ pr_err("Trashed word at 0xd0 in block %llu ignored in checksum calculation\n",
blk);
break;
}
- pr_err("Dev %s: RDB in block %d has bad checksum\n",
+ pr_err("Dev %s: RDB in block %llu has bad checksum\n",
bdevname(state->bdev, b), blk);
}
@@ -81,12 +93,17 @@ int amiga_partition(struct parsed_partitions *state)
}
blk = be32_to_cpu(rdb->rdb_PartitionList);
put_dev_sector(sect);
- for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) {
- blk *= blksize; /* Read in terms partition table understands */
+ for (part = 1; (s32) blk>0 && part<=16; part++, put_dev_sector(sect)) {
+ /* Read in terms partition table understands */
+ if (check_mul_overflow(blk, (sector_t) blksize, &blk)) {
+ pr_err("Dev %s: overflow calculating partition block %llu! Skipping partitions %u and beyond\n",
+ bdevname(state->bdev, b), blk, part);
+ break;
+ }
data = read_part_sector(state, blk, &sect);
if (!data) {
if (warn_no_part)
- pr_err("Dev %s: unable to read partition block %d\n",
+ pr_err("Dev %s: unable to read partition block %llu\n",
bdevname(state->bdev, b), blk);
res = -1;
goto rdb_done;
@@ -98,19 +115,70 @@ int amiga_partition(struct parsed_partitions *state)
if (checksum_block((__be32 *)pb, be32_to_cpu(pb->pb_SummedLongs) & 0x7F) != 0 )
continue;
- /* Tell Kernel about it */
+ /* RDB gives us more than enough rope to hang ourselves with,
+ * many times over (2^128 bytes if all fields max out).
+ * Some careful checks are in order, so check for potential
+ * overflows.
+ * We are multiplying four 32 bit numbers to one sector_t!
+ */
+
+ nr_hd = be32_to_cpu(pb->pb_Environment[NR_HD]);
+ nr_sect = be32_to_cpu(pb->pb_Environment[NR_SECT]);
+
+ /* CylBlocks is total number of blocks per cylinder */
+ if (check_mul_overflow(nr_hd, nr_sect, &cylblk)) {
+ pr_err("Dev %s: heads*sects %u overflows u32, skipping partition!\n",
+ bdevname(state->bdev, b), cylblk);
+ continue;
+ }
+
+ /* check for consistency with RDB defined CylBlocks */
+ if (cylblk > be32_to_cpu(rdb->rdb_CylBlocks)) {
+ pr_warn("Dev %s: cylblk %u > rdb_CylBlocks %u!\n",
+ bdevname(state->bdev, b), cylblk,
+ be32_to_cpu(rdb->rdb_CylBlocks));
+ }
+
+ /* RDB allows for variable logical block size -
+ * normalize to 512 byte blocks and check result.
+ */
+
+ if (check_mul_overflow(cylblk, blksize, &cylblk)) {
+ pr_err("Dev %s: partition %u bytes per cyl. overflows u32, skipping partition!\n",
+ bdevname(state->bdev, b), part);
+ continue;
+ }
+
+ /* Calculate partition start and end. Limit of 32 bit on cylblk
+ * guarantees no overflow occurs if LBD support is enabled.
+ */
+
+ lo_cyl = be32_to_cpu(pb->pb_Environment[LO_CYL]);
+ start_sect = ((u64) lo_cyl * cylblk);
+
+ hi_cyl = be32_to_cpu(pb->pb_Environment[HI_CYL]);
+ nr_sects = (((u64) hi_cyl - lo_cyl + 1) * cylblk);
- nr_sects = (be32_to_cpu(pb->pb_Environment[10]) + 1 -
- be32_to_cpu(pb->pb_Environment[9])) *
- be32_to_cpu(pb->pb_Environment[3]) *
- be32_to_cpu(pb->pb_Environment[5]) *
- blksize;
if (!nr_sects)
continue;
- start_sect = be32_to_cpu(pb->pb_Environment[9]) *
- be32_to_cpu(pb->pb_Environment[3]) *
- be32_to_cpu(pb->pb_Environment[5]) *
- blksize;
+
+ /* Warn user if partition end overflows u32 (AmigaDOS limit) */
+
+ if ((start_sect + nr_sects) > UINT_MAX) {
+ pr_warn("Dev %s: partition %u (%llu-%llu) needs 64 bit device support!\n",
+ bdevname(state->bdev, b), part,
+ start_sect, start_sect + nr_sects);
+ }
+
+ if (check_add_overflow(start_sect, nr_sects, &end_sect)) {
+ pr_err("Dev %s: partition %u (%llu-%llu) needs LBD device support, skipping partition!\n",
+ bdevname(state->bdev, b), part,
+ start_sect, end_sect);
+ continue;
+ }
+
+ /* Tell Kernel about it */
+
put_partition(state,slot++,start_sect,nr_sects);
{
/* Be even more informative to aid mounting */
diff --git a/block/sed-opal.c b/block/sed-opal.c
index b4c761973ac1..1a0876251529 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -88,8 +88,8 @@ struct opal_dev {
u64 lowest_lba;
size_t pos;
- u8 cmd[IO_BUFFER_LENGTH];
- u8 resp[IO_BUFFER_LENGTH];
+ u8 *cmd;
+ u8 *resp;
struct parsed_resp parsed;
size_t prev_d_len;
@@ -893,16 +893,20 @@ static int response_parse(const u8 *buf, size_t length,
token_length = response_parse_medium(iter, pos);
else if (pos[0] <= LONG_ATOM_BYTE) /* long atom */
token_length = response_parse_long(iter, pos);
+ else if (pos[0] == EMPTY_ATOM_BYTE) /* empty atom */
+ token_length = 1;
else /* TOKEN */
token_length = response_parse_token(iter, pos);
if (token_length < 0)
return token_length;
+ if (pos[0] != EMPTY_ATOM_BYTE)
+ num_entries++;
+
pos += token_length;
total -= token_length;
iter++;
- num_entries++;
}
resp->num = num_entries;
@@ -2019,6 +2023,8 @@ void free_opal_dev(struct opal_dev *dev)
return;
clean_opal_dev(dev);
+ kfree(dev->resp);
+ kfree(dev->cmd);
kfree(dev);
}
EXPORT_SYMBOL(free_opal_dev);
@@ -2031,17 +2037,39 @@ struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv)
if (!dev)
return NULL;
+ /*
+ * Presumably DMA-able buffers must be cache-aligned. Kmalloc makes
+ * sure the allocated buffer is DMA-safe in that regard.
+ */
+ dev->cmd = kmalloc(IO_BUFFER_LENGTH, GFP_KERNEL);
+ if (!dev->cmd)
+ goto err_free_dev;
+
+ dev->resp = kmalloc(IO_BUFFER_LENGTH, GFP_KERNEL);
+ if (!dev->resp)
+ goto err_free_cmd;
+
INIT_LIST_HEAD(&dev->unlk_lst);
mutex_init(&dev->dev_lock);
dev->data = data;
dev->send_recv = send_recv;
if (check_opal_support(dev) != 0) {
pr_debug("Opal is not supported on this device\n");
- kfree(dev);
- return NULL;
+ goto err_free_resp;
}
return dev;
+
+err_free_resp:
+ kfree(dev->resp);
+
+err_free_cmd:
+ kfree(dev->cmd);
+
+err_free_dev:
+ kfree(dev);
+
+ return NULL;
}
EXPORT_SYMBOL(init_opal_dev);