aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/nvme/host/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host/core.c')
-rw-r--r--drivers/nvme/host/core.c77
1 files changed, 55 insertions, 22 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2bdac578ee51..eee1711d9639 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -6,6 +6,7 @@
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
+#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/hdreg.h>
@@ -66,8 +67,8 @@ MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
* nvme_reset_wq - hosts nvme reset works
* nvme_delete_wq - hosts nvme delete works
*
- * nvme_wq will host works such are scan, aen handling, fw activation,
- * keep-alive error recovery, periodic reconnects etc. nvme_reset_wq
+ * nvme_wq will host works such as scan, aen handling, fw activation,
+ * keep-alive, periodic reconnects etc. nvme_reset_wq
* runs reset works which also flush works hosted on nvme_wq for
* serialization purposes. nvme_delete_wq host controller deletion
* works which flush reset works for serialization.
@@ -565,8 +566,14 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
struct nvme_dsm_range *range;
struct bio *bio;
- range = kmalloc_array(segments, sizeof(*range),
- GFP_ATOMIC | __GFP_NOWARN);
+ /*
+ * Some devices do not consider the DSM 'Number of Ranges' field when
+ * determining how much data to DMA. Always allocate memory for maximum
+ * number of segments to prevent device reading beyond end of buffer.
+ */
+ static const size_t alloc_size = sizeof(*range) * NVME_DSM_MAX_RANGES;
+
+ range = kzalloc(alloc_size, GFP_ATOMIC | __GFP_NOWARN);
if (!range) {
/*
* If we fail allocation our range, fallback to the controller
@@ -606,7 +613,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
req->special_vec.bv_page = virt_to_page(range);
req->special_vec.bv_offset = offset_in_page(range);
- req->special_vec.bv_len = sizeof(*range) * segments;
+ req->special_vec.bv_len = alloc_size;
req->rq_flags |= RQF_SPECIAL_PAYLOAD;
return BLK_STS_OK;
@@ -929,7 +936,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
startka = true;
spin_unlock_irqrestore(&ctrl->lock, flags);
if (startka)
- schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
}
static int nvme_keep_alive(struct nvme_ctrl *ctrl)
@@ -959,7 +966,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
dev_dbg(ctrl->device,
"reschedule traffic based keep-alive timer\n");
ctrl->comp_seen = false;
- schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
return;
}
@@ -976,7 +983,7 @@ static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
if (unlikely(ctrl->kato == 0))
return;
- schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
}
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
@@ -988,6 +995,19 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
+/*
+ * In NVMe 1.0 the CNS field was just a binary controller or namespace
+ * flag, thus sending any new CNS opcodes has a big chance of not working.
+ * Qemu unfortunately had that bug after reporting a 1.1 version compliance
+ * (but not for any later version).
+ */
+static bool nvme_ctrl_limited_cns(struct nvme_ctrl *ctrl)
+{
+ if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)
+ return ctrl->vs < NVME_VS(1, 2, 0);
+ return ctrl->vs < NVME_VS(1, 1, 0);
+}
+
static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
{
struct nvme_command c = { };
@@ -1120,8 +1140,8 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
void *buffer, size_t buflen, u32 *result)
{
+ union nvme_result res = { 0 };
struct nvme_command c;
- union nvme_result res;
int ret;
memset(&c, 0, sizeof(c));
@@ -1182,6 +1202,18 @@ static void nvme_enable_aen(struct nvme_ctrl *ctrl)
supported_aens);
}
+/*
+ * Convert integer values from ioctl structures to user pointers, silently
+ * ignoring the upper bits in the compat case to match behaviour of 32-bit
+ * kernels.
+ */
+static void __user *nvme_to_user_ptr(uintptr_t ptrval)
+{
+ if (in_compat_syscall())
+ ptrval = (compat_uptr_t)ptrval;
+ return (void __user *)ptrval;
+}
+
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
struct nvme_user_io io;
@@ -1205,7 +1237,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
length = (io.nblocks + 1) << ns->lba_shift;
meta_len = (io.nblocks + 1) * ns->ms;
- metadata = (void __user *)(uintptr_t)io.metadata;
+ metadata = nvme_to_user_ptr(io.metadata);
if (ns->ext) {
length += meta_len;
@@ -1228,7 +1260,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
c.rw.appmask = cpu_to_le16(io.appmask);
return nvme_submit_user_cmd(ns->queue, &c,
- (void __user *)(uintptr_t)io.addr, length,
+ nvme_to_user_ptr(io.addr), length,
metadata, meta_len, lower_32_bits(io.slba), NULL, 0);
}
@@ -1349,9 +1381,9 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
- (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
- (void __user *)(uintptr_t)cmd.metadata,
- cmd.metadata_len, 0, &result, timeout);
+ nvme_to_user_ptr(cmd.addr), cmd.data_len,
+ nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
+ 0, &result, timeout);
nvme_passthru_end(ctrl, effects);
if (status >= 0) {
@@ -1396,8 +1428,8 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
- (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
- (void __user *)(uintptr_t)cmd.metadata, cmd.metadata_len,
+ nvme_to_user_ptr(cmd.addr), cmd.data_len,
+ nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
0, &cmd.result, timeout);
nvme_passthru_end(ctrl, effects);
@@ -1750,7 +1782,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
if (ns->head->disk) {
nvme_update_disk_info(ns->head->disk, ns, id);
blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
- revalidate_disk(ns->head->disk);
+ nvme_mpath_update_disk_size(ns->head->disk);
}
#endif
}
@@ -2450,8 +2482,8 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
lockdep_assert_held(&nvme_subsystems_lock);
list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) {
- if (ctrl->state == NVME_CTRL_DELETING ||
- ctrl->state == NVME_CTRL_DEAD)
+ if (tmp->state == NVME_CTRL_DELETING ||
+ tmp->state == NVME_CTRL_DEAD)
continue;
if (tmp->cntlid == ctrl->cntlid) {
@@ -3414,6 +3446,8 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
return 0;
out_put_disk:
+ /* prevent double queue cleanup */
+ ns->disk->queue = NULL;
put_disk(ns->disk);
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
@@ -3587,8 +3621,7 @@ static void nvme_scan_work(struct work_struct *work)
mutex_lock(&ctrl->scan_lock);
nn = le32_to_cpu(id->nn);
- if (ctrl->vs >= NVME_VS(1, 1, 0) &&
- !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
+ if (!nvme_ctrl_limited_cns(ctrl)) {
if (!nvme_scan_ns_list(ctrl, nn))
goto out_free_id;
}
@@ -3686,7 +3719,7 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
if (!log)
return;
- if (nvme_get_log(ctrl, NVME_NSID_ALL, 0, NVME_LOG_FW_SLOT, log,
+ if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, log,
sizeof(*log), 0))
dev_warn(ctrl->device, "Get FW SLOT INFO log error\n");
kfree(log);