aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/Kconfig9
-rw-r--r--drivers/block/Makefile2
-rw-r--r--drivers/block/aoe/aoecmd.c12
-rw-r--r--drivers/block/aoe/aoenet.c1
-rw-r--r--drivers/block/drbd/drbd_main.c4
-rw-r--r--drivers/block/drbd/drbd_nl.c6
-rw-r--r--drivers/block/drbd/drbd_receiver.c2
-rw-r--r--drivers/block/loop.c182
-rw-r--r--drivers/block/nbd.c19
-rw-r--r--drivers/block/null_blk_main.c14
-rw-r--r--drivers/block/rbd.c537
-rw-r--r--drivers/block/sunvdc.c2
-rw-r--r--drivers/block/sx8.c1586
-rw-r--r--drivers/block/virtio_blk.c7
-rw-r--r--drivers/block/xen-blkfront.c3
15 files changed, 450 insertions, 1936 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 8a08cbb958d1..c867211f5ed7 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -299,15 +299,6 @@ config BLK_DEV_SKD
Use device /dev/skd$N amd /dev/skd$Np$M.
-config BLK_DEV_SX8
- tristate "Promise SATA SX8 support"
- depends on PCI
- ---help---
- Saying Y or M here will enable support for the
- Promise SATA SX8 controllers.
-
- Use devices /dev/sx8/$N and /dev/sx8/$Np$M.
-
config BLK_DEV_RAM
tristate "RAM block device support"
---help---
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index a53cc1e3a2d3..d8f2f72c3b62 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -26,8 +26,6 @@ obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
-obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
-
obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 3cf9bc5d8d95..3d5117be57f9 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -420,13 +420,16 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *qu
rcu_read_lock();
for_each_netdev_rcu(&init_net, ifp) {
dev_hold(ifp);
- if (!is_aoe_netif(ifp))
- goto cont;
+ if (!is_aoe_netif(ifp)) {
+ dev_put(ifp);
+ continue;
+ }
skb = new_skb(sizeof *h + sizeof *ch);
if (skb == NULL) {
printk(KERN_INFO "aoe: skb alloc failure\n");
- goto cont;
+ dev_put(ifp);
+ continue;
}
skb_put(skb, sizeof *h + sizeof *ch);
skb->dev = ifp;
@@ -441,9 +444,6 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *qu
h->major = cpu_to_be16(aoemajor);
h->minor = aoeminor;
h->cmd = AOECMD_CFG;
-
-cont:
- dev_put(ifp);
}
rcu_read_unlock();
}
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index 63773a90581d..1e66c7a188a1 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -64,6 +64,7 @@ tx(int id) __must_hold(&txlock)
pr_warn("aoe: packet could not be sent on %s. %s\n",
ifp ? ifp->name : "netif",
"consider increasing tx_queue_len");
+ dev_put(ifp);
spin_lock_irq(&txlock);
}
return 0;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 5ece2fd70d9c..f3a96c76f5a4 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2778,7 +2778,7 @@ static int init_submitter(struct drbd_device *device)
enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor)
{
struct drbd_resource *resource = adm_ctx->resource;
- struct drbd_connection *connection;
+ struct drbd_connection *connection, *n;
struct drbd_device *device;
struct drbd_peer_device *peer_device, *tmp_peer_device;
struct gendisk *disk;
@@ -2906,7 +2906,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
out_idr_remove_vol:
idr_remove(&connection->peer_devices, vnr);
out_idr_remove_from_resource:
- for_each_connection(connection, resource) {
+ for_each_connection_safe(connection, n, resource) {
peer_device = idr_remove(&connection->peer_devices, vnr);
if (peer_device)
kref_put(&connection->kref, drbd_destroy_connection);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 52dd517ff541..43c142ea364b 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -3426,7 +3426,7 @@ int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nlattr *resource_filter;
struct drbd_resource *resource;
- struct drbd_device *uninitialized_var(device);
+ struct drbd_device *device;
int minor, err, retcode;
struct drbd_genlmsghdr *dh;
struct device_info device_info;
@@ -3515,7 +3515,7 @@ int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nlattr *resource_filter;
struct drbd_resource *resource = NULL, *next_resource;
- struct drbd_connection *uninitialized_var(connection);
+ struct drbd_connection *connection;
int err = 0, retcode;
struct drbd_genlmsghdr *dh;
struct connection_info connection_info;
@@ -3677,7 +3677,7 @@ int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nlattr *resource_filter;
struct drbd_resource *resource;
- struct drbd_device *uninitialized_var(device);
+ struct drbd_device *device;
struct drbd_peer_device *peer_device = NULL;
int minor, err, retcode;
struct drbd_genlmsghdr *dh;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 2b3103c30857..d94f41a0abbe 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1298,7 +1298,7 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont
bio_set_dev(bio, device->ldev->backing_bdev);
bio->bi_private = octx;
bio->bi_end_io = one_flush_endio;
- bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH;
+ bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
device->flush_jif = jiffies;
set_bit(FLUSH_PENDING, &device->flags);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ad9b767ec40d..8396a4f99526 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -226,24 +226,30 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
blk_mq_unfreeze_queue(lo->lo_queue);
}
-static int
-figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
+/**
+ * loop_set_size() - sets device size and notifies userspace
+ * @lo: struct loop_device to set the size for
+ * @size: new size of the loop device
+ *
+ * Callers must validate that the size passed into this function fits into
+ * a sector_t, eg using loop_validate_size()
+ */
+static void loop_set_size(struct loop_device *lo, loff_t size)
{
- loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
- sector_t x = (sector_t)size;
struct block_device *bdev = lo->lo_device;
- if (unlikely((loff_t)x != size))
- return -EFBIG;
- if (lo->lo_offset != offset)
- lo->lo_offset = offset;
- if (lo->lo_sizelimit != sizelimit)
- lo->lo_sizelimit = sizelimit;
- set_capacity(lo->lo_disk, x);
- bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
+ set_capacity(lo->lo_disk, size);
+ bd_set_size(bdev, size << SECTOR_SHIFT);
/* let user-space know about the new size */
kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
- return 0;
+}
+
+static void
+figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
+{
+ loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
+
+ loop_set_size(lo, size);
}
static inline int
@@ -1020,10 +1026,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
!file->f_op->write_iter)
lo_flags |= LO_FLAGS_READ_ONLY;
- error = -EFBIG;
size = get_loop_size(lo, file);
- if ((loff_t)(sector_t)size != size)
- goto out_unlock;
+
error = loop_prepare_queue(lo);
if (error)
goto out_unlock;
@@ -1057,11 +1061,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
loop_update_rotational(lo);
loop_update_dio(lo);
- set_capacity(lo->lo_disk, size);
- bd_set_size(bdev, size << 9);
loop_sysfs_init(lo);
- /* let user-space know about the new size */
- kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
+ loop_set_size(lo, size);
set_blocksize(bdev, S_ISBLK(inode->i_mode) ?
block_size(inode->i_bdev) : PAGE_SIZE);
@@ -1275,82 +1276,50 @@ static int loop_clr_fd(struct loop_device *lo)
return __loop_clr_fd(lo, false);
}
+/**
+ * loop_set_status_from_info - configure device from loop_info
+ * @lo: struct loop_device to configure
+ * @info: struct loop_info64 to configure the device with
+ *
+ * Configures the loop device parameters according to the passed
+ * in loop_info64 configuration.
+ */
static int
-loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
+loop_set_status_from_info(struct loop_device *lo,
+ const struct loop_info64 *info)
{
int err;
struct loop_func_table *xfer;
kuid_t uid = current_uid();
- struct block_device *bdev;
- bool partscan = false;
-
- err = mutex_lock_killable(&loop_ctl_mutex);
- if (err)
- return err;
- if (lo->lo_encrypt_key_size &&
- !uid_eq(lo->lo_key_owner, uid) &&
- !capable(CAP_SYS_ADMIN)) {
- err = -EPERM;
- goto out_unlock;
- }
- if (lo->lo_state != Lo_bound) {
- err = -ENXIO;
- goto out_unlock;
- }
- if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) {
- err = -EINVAL;
- goto out_unlock;
- }
- if (lo->lo_offset != info->lo_offset ||
- lo->lo_sizelimit != info->lo_sizelimit) {
- sync_blockdev(lo->lo_device);
- invalidate_bdev(lo->lo_device);
- }
-
- /* I/O need to be drained during transfer transition */
- blk_mq_freeze_queue(lo->lo_queue);
+ if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
+ return -EINVAL;
err = loop_release_xfer(lo);
if (err)
- goto out_unfreeze;
+ return err;
if (info->lo_encrypt_type) {
unsigned int type = info->lo_encrypt_type;
- if (type >= MAX_LO_CRYPT) {
- err = -EINVAL;
- goto out_unfreeze;
- }
+ if (type >= MAX_LO_CRYPT)
+ return -EINVAL;
xfer = xfer_funcs[type];
- if (xfer == NULL) {
- err = -EINVAL;
- goto out_unfreeze;
- }
+ if (xfer == NULL)
+ return -EINVAL;
} else
xfer = NULL;
err = loop_init_xfer(lo, xfer, info);
if (err)
- goto out_unfreeze;
+ return err;
- if (lo->lo_offset != info->lo_offset ||
- lo->lo_sizelimit != info->lo_sizelimit) {
- /* kill_bdev should have truncated all the pages */
- if (lo->lo_device->bd_inode->i_mapping->nrpages) {
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
- if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
- err = -EFBIG;
- goto out_unfreeze;
- }
- }
+ /* Avoid assigning overflow values */
+ if (info->lo_offset > LLONG_MAX || info->lo_sizelimit > LLONG_MAX)
+ return -EOVERFLOW;
- loop_config_discard(lo);
+ lo->lo_offset = info->lo_offset;
+ lo->lo_sizelimit = info->lo_sizelimit;
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
@@ -1375,6 +1344,63 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
lo->lo_key_owner = uid;
}
+ return 0;
+}
+
+static int
+loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
+{
+ int err;
+ struct block_device *bdev;
+ kuid_t uid = current_uid();
+ bool partscan = false;
+ bool size_changed = false;
+
+ err = mutex_lock_killable(&loop_ctl_mutex);
+ if (err)
+ return err;
+ if (lo->lo_encrypt_key_size &&
+ !uid_eq(lo->lo_key_owner, uid) &&
+ !capable(CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto out_unlock;
+ }
+ if (lo->lo_state != Lo_bound) {
+ err = -ENXIO;
+ goto out_unlock;
+ }
+
+ if (lo->lo_offset != info->lo_offset ||
+ lo->lo_sizelimit != info->lo_sizelimit) {
+ size_changed = true;
+ sync_blockdev(lo->lo_device);
+ invalidate_bdev(lo->lo_device);
+ }
+
+ /* I/O need to be drained during transfer transition */
+ blk_mq_freeze_queue(lo->lo_queue);
+
+ if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
+ /* If any pages were dirtied after invalidate_bdev(), try again */
+ err = -EAGAIN;
+ pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
+ __func__, lo->lo_number, lo->lo_file_name,
+ lo->lo_device->bd_inode->i_mapping->nrpages);
+ goto out_unfreeze;
+ }
+
+ err = loop_set_status_from_info(lo, info);
+ if (err)
+ goto out_unfreeze;
+
+ if (size_changed) {
+ loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit,
+ lo->lo_backing_file);
+ loop_set_size(lo, new_size);
+ }
+
+ loop_config_discard(lo);
+
/* update dio if lo_offset or transfer is changed */
__loop_update_dio(lo, lo->use_dio);
@@ -1550,7 +1576,9 @@ static int loop_set_capacity(struct loop_device *lo)
if (unlikely(lo->lo_state != Lo_bound))
return -ENXIO;
- return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
+ figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
+
+ return 0;
}
static int loop_set_dio(struct loop_device *lo, unsigned long arg)
@@ -2050,7 +2078,7 @@ static int loop_add(struct loop_device **l, int i)
lo->tag_set.queue_depth = 128;
lo->tag_set.numa_node = NUMA_NO_NODE;
lo->tag_set.cmd_size = sizeof(struct loop_cmd);
- lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+ lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_NO_SCHED;
lo->tag_set.driver_data = lo;
err = blk_mq_alloc_tag_set(&lo->tag_set);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 09323b0510f0..f2d847ffcbc7 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1327,10 +1327,12 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
mutex_unlock(&nbd->config_lock);
ret = wait_event_interruptible(config->recv_wq,
atomic_read(&config->recv_threads) == 0);
- if (ret)
+ if (ret) {
sock_shutdown(nbd);
- flush_workqueue(nbd->recv_workq);
+ nbd_clear_que(nbd);
+ }
+ flush_workqueue(nbd->recv_workq);
mutex_lock(&nbd->config_lock);
nbd_bdev_reset(bdev);
/* user requested, ignore socket errors */
@@ -1607,7 +1609,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
return -EIO;
dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
- if (!dir) {
+ if (IS_ERR(dir)) {
dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
nbd_name(nbd));
return -EIO;
@@ -1633,7 +1635,7 @@ static int nbd_dbg_init(void)
struct dentry *dbg_dir;
dbg_dir = debugfs_create_dir("nbd", NULL);
- if (!dbg_dir)
+ if (IS_ERR(dbg_dir))
return -EIO;
nbd_dbg_dir = dbg_dir;
@@ -1706,7 +1708,8 @@ static int nbd_dev_add(int index)
if (err == -ENOSPC)
err = -EEXIST;
} else {
- err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL);
+ err = idr_alloc(&nbd_index_idr, nbd, 0,
+ (MINORMASK >> part_shift) + 1, GFP_KERNEL);
if (err >= 0)
index = err;
}
@@ -2310,6 +2313,12 @@ static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
}
dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST);
+ if (!dev_list) {
+ nlmsg_free(reply);
+ ret = -EMSGSIZE;
+ goto out;
+ }
+
if (index == -1) {
ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
if (ret) {
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 13eae973eaea..6cbdd8a691d2 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -1711,8 +1711,13 @@ static int null_add_dev(struct nullb_device *dev)
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
mutex_lock(&lock);
- nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
- dev->index = nullb->index;
+ rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
+ if (rv < 0) {
+ mutex_unlock(&lock);
+ goto out_cleanup_zone;
+ }
+ nullb->index = rv;
+ dev->index = rv;
mutex_unlock(&lock);
blk_queue_logical_block_size(nullb->q, dev->blocksize);
@@ -1724,13 +1729,16 @@ static int null_add_dev(struct nullb_device *dev)
rv = null_gendisk_register(nullb);
if (rv)
- goto out_cleanup_zone;
+ goto out_ida_free;
mutex_lock(&lock);
list_add_tail(&nullb->list, &nullb_list);
mutex_unlock(&lock);
return 0;
+
+out_ida_free:
+ ida_free(&nullb_indexes, nullb->index);
out_cleanup_zone:
if (dev->zoned)
null_zone_exit(dev);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 55745d6953f0..fd0cdced8024 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -626,9 +626,8 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
static int rbd_dev_refresh(struct rbd_device *rbd_dev);
-static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev);
-static int rbd_dev_header_info(struct rbd_device *rbd_dev);
-static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev);
+static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header);
static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
u64 snap_id);
static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
@@ -1098,15 +1097,24 @@ static void rbd_init_layout(struct rbd_device *rbd_dev)
RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL);
}
+static void rbd_image_header_cleanup(struct rbd_image_header *header)
+{
+ kfree(header->object_prefix);
+ ceph_put_snap_context(header->snapc);
+ kfree(header->snap_sizes);
+ kfree(header->snap_names);
+
+ memset(header, 0, sizeof(*header));
+}
+
/*
* Fill an rbd image header with information from the given format 1
* on-disk header.
*/
-static int rbd_header_from_disk(struct rbd_device *rbd_dev,
- struct rbd_image_header_ondisk *ondisk)
+static int rbd_header_from_disk(struct rbd_image_header *header,
+ struct rbd_image_header_ondisk *ondisk,
+ bool first_time)
{
- struct rbd_image_header *header = &rbd_dev->header;
- bool first_time = header->object_prefix == NULL;
struct ceph_snap_context *snapc;
char *object_prefix = NULL;
char *snap_names = NULL;
@@ -1173,11 +1181,6 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev,
if (first_time) {
header->object_prefix = object_prefix;
header->obj_order = ondisk->options.order;
- rbd_init_layout(rbd_dev);
- } else {
- ceph_put_snap_context(header->snapc);
- kfree(header->snap_names);
- kfree(header->snap_sizes);
}
/* The remaining fields always get updated (when we refresh) */
@@ -1493,14 +1496,30 @@ static bool rbd_obj_is_tail(struct rbd_obj_request *obj_req)
/*
* Must be called after rbd_obj_calc_img_extents().
*/
-static bool rbd_obj_copyup_enabled(struct rbd_obj_request *obj_req)
+static void rbd_obj_set_copyup_enabled(struct rbd_obj_request *obj_req)
{
- if (!obj_req->num_img_extents ||
- (rbd_obj_is_entire(obj_req) &&
- !obj_req->img_request->snapc->num_snaps))
- return false;
+ rbd_assert(obj_req->img_request->snapc);
- return true;
+ if (obj_req->img_request->op_type == OBJ_OP_DISCARD) {
+ dout("%s %p objno %llu discard\n", __func__, obj_req,
+ obj_req->ex.oe_objno);
+ return;
+ }
+
+ if (!obj_req->num_img_extents) {
+ dout("%s %p objno %llu not overlapping\n", __func__, obj_req,
+ obj_req->ex.oe_objno);
+ return;
+ }
+
+ if (rbd_obj_is_entire(obj_req) &&
+ !obj_req->img_request->snapc->num_snaps) {
+ dout("%s %p objno %llu entire\n", __func__, obj_req,
+ obj_req->ex.oe_objno);
+ return;
+ }
+
+ obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
}
static u64 rbd_obj_img_extents_bytes(struct rbd_obj_request *obj_req)
@@ -1599,6 +1618,7 @@ __rbd_obj_add_osd_request(struct rbd_obj_request *obj_req,
static struct ceph_osd_request *
rbd_obj_add_osd_request(struct rbd_obj_request *obj_req, int num_ops)
{
+ rbd_assert(obj_req->img_request->snapc);
return __rbd_obj_add_osd_request(obj_req, obj_req->img_request->snapc,
num_ops);
}
@@ -1727,11 +1747,14 @@ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev)
* Caller is responsible for filling in the list of object requests
* that comprises the image request, and the Linux request pointer
* (if there is one).
+ *
+ * Only snap_id is captured here, for reads. For writes, snapshot
+ * context is captured in rbd_img_object_requests() after exclusive
+ * lock is ensured to be held.
*/
static struct rbd_img_request *rbd_img_request_create(
struct rbd_device *rbd_dev,
- enum obj_operation_type op_type,
- struct ceph_snap_context *snapc)
+ enum obj_operation_type op_type)
{
struct rbd_img_request *img_request;
@@ -1743,8 +1766,6 @@ static struct rbd_img_request *rbd_img_request_create(
img_request->op_type = op_type;
if (!rbd_img_is_write(img_request))
img_request->snap_id = rbd_dev->spec->snap_id;
- else
- img_request->snapc = snapc;
if (rbd_dev_parent_get(rbd_dev))
img_request_layered_set(img_request);
@@ -2087,7 +2108,7 @@ static int rbd_object_map_update_finish(struct rbd_obj_request *obj_req,
struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
struct ceph_osd_data *osd_data;
u64 objno;
- u8 state, new_state, uninitialized_var(current_state);
+ u8 state, new_state, current_state;
bool has_current_state;
void *p;
@@ -2389,9 +2410,6 @@ static int rbd_obj_init_write(struct rbd_obj_request *obj_req)
if (ret)
return ret;
- if (rbd_obj_copyup_enabled(obj_req))
- obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
-
obj_req->write_state = RBD_OBJ_WRITE_START;
return 0;
}
@@ -2497,8 +2515,6 @@ static int rbd_obj_init_zeroout(struct rbd_obj_request *obj_req)
if (ret)
return ret;
- if (rbd_obj_copyup_enabled(obj_req))
- obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
if (!obj_req->num_img_extents) {
obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT;
if (rbd_obj_is_entire(obj_req))
@@ -2935,7 +2951,7 @@ static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req)
int ret;
child_img_req = rbd_img_request_create(img_req->rbd_dev->parent,
- OBJ_OP_READ, NULL);
+ OBJ_OP_READ);
if (!child_img_req)
return -ENOMEM;
@@ -3439,6 +3455,7 @@ again:
case RBD_OBJ_WRITE_START:
rbd_assert(!*result);
+ rbd_obj_set_copyup_enabled(obj_req);
if (rbd_obj_write_is_noop(obj_req))
return true;
@@ -3586,14 +3603,15 @@ static bool rbd_lock_add_request(struct rbd_img_request *img_req)
static void rbd_lock_del_request(struct rbd_img_request *img_req)
{
struct rbd_device *rbd_dev = img_req->rbd_dev;
- bool need_wakeup;
+ bool need_wakeup = false;
lockdep_assert_held(&rbd_dev->lock_rwsem);
spin_lock(&rbd_dev->lock_lists_lock);
- rbd_assert(!list_empty(&img_req->lock_item));
- list_del_init(&img_req->lock_item);
- need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING &&
- list_empty(&rbd_dev->running_list));
+ if (!list_empty(&img_req->lock_item)) {
+ list_del_init(&img_req->lock_item);
+ need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING &&
+ list_empty(&rbd_dev->running_list));
+ }
spin_unlock(&rbd_dev->lock_lists_lock);
if (need_wakeup)
complete(&rbd_dev->releasing_wait);
@@ -3625,9 +3643,19 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
static void rbd_img_object_requests(struct rbd_img_request *img_req)
{
+ struct rbd_device *rbd_dev = img_req->rbd_dev;
struct rbd_obj_request *obj_req;
rbd_assert(!img_req->pending.result && !img_req->pending.num_pending);
+ rbd_assert(!need_exclusive_lock(img_req) ||
+ __rbd_is_lock_owner(rbd_dev));
+
+ if (rbd_img_is_write(img_req)) {
+ rbd_assert(!img_req->snapc);
+ down_read(&rbd_dev->header_rwsem);
+ img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
+ up_read(&rbd_dev->header_rwsem);
+ }
for_each_obj_request(img_req, obj_req) {
int result = 0;
@@ -3645,7 +3673,6 @@ static void rbd_img_object_requests(struct rbd_img_request *img_req)
static bool rbd_img_advance(struct rbd_img_request *img_req, int *result)
{
- struct rbd_device *rbd_dev = img_req->rbd_dev;
int ret;
again:
@@ -3666,9 +3693,6 @@ again:
if (*result)
return true;
- rbd_assert(!need_exclusive_lock(img_req) ||
- __rbd_is_lock_owner(rbd_dev));
-
rbd_img_object_requests(img_req);
if (!img_req->pending.num_pending) {
*result = img_req->pending.result;
@@ -3974,14 +3998,19 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result)
return;
}
- list_for_each_entry(img_req, &rbd_dev->acquiring_list, lock_item) {
+ while (!list_empty(&rbd_dev->acquiring_list)) {
+ img_req = list_first_entry(&rbd_dev->acquiring_list,
+ struct rbd_img_request, lock_item);
mutex_lock(&img_req->state_mutex);
rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK);
+ if (!result)
+ list_move_tail(&img_req->lock_item,
+ &rbd_dev->running_list);
+ else
+ list_del_init(&img_req->lock_item);
rbd_img_schedule(img_req, result);
mutex_unlock(&img_req->state_mutex);
}
-
- list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
}
static int get_lock_owner_info(struct rbd_device *rbd_dev,
@@ -4130,6 +4159,10 @@ static int rbd_post_acquire_action(struct rbd_device *rbd_dev)
{
int ret;
+ ret = rbd_dev_refresh(rbd_dev);
+ if (ret)
+ return ret;
+
if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) {
ret = rbd_object_map_open(rbd_dev);
if (ret)
@@ -4788,7 +4821,6 @@ static void rbd_queue_workfn(struct work_struct *work)
struct request *rq = blk_mq_rq_from_pdu(work);
struct rbd_device *rbd_dev = rq->q->queuedata;
struct rbd_img_request *img_request;
- struct ceph_snap_context *snapc = NULL;
u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT;
u64 length = blk_rq_bytes(rq);
enum obj_operation_type op_type;
@@ -4853,10 +4885,6 @@ static void rbd_queue_workfn(struct work_struct *work)
down_read(&rbd_dev->header_rwsem);
mapping_size = rbd_dev->mapping.size;
- if (op_type != OBJ_OP_READ) {
- snapc = rbd_dev->header.snapc;
- ceph_get_snap_context(snapc);
- }
up_read(&rbd_dev->header_rwsem);
if (offset + length > mapping_size) {
@@ -4866,13 +4894,12 @@ static void rbd_queue_workfn(struct work_struct *work)
goto err_rq;
}
- img_request = rbd_img_request_create(rbd_dev, op_type, snapc);
+ img_request = rbd_img_request_create(rbd_dev, op_type);
if (!img_request) {
result = -ENOMEM;
goto err_rq;
}
img_request->rq = rq;
- snapc = NULL; /* img_request consumes a ref */
dout("%s rbd_dev %p img_req %p %s %llu~%llu\n", __func__, rbd_dev,
img_request, obj_op_name(op_type), offset, length);
@@ -4894,7 +4921,6 @@ err_rq:
if (result)
rbd_warn(rbd_dev, "%s %llx at %llx result %d",
obj_op_name(op_type), length, offset, result);
- ceph_put_snap_context(snapc);
err:
blk_mq_end_request(rq, errno_to_blk_status(result));
}
@@ -4966,7 +4992,9 @@ out_req:
* return, the rbd_dev->header field will contain up-to-date
* information about the image.
*/
-static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
+static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header,
+ bool first_time)
{
struct rbd_image_header_ondisk *ondisk = NULL;
u32 snap_count = 0;
@@ -5014,7 +5042,7 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
snap_count = le32_to_cpu(ondisk->snap_count);
} while (snap_count != want_count);
- ret = rbd_header_from_disk(rbd_dev, ondisk);
+ ret = rbd_header_from_disk(header, ondisk, first_time);
out:
kfree(ondisk);
@@ -5058,43 +5086,6 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev)
}
}
-static int rbd_dev_refresh(struct rbd_device *rbd_dev)
-{
- u64 mapping_size;
- int ret;
-
- down_write(&rbd_dev->header_rwsem);
- mapping_size = rbd_dev->mapping.size;
-
- ret = rbd_dev_header_info(rbd_dev);
- if (ret)
- goto out;
-
- /*
- * If there is a parent, see if it has disappeared due to the
- * mapped image getting flattened.
- */
- if (rbd_dev->parent) {
- ret = rbd_dev_v2_parent_info(rbd_dev);
- if (ret)
- goto out;
- }
-
- if (rbd_dev->spec->snap_id == CEPH_NOSNAP) {
- rbd_dev->mapping.size = rbd_dev->header.image_size;
- } else {
- /* validate mapped snapshot's EXISTS flag */
- rbd_exists_validate(rbd_dev);
- }
-
-out:
- up_write(&rbd_dev->header_rwsem);
- if (!ret && mapping_size != rbd_dev->mapping.size)
- rbd_dev_update_size(rbd_dev);
-
- return ret;
-}
-
static int rbd_init_request(struct blk_mq_tag_set *set, struct request *rq,
unsigned int hctx_idx, unsigned int numa_node)
{
@@ -5529,8 +5520,7 @@ static void rbd_dev_release(struct device *dev)
module_put(THIS_MODULE);
}
-static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
- struct rbd_spec *spec)
+static struct rbd_device *__rbd_dev_create(struct rbd_spec *spec)
{
struct rbd_device *rbd_dev;
@@ -5575,9 +5565,6 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
rbd_dev->dev.parent = &rbd_root_dev;
device_initialize(&rbd_dev->dev);
- rbd_dev->rbd_client = rbdc;
- rbd_dev->spec = spec;
-
return rbd_dev;
}
@@ -5590,12 +5577,10 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
{
struct rbd_device *rbd_dev;
- rbd_dev = __rbd_dev_create(rbdc, spec);
+ rbd_dev = __rbd_dev_create(spec);
if (!rbd_dev)
return NULL;
- rbd_dev->opts = opts;
-
/* get an id and fill in device name */
rbd_dev->dev_id = ida_simple_get(&rbd_dev_id_ida, 0,
minor_to_rbd_dev_id(1 << MINORBITS),
@@ -5612,6 +5597,10 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
/* we have a ref from do_rbd_add() */
__module_get(THIS_MODULE);
+ rbd_dev->rbd_client = rbdc;
+ rbd_dev->spec = spec;
+ rbd_dev->opts = opts;
+
dout("%s rbd_dev %p dev_id %d\n", __func__, rbd_dev, rbd_dev->dev_id);
return rbd_dev;
@@ -5666,17 +5655,12 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
return 0;
}
-static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
-{
- return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
- &rbd_dev->header.obj_order,
- &rbd_dev->header.image_size);
-}
-
-static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev,
+ char **pobject_prefix)
{
size_t size;
void *reply_buf;
+ char *object_prefix;
int ret;
void *p;
@@ -5694,16 +5678,16 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
goto out;
p = reply_buf;
- rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
- p + ret, NULL, GFP_NOIO);
+ object_prefix = ceph_extract_encoded_string(&p, p + ret, NULL,
+ GFP_NOIO);
+ if (IS_ERR(object_prefix)) {
+ ret = PTR_ERR(object_prefix);
+ goto out;
+ }
ret = 0;
- if (IS_ERR(rbd_dev->header.object_prefix)) {
- ret = PTR_ERR(rbd_dev->header.object_prefix);
- rbd_dev->header.object_prefix = NULL;
- } else {
- dout(" object_prefix = %s\n", rbd_dev->header.object_prefix);
- }
+ *pobject_prefix = object_prefix;
+ dout(" object_prefix = %s\n", object_prefix);
out:
kfree(reply_buf);
@@ -5748,12 +5732,6 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
return 0;
}
-static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
-{
- return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
- &rbd_dev->header.features);
-}
-
/*
* These are generic image flags, but since they are used only for
* object map, store them in rbd_dev->object_map_flags.
@@ -5790,6 +5768,14 @@ struct parent_image_info {
u64 overlap;
};
+static void rbd_parent_info_cleanup(struct parent_image_info *pii)
+{
+ kfree(pii->pool_ns);
+ kfree(pii->image_id);
+
+ memset(pii, 0, sizeof(*pii));
+}
+
/*
* The caller is responsible for @pii.
*/
@@ -5859,6 +5845,9 @@ static int __get_parent_info(struct rbd_device *rbd_dev,
if (pii->has_overlap)
ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
+ dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
+ __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
+ pii->has_overlap, pii->overlap);
return 0;
e_inval:
@@ -5897,14 +5886,17 @@ static int __get_parent_info_legacy(struct rbd_device *rbd_dev,
pii->has_overlap = true;
ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
+ dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
+ __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
+ pii->has_overlap, pii->overlap);
return 0;
e_inval:
return -EINVAL;
}
-static int get_parent_info(struct rbd_device *rbd_dev,
- struct parent_image_info *pii)
+static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev,
+ struct parent_image_info *pii)
{
struct page *req_page, *reply_page;
void *p;
@@ -5932,7 +5924,7 @@ static int get_parent_info(struct rbd_device *rbd_dev,
return ret;
}
-static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
+static int rbd_dev_setup_parent(struct rbd_device *rbd_dev)
{
struct rbd_spec *parent_spec;
struct parent_image_info pii = { 0 };
@@ -5942,37 +5934,12 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
if (!parent_spec)
return -ENOMEM;
- ret = get_parent_info(rbd_dev, &pii);
+ ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
if (ret)
goto out_err;
- dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
- __func__, pii.pool_id, pii.pool_ns, pii.image_id, pii.snap_id,
- pii.has_overlap, pii.overlap);
-
- if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) {
- /*
- * Either the parent never existed, or we have
- * record of it but the image got flattened so it no
- * longer has a parent. When the parent of a
- * layered image disappears we immediately set the
- * overlap to 0. The effect of this is that all new
- * requests will be treated as if the image had no
- * parent.
- *
- * If !pii.has_overlap, the parent image spec is not
- * applicable. It's there to avoid duplication in each
- * snapshot record.
- */
- if (rbd_dev->parent_overlap) {
- rbd_dev->parent_overlap = 0;
- rbd_dev_parent_put(rbd_dev);
- pr_info("%s: clone image has been flattened\n",
- rbd_dev->disk->disk_name);
- }
-
+ if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap)
goto out; /* No parent? No problem. */
- }
/* The ceph file layout needs to fit pool id in 32 bits */
@@ -5984,58 +5951,46 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
}
/*
- * The parent won't change (except when the clone is
- * flattened, already handled that). So we only need to
- * record the parent spec we have not already done so.
+ * The parent won't change except when the clone is flattened,
+ * so we only need to record the parent image spec once.
*/
- if (!rbd_dev->parent_spec) {
- parent_spec->pool_id = pii.pool_id;
- if (pii.pool_ns && *pii.pool_ns) {
- parent_spec->pool_ns = pii.pool_ns;
- pii.pool_ns = NULL;
- }
- parent_spec->image_id = pii.image_id;
- pii.image_id = NULL;
- parent_spec->snap_id = pii.snap_id;
-
- rbd_dev->parent_spec = parent_spec;
- parent_spec = NULL; /* rbd_dev now owns this */
+ parent_spec->pool_id = pii.pool_id;
+ if (pii.pool_ns && *pii.pool_ns) {
+ parent_spec->pool_ns = pii.pool_ns;
+ pii.pool_ns = NULL;
}
+ parent_spec->image_id = pii.image_id;
+ pii.image_id = NULL;
+ parent_spec->snap_id = pii.snap_id;
+
+ rbd_assert(!rbd_dev->parent_spec);
+ rbd_dev->parent_spec = parent_spec;
+ parent_spec = NULL; /* rbd_dev now owns this */
/*
- * We always update the parent overlap. If it's zero we issue
- * a warning, as we will proceed as if there was no parent.
+ * Record the parent overlap. If it's zero, issue a warning as
+ * we will proceed as if there is no parent.
*/
- if (!pii.overlap) {
- if (parent_spec) {
- /* refresh, careful to warn just once */
- if (rbd_dev->parent_overlap)
- rbd_warn(rbd_dev,
- "clone now standalone (overlap became 0)");
- } else {
- /* initial probe */
- rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
- }
- }
+ if (!pii.overlap)
+ rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
rbd_dev->parent_overlap = pii.overlap;
out:
ret = 0;
out_err:
- kfree(pii.pool_ns);
- kfree(pii.image_id);
+ rbd_parent_info_cleanup(&pii);
rbd_spec_put(parent_spec);
return ret;
}
-static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev,
+ u64 *stripe_unit, u64 *stripe_count)
{
struct {
__le64 stripe_unit;
__le64 stripe_count;
} __attribute__ ((packed)) striping_info_buf = { 0 };
size_t size = sizeof (striping_info_buf);
- void *p;
int ret;
ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
@@ -6047,27 +6002,33 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
if (ret < size)
return -ERANGE;
- p = &striping_info_buf;
- rbd_dev->header.stripe_unit = ceph_decode_64(&p);
- rbd_dev->header.stripe_count = ceph_decode_64(&p);
+ *stripe_unit = le64_to_cpu(striping_info_buf.stripe_unit);
+ *stripe_count = le64_to_cpu(striping_info_buf.stripe_count);
+ dout(" stripe_unit = %llu stripe_count = %llu\n", *stripe_unit,
+ *stripe_count);
+
return 0;
}
-static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev, s64 *data_pool_id)
{
- __le64 data_pool_id;
+ __le64 data_pool_buf;
int ret;
ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
&rbd_dev->header_oloc, "get_data_pool",
- NULL, 0, &data_pool_id, sizeof(data_pool_id));
+ NULL, 0, &data_pool_buf,
+ sizeof(data_pool_buf));
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
if (ret < 0)
return ret;
- if (ret < sizeof(data_pool_id))
+ if (ret < sizeof(data_pool_buf))
return -EBADMSG;
- rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id);
- WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL);
+ *data_pool_id = le64_to_cpu(data_pool_buf);
+ dout(" data_pool_id = %lld\n", *data_pool_id);
+ WARN_ON(*data_pool_id == CEPH_NOPOOL);
+
return 0;
}
@@ -6259,7 +6220,8 @@ out_err:
return ret;
}
-static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev,
+ struct ceph_snap_context **psnapc)
{
size_t size;
int ret;
@@ -6320,9 +6282,7 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
for (i = 0; i < snap_count; i++)
snapc->snaps[i] = ceph_decode_64(&p);
- ceph_put_snap_context(rbd_dev->header.snapc);
- rbd_dev->header.snapc = snapc;
-
+ *psnapc = snapc;
dout(" snap context seq = %llu, snap_count = %u\n",
(unsigned long long)seq, (unsigned int)snap_count);
out:
@@ -6371,38 +6331,42 @@ out:
return snap_name;
}
-static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header,
+ bool first_time)
{
- bool first_time = rbd_dev->header.object_prefix == NULL;
int ret;
- ret = rbd_dev_v2_image_size(rbd_dev);
+ ret = _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
+ first_time ? &header->obj_order : NULL,
+ &header->image_size);
if (ret)
return ret;
if (first_time) {
- ret = rbd_dev_v2_header_onetime(rbd_dev);
+ ret = rbd_dev_v2_header_onetime(rbd_dev, header);
if (ret)
return ret;
}
- ret = rbd_dev_v2_snap_context(rbd_dev);
- if (ret && first_time) {
- kfree(rbd_dev->header.object_prefix);
- rbd_dev->header.object_prefix = NULL;
- }
+ ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc);
+ if (ret)
+ return ret;
- return ret;
+ return 0;
}
-static int rbd_dev_header_info(struct rbd_device *rbd_dev)
+static int rbd_dev_header_info(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header,
+ bool first_time)
{
rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ rbd_assert(!header->object_prefix && !header->snapc);
if (rbd_dev->image_format == 1)
- return rbd_dev_v1_header_info(rbd_dev);
+ return rbd_dev_v1_header_info(rbd_dev, header, first_time);
- return rbd_dev_v2_header_info(rbd_dev);
+ return rbd_dev_v2_header_info(rbd_dev, header, first_time);
}
/*
@@ -6752,60 +6716,49 @@ out:
*/
static void rbd_dev_unprobe(struct rbd_device *rbd_dev)
{
- struct rbd_image_header *header;
-
rbd_dev_parent_put(rbd_dev);
rbd_object_map_free(rbd_dev);
rbd_dev_mapping_clear(rbd_dev);
/* Free dynamic fields from the header, then zero it out */
- header = &rbd_dev->header;
- ceph_put_snap_context(header->snapc);
- kfree(header->snap_sizes);
- kfree(header->snap_names);
- kfree(header->object_prefix);
- memset(header, 0, sizeof (*header));
+ rbd_image_header_cleanup(&rbd_dev->header);
}
-static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header)
{
int ret;
- ret = rbd_dev_v2_object_prefix(rbd_dev);
+ ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix);
if (ret)
- goto out_err;
+ return ret;
/*
* Get the and check features for the image. Currently the
* features are assumed to never change.
*/
- ret = rbd_dev_v2_features(rbd_dev);
+ ret = _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
+ &header->features);
if (ret)
- goto out_err;
+ return ret;
/* If the image supports fancy striping, get its parameters */
- if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) {
- ret = rbd_dev_v2_striping_info(rbd_dev);
- if (ret < 0)
- goto out_err;
+ if (header->features & RBD_FEATURE_STRIPINGV2) {
+ ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit,
+ &header->stripe_count);
+ if (ret)
+ return ret;
}
- if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) {
- ret = rbd_dev_v2_data_pool(rbd_dev);
+ if (header->features & RBD_FEATURE_DATA_POOL) {
+ ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id);
if (ret)
- goto out_err;
+ return ret;
}
- rbd_init_layout(rbd_dev);
return 0;
-
-out_err:
- rbd_dev->header.features = 0;
- kfree(rbd_dev->header.object_prefix);
- rbd_dev->header.object_prefix = NULL;
- return ret;
}
/*
@@ -6827,7 +6780,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
goto out_err;
}
- parent = __rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec);
+ parent = __rbd_dev_create(rbd_dev->parent_spec);
if (!parent) {
ret = -ENOMEM;
goto out_err;
@@ -6837,8 +6790,8 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
* Images related by parent/child relationships always share
* rbd_client and spec/parent_spec, so bump their refcounts.
*/
- __rbd_get_client(rbd_dev->rbd_client);
- rbd_spec_get(rbd_dev->parent_spec);
+ parent->rbd_client = __rbd_get_client(rbd_dev->rbd_client);
+ parent->spec = rbd_spec_get(rbd_dev->parent_spec);
ret = rbd_dev_image_probe(parent, depth);
if (ret < 0)
@@ -6983,10 +6936,12 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
if (!depth)
down_write(&rbd_dev->header_rwsem);
- ret = rbd_dev_header_info(rbd_dev);
+ ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true);
if (ret)
goto err_out_probe;
+ rbd_init_layout(rbd_dev);
+
/*
* If this image is the one being mapped, we have pool name and
* id, image name and id, and snap name - need to fill snap id.
@@ -7020,7 +6975,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
}
if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
- ret = rbd_dev_v2_parent_info(rbd_dev);
+ ret = rbd_dev_setup_parent(rbd_dev);
if (ret)
goto err_out_probe;
}
@@ -7046,6 +7001,112 @@ err_out_format:
return ret;
}
+static void rbd_dev_update_header(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header)
+{
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ rbd_assert(rbd_dev->header.object_prefix); /* !first_time */
+
+ if (rbd_dev->header.image_size != header->image_size) {
+ rbd_dev->header.image_size = header->image_size;
+
+ if (rbd_dev->spec->snap_id == CEPH_NOSNAP) {
+ rbd_dev->mapping.size = header->image_size;
+ rbd_dev_update_size(rbd_dev);
+ }
+ }
+
+ if (rbd_dev->spec->snap_id != CEPH_NOSNAP) {
+ /* validate mapped snapshot's EXISTS flag */
+ rbd_exists_validate(rbd_dev);
+ }
+
+ ceph_put_snap_context(rbd_dev->header.snapc);
+ rbd_dev->header.snapc = header->snapc;
+ header->snapc = NULL;
+
+ if (rbd_dev->image_format == 1) {
+ kfree(rbd_dev->header.snap_names);
+ rbd_dev->header.snap_names = header->snap_names;
+ header->snap_names = NULL;
+
+ kfree(rbd_dev->header.snap_sizes);
+ rbd_dev->header.snap_sizes = header->snap_sizes;
+ header->snap_sizes = NULL;
+ }
+}
+
+static void rbd_dev_update_parent(struct rbd_device *rbd_dev,
+ struct parent_image_info *pii)
+{
+ if (pii->pool_id == CEPH_NOPOOL || !pii->has_overlap) {
+ /*
+ * Either the parent never existed, or we have
+ * record of it but the image got flattened so it no
+ * longer has a parent. When the parent of a
+ * layered image disappears we immediately set the
+ * overlap to 0. The effect of this is that all new
+ * requests will be treated as if the image had no
+ * parent.
+ *
+ * If !pii.has_overlap, the parent image spec is not
+ * applicable. It's there to avoid duplication in each
+ * snapshot record.
+ */
+ if (rbd_dev->parent_overlap) {
+ rbd_dev->parent_overlap = 0;
+ rbd_dev_parent_put(rbd_dev);
+ pr_info("%s: clone has been flattened\n",
+ rbd_dev->disk->disk_name);
+ }
+ } else {
+ rbd_assert(rbd_dev->parent_spec);
+
+ /*
+ * Update the parent overlap. If it became zero, issue
+ * a warning as we will proceed as if there is no parent.
+ */
+ if (!pii->overlap && rbd_dev->parent_overlap)
+ rbd_warn(rbd_dev,
+ "clone has become standalone (overlap 0)");
+ rbd_dev->parent_overlap = pii->overlap;
+ }
+}
+
+static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+{
+ struct rbd_image_header header = { 0 };
+ struct parent_image_info pii = { 0 };
+ int ret;
+
+ dout("%s rbd_dev %p\n", __func__, rbd_dev);
+
+ ret = rbd_dev_header_info(rbd_dev, &header, false);
+ if (ret)
+ goto out;
+
+ /*
+ * If there is a parent, see if it has disappeared due to the
+ * mapped image getting flattened.
+ */
+ if (rbd_dev->parent) {
+ ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
+ if (ret)
+ goto out;
+ }
+
+ down_write(&rbd_dev->header_rwsem);
+ rbd_dev_update_header(rbd_dev, &header);
+ if (rbd_dev->parent)
+ rbd_dev_update_parent(rbd_dev, &pii);
+ up_write(&rbd_dev->header_rwsem);
+
+out:
+ rbd_parent_info_cleanup(&pii);
+ rbd_image_header_cleanup(&header);
+ return ret;
+}
+
static ssize_t do_rbd_add(struct bus_type *bus,
const char *buf,
size_t count)
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 6b2fd630de85..6622dd1aa07b 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -983,6 +983,8 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
print_version();
hp = mdesc_grab();
+ if (!hp)
+ return -ENODEV;
err = -ENODEV;
if ((vdev->dev_no << PARTITION_SHIFT) & ~(u64)MINORMASK) {
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
deleted file mode 100644
index 4478eb7efee0..000000000000
--- a/drivers/block/sx8.c
+++ /dev/null
@@ -1,1586 +0,0 @@
-/*
- * sx8.c: Driver for Promise SATA SX8 looks-like-I2O hardware
- *
- * Copyright 2004-2005 Red Hat, Inc.
- *
- * Author/maintainer: Jeff Garzik <jgarzik@pobox.com>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/blk-mq.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/compiler.h>
-#include <linux/workqueue.h>
-#include <linux/bitops.h>
-#include <linux/delay.h>
-#include <linux/ktime.h>
-#include <linux/hdreg.h>
-#include <linux/dma-mapping.h>
-#include <linux/completion.h>
-#include <linux/scatterlist.h>
-#include <asm/io.h>
-#include <linux/uaccess.h>
-
-#if 0
-#define CARM_DEBUG
-#define CARM_VERBOSE_DEBUG
-#else
-#undef CARM_DEBUG
-#undef CARM_VERBOSE_DEBUG
-#endif
-#undef CARM_NDEBUG
-
-#define DRV_NAME "sx8"
-#define DRV_VERSION "1.0"
-#define PFX DRV_NAME ": "
-
-MODULE_AUTHOR("Jeff Garzik");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Promise SATA SX8 block driver");
-MODULE_VERSION(DRV_VERSION);
-
-/*
- * SX8 hardware has a single message queue for all ATA ports.
- * When this driver was written, the hardware (firmware?) would
- * corrupt data eventually, if more than one request was outstanding.
- * As one can imagine, having 8 ports bottlenecking on a single
- * command hurts performance.
- *
- * Based on user reports, later versions of the hardware (firmware?)
- * seem to be able to survive with more than one command queued.
- *
- * Therefore, we default to the safe option -- 1 command -- but
- * allow the user to increase this.
- *
- * SX8 should be able to support up to ~60 queued commands (CARM_MAX_REQ),
- * but problems seem to occur when you exceed ~30, even on newer hardware.
- */
-static int max_queue = 1;
-module_param(max_queue, int, 0444);
-MODULE_PARM_DESC(max_queue, "Maximum number of queued commands. (min==1, max==30, safe==1)");
-
-
-#define NEXT_RESP(idx) ((idx + 1) % RMSG_Q_LEN)
-
-/* 0xf is just arbitrary, non-zero noise; this is sorta like poisoning */
-#define TAG_ENCODE(tag) (((tag) << 16) | 0xf)
-#define TAG_DECODE(tag) (((tag) >> 16) & 0x1f)
-#define TAG_VALID(tag) ((((tag) & 0xf) == 0xf) && (TAG_DECODE(tag) < 32))
-
-/* note: prints function name for you */
-#ifdef CARM_DEBUG
-#define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
-#ifdef CARM_VERBOSE_DEBUG
-#define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
-#else
-#define VPRINTK(fmt, args...)
-#endif /* CARM_VERBOSE_DEBUG */
-#else
-#define DPRINTK(fmt, args...)
-#define VPRINTK(fmt, args...)
-#endif /* CARM_DEBUG */
-
-#ifdef CARM_NDEBUG
-#define assert(expr)
-#else
-#define assert(expr) \
- if(unlikely(!(expr))) { \
- printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
- #expr, __FILE__, __func__, __LINE__); \
- }
-#endif
-
-/* defines only for the constants which don't work well as enums */
-struct carm_host;
-
-enum {
- /* adapter-wide limits */
- CARM_MAX_PORTS = 8,
- CARM_SHM_SIZE = (4096 << 7),
- CARM_MINORS_PER_MAJOR = 256 / CARM_MAX_PORTS,
- CARM_MAX_WAIT_Q = CARM_MAX_PORTS + 1,
-
- /* command message queue limits */
- CARM_MAX_REQ = 64, /* max command msgs per host */
- CARM_MSG_LOW_WATER = (CARM_MAX_REQ / 4), /* refill mark */
-
- /* S/G limits, host-wide and per-request */
- CARM_MAX_REQ_SG = 32, /* max s/g entries per request */
- CARM_MAX_HOST_SG = 600, /* max s/g entries per host */
- CARM_SG_LOW_WATER = (CARM_MAX_HOST_SG / 4), /* re-fill mark */
-
- /* hardware registers */
- CARM_IHQP = 0x1c,
- CARM_INT_STAT = 0x10, /* interrupt status */
- CARM_INT_MASK = 0x14, /* interrupt mask */
- CARM_HMUC = 0x18, /* host message unit control */
- RBUF_ADDR_LO = 0x20, /* response msg DMA buf low 32 bits */
- RBUF_ADDR_HI = 0x24, /* response msg DMA buf high 32 bits */
- RBUF_BYTE_SZ = 0x28,
- CARM_RESP_IDX = 0x2c,
- CARM_CMS0 = 0x30, /* command message size reg 0 */
- CARM_LMUC = 0x48,
- CARM_HMPHA = 0x6c,
- CARM_INITC = 0xb5,
-
- /* bits in CARM_INT_{STAT,MASK} */
- INT_RESERVED = 0xfffffff0,
- INT_WATCHDOG = (1 << 3), /* watchdog timer */
- INT_Q_OVERFLOW = (1 << 2), /* cmd msg q overflow */
- INT_Q_AVAILABLE = (1 << 1), /* cmd msg q has free space */
- INT_RESPONSE = (1 << 0), /* response msg available */
- INT_ACK_MASK = INT_WATCHDOG | INT_Q_OVERFLOW,
- INT_DEF_MASK = INT_RESERVED | INT_Q_OVERFLOW |
- INT_RESPONSE,
-
- /* command messages, and related register bits */
- CARM_HAVE_RESP = 0x01,
- CARM_MSG_READ = 1,
- CARM_MSG_WRITE = 2,
- CARM_MSG_VERIFY = 3,
- CARM_MSG_GET_CAPACITY = 4,
- CARM_MSG_FLUSH = 5,
- CARM_MSG_IOCTL = 6,
- CARM_MSG_ARRAY = 8,
- CARM_MSG_MISC = 9,
- CARM_CME = (1 << 2),
- CARM_RME = (1 << 1),
- CARM_WZBC = (1 << 0),
- CARM_RMI = (1 << 0),
- CARM_Q_FULL = (1 << 3),
- CARM_MSG_SIZE = 288,
- CARM_Q_LEN = 48,
-
- /* CARM_MSG_IOCTL messages */
- CARM_IOC_SCAN_CHAN = 5, /* scan channels for devices */
- CARM_IOC_GET_TCQ = 13, /* get tcq/ncq depth */
- CARM_IOC_SET_TCQ = 14, /* set tcq/ncq depth */
-
- IOC_SCAN_CHAN_NODEV = 0x1f,
- IOC_SCAN_CHAN_OFFSET = 0x40,
-
- /* CARM_MSG_ARRAY messages */
- CARM_ARRAY_INFO = 0,
-
- ARRAY_NO_EXIST = (1 << 31),
-
- /* response messages */
- RMSG_SZ = 8, /* sizeof(struct carm_response) */
- RMSG_Q_LEN = 48, /* resp. msg list length */
- RMSG_OK = 1, /* bit indicating msg was successful */
- /* length of entire resp. msg buffer */
- RBUF_LEN = RMSG_SZ * RMSG_Q_LEN,
-
- PDC_SHM_SIZE = (4096 << 7), /* length of entire h/w buffer */
-
- /* CARM_MSG_MISC messages */
- MISC_GET_FW_VER = 2,
- MISC_ALLOC_MEM = 3,
- MISC_SET_TIME = 5,
-
- /* MISC_GET_FW_VER feature bits */
- FW_VER_4PORT = (1 << 2), /* 1=4 ports, 0=8 ports */
- FW_VER_NON_RAID = (1 << 1), /* 1=non-RAID firmware, 0=RAID */
- FW_VER_ZCR = (1 << 0), /* zero channel RAID (whatever that is) */
-
- /* carm_host flags */
- FL_NON_RAID = FW_VER_NON_RAID,
- FL_4PORT = FW_VER_4PORT,
- FL_FW_VER_MASK = (FW_VER_NON_RAID | FW_VER_4PORT),
- FL_DYN_MAJOR = (1 << 17),
-};
-
-enum {
- CARM_SG_BOUNDARY = 0xffffUL, /* s/g segment boundary */
-};
-
-enum scatter_gather_types {
- SGT_32BIT = 0,
- SGT_64BIT = 1,
-};
-
-enum host_states {
- HST_INVALID, /* invalid state; never used */
- HST_ALLOC_BUF, /* setting up master SHM area */
- HST_ERROR, /* we never leave here */
- HST_PORT_SCAN, /* start dev scan */
- HST_DEV_SCAN_START, /* start per-device probe */
- HST_DEV_SCAN, /* continue per-device probe */
- HST_DEV_ACTIVATE, /* activate devices we found */
- HST_PROBE_FINISHED, /* probe is complete */
- HST_PROBE_START, /* initiate probe */
- HST_SYNC_TIME, /* tell firmware what time it is */
- HST_GET_FW_VER, /* get firmware version, adapter port cnt */
-};
-
-#ifdef CARM_DEBUG
-static const char *state_name[] = {
- "HST_INVALID",
- "HST_ALLOC_BUF",
- "HST_ERROR",
- "HST_PORT_SCAN",
- "HST_DEV_SCAN_START",
- "HST_DEV_SCAN",
- "HST_DEV_ACTIVATE",
- "HST_PROBE_FINISHED",
- "HST_PROBE_START",
- "HST_SYNC_TIME",
- "HST_GET_FW_VER",
-};
-#endif
-
-struct carm_port {
- unsigned int port_no;
- struct gendisk *disk;
- struct carm_host *host;
-
- /* attached device characteristics */
- u64 capacity;
- char name[41];
- u16 dev_geom_head;
- u16 dev_geom_sect;
- u16 dev_geom_cyl;
-};
-
-struct carm_request {
- int n_elem;
- unsigned int msg_type;
- unsigned int msg_subtype;
- unsigned int msg_bucket;
- struct scatterlist sg[CARM_MAX_REQ_SG];
-};
-
-struct carm_host {
- unsigned long flags;
- void __iomem *mmio;
- void *shm;
- dma_addr_t shm_dma;
-
- int major;
- int id;
- char name[32];
-
- spinlock_t lock;
- struct pci_dev *pdev;
- unsigned int state;
- u32 fw_ver;
-
- struct blk_mq_tag_set tag_set;
- struct request_queue *oob_q;
- unsigned int n_oob;
-
- unsigned int hw_sg_used;
-
- unsigned int resp_idx;
-
- unsigned int wait_q_prod;
- unsigned int wait_q_cons;
- struct request_queue *wait_q[CARM_MAX_WAIT_Q];
-
- void *msg_base;
- dma_addr_t msg_dma;
-
- int cur_scan_dev;
- unsigned long dev_active;
- unsigned long dev_present;
- struct carm_port port[CARM_MAX_PORTS];
-
- struct work_struct fsm_task;
-
- struct completion probe_comp;
-};
-
-struct carm_response {
- __le32 ret_handle;
- __le32 status;
-} __attribute__((packed));
-
-struct carm_msg_sg {
- __le32 start;
- __le32 len;
-} __attribute__((packed));
-
-struct carm_msg_rw {
- u8 type;
- u8 id;
- u8 sg_count;
- u8 sg_type;
- __le32 handle;
- __le32 lba;
- __le16 lba_count;
- __le16 lba_high;
- struct carm_msg_sg sg[32];
-} __attribute__((packed));
-
-struct carm_msg_allocbuf {
- u8 type;
- u8 subtype;
- u8 n_sg;
- u8 sg_type;
- __le32 handle;
- __le32 addr;
- __le32 len;
- __le32 evt_pool;
- __le32 n_evt;
- __le32 rbuf_pool;
- __le32 n_rbuf;
- __le32 msg_pool;
- __le32 n_msg;
- struct carm_msg_sg sg[8];
-} __attribute__((packed));
-
-struct carm_msg_ioctl {
- u8 type;
- u8 subtype;
- u8 array_id;
- u8 reserved1;
- __le32 handle;
- __le32 data_addr;
- u32 reserved2;
-} __attribute__((packed));
-
-struct carm_msg_sync_time {
- u8 type;
- u8 subtype;
- u16 reserved1;
- __le32 handle;
- u32 reserved2;
- __le32 timestamp;
-} __attribute__((packed));
-
-struct carm_msg_get_fw_ver {
- u8 type;
- u8 subtype;
- u16 reserved1;
- __le32 handle;
- __le32 data_addr;
- u32 reserved2;
-} __attribute__((packed));
-
-struct carm_fw_ver {
- __le32 version;
- u8 features;
- u8 reserved1;
- u16 reserved2;
-} __attribute__((packed));
-
-struct carm_array_info {
- __le32 size;
-
- __le16 size_hi;
- __le16 stripe_size;
-
- __le32 mode;
-
- __le16 stripe_blk_sz;
- __le16 reserved1;
-
- __le16 cyl;
- __le16 head;
-
- __le16 sect;
- u8 array_id;
- u8 reserved2;
-
- char name[40];
-
- __le32 array_status;
-
- /* device list continues beyond this point? */
-} __attribute__((packed));
-
-static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
-static void carm_remove_one (struct pci_dev *pdev);
-static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo);
-
-static const struct pci_device_id carm_pci_tbl[] = {
- { PCI_VENDOR_ID_PROMISE, 0x8000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, },
- { PCI_VENDOR_ID_PROMISE, 0x8002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, },
- { } /* terminate list */
-};
-MODULE_DEVICE_TABLE(pci, carm_pci_tbl);
-
-static struct pci_driver carm_driver = {
- .name = DRV_NAME,
- .id_table = carm_pci_tbl,
- .probe = carm_init_one,
- .remove = carm_remove_one,
-};
-
-static const struct block_device_operations carm_bd_ops = {
- .owner = THIS_MODULE,
- .getgeo = carm_bdev_getgeo,
-};
-
-static unsigned int carm_host_id;
-static unsigned long carm_major_alloc;
-
-
-
-static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
- struct carm_port *port = bdev->bd_disk->private_data;
-
- geo->heads = (u8) port->dev_geom_head;
- geo->sectors = (u8) port->dev_geom_sect;
- geo->cylinders = port->dev_geom_cyl;
- return 0;
-}
-
-static const u32 msg_sizes[] = { 32, 64, 128, CARM_MSG_SIZE };
-
-static inline int carm_lookup_bucket(u32 msg_size)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(msg_sizes); i++)
- if (msg_size <= msg_sizes[i])
- return i;
-
- return -ENOENT;
-}
-
-static void carm_init_buckets(void __iomem *mmio)
-{
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(msg_sizes); i++)
- writel(msg_sizes[i], mmio + CARM_CMS0 + (4 * i));
-}
-
-static inline void *carm_ref_msg(struct carm_host *host,
- unsigned int msg_idx)
-{
- return host->msg_base + (msg_idx * CARM_MSG_SIZE);
-}
-
-static inline dma_addr_t carm_ref_msg_dma(struct carm_host *host,
- unsigned int msg_idx)
-{
- return host->msg_dma + (msg_idx * CARM_MSG_SIZE);
-}
-
-static int carm_send_msg(struct carm_host *host,
- struct carm_request *crq, unsigned tag)
-{
- void __iomem *mmio = host->mmio;
- u32 msg = (u32) carm_ref_msg_dma(host, tag);
- u32 cm_bucket = crq->msg_bucket;
- u32 tmp;
- int rc = 0;
-
- VPRINTK("ENTER\n");
-
- tmp = readl(mmio + CARM_HMUC);
- if (tmp & CARM_Q_FULL) {
-#if 0
- tmp = readl(mmio + CARM_INT_MASK);
- tmp |= INT_Q_AVAILABLE;
- writel(tmp, mmio + CARM_INT_MASK);
- readl(mmio + CARM_INT_MASK); /* flush */
-#endif
- DPRINTK("host msg queue full\n");
- rc = -EBUSY;
- } else {
- writel(msg | (cm_bucket << 1), mmio + CARM_IHQP);
- readl(mmio + CARM_IHQP); /* flush */
- }
-
- return rc;
-}
-
-static int carm_array_info (struct carm_host *host, unsigned int array_idx)
-{
- struct carm_msg_ioctl *ioc;
- u32 msg_data;
- dma_addr_t msg_dma;
- struct carm_request *crq;
- struct request *rq;
- int rc;
-
- rq = blk_mq_alloc_request(host->oob_q, REQ_OP_DRV_OUT, 0);
- if (IS_ERR(rq)) {
- rc = -ENOMEM;
- goto err_out;
- }
- crq = blk_mq_rq_to_pdu(rq);
-
- ioc = carm_ref_msg(host, rq->tag);
- msg_dma = carm_ref_msg_dma(host, rq->tag);
- msg_data = (u32) (msg_dma + sizeof(struct carm_array_info));
-
- crq->msg_type = CARM_MSG_ARRAY;
- crq->msg_subtype = CARM_ARRAY_INFO;
- rc = carm_lookup_bucket(sizeof(struct carm_msg_ioctl) +
- sizeof(struct carm_array_info));
- BUG_ON(rc < 0);
- crq->msg_bucket = (u32) rc;
-
- memset(ioc, 0, sizeof(*ioc));
- ioc->type = CARM_MSG_ARRAY;
- ioc->subtype = CARM_ARRAY_INFO;
- ioc->array_id = (u8) array_idx;
- ioc->handle = cpu_to_le32(TAG_ENCODE(rq->tag));
- ioc->data_addr = cpu_to_le32(msg_data);
-
- spin_lock_irq(&host->lock);
- assert(host->state == HST_DEV_SCAN_START ||
- host->state == HST_DEV_SCAN);
- spin_unlock_irq(&host->lock);
-
- DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag);
- blk_execute_rq_nowait(host->oob_q, NULL, rq, true, NULL);
-
- return 0;
-
-err_out:
- spin_lock_irq(&host->lock);
- host->state = HST_ERROR;
- spin_unlock_irq(&host->lock);
- return rc;
-}
-
-typedef unsigned int (*carm_sspc_t)(struct carm_host *, unsigned int, void *);
-
-static int carm_send_special (struct carm_host *host, carm_sspc_t func)
-{
- struct request *rq;
- struct carm_request *crq;
- struct carm_msg_ioctl *ioc;
- void *mem;
- unsigned int msg_size;
- int rc;
-
- rq = blk_mq_alloc_request(host->oob_q, REQ_OP_DRV_OUT, 0);
- if (IS_ERR(rq))
- return -ENOMEM;
- crq = blk_mq_rq_to_pdu(rq);
-
- mem = carm_ref_msg(host, rq->tag);
-
- msg_size = func(host, rq->tag, mem);
-
- ioc = mem;
- crq->msg_type = ioc->type;
- crq->msg_subtype = ioc->subtype;
- rc = carm_lookup_bucket(msg_size);
- BUG_ON(rc < 0);
- crq->msg_bucket = (u32) rc;
-
- DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag);
- blk_execute_rq_nowait(host->oob_q, NULL, rq, true, NULL);
-
- return 0;
-}
-
-static unsigned int carm_fill_sync_time(struct carm_host *host,
- unsigned int idx, void *mem)
-{
- struct carm_msg_sync_time *st = mem;
-
- time64_t tv = ktime_get_real_seconds();
-
- memset(st, 0, sizeof(*st));
- st->type = CARM_MSG_MISC;
- st->subtype = MISC_SET_TIME;
- st->handle = cpu_to_le32(TAG_ENCODE(idx));
- st->timestamp = cpu_to_le32(tv);
-
- return sizeof(struct carm_msg_sync_time);
-}
-
-static unsigned int carm_fill_alloc_buf(struct carm_host *host,
- unsigned int idx, void *mem)
-{
- struct carm_msg_allocbuf *ab = mem;
-
- memset(ab, 0, sizeof(*ab));
- ab->type = CARM_MSG_MISC;
- ab->subtype = MISC_ALLOC_MEM;
- ab->handle = cpu_to_le32(TAG_ENCODE(idx));
- ab->n_sg = 1;
- ab->sg_type = SGT_32BIT;
- ab->addr = cpu_to_le32(host->shm_dma + (PDC_SHM_SIZE >> 1));
- ab->len = cpu_to_le32(PDC_SHM_SIZE >> 1);
- ab->evt_pool = cpu_to_le32(host->shm_dma + (16 * 1024));
- ab->n_evt = cpu_to_le32(1024);
- ab->rbuf_pool = cpu_to_le32(host->shm_dma);
- ab->n_rbuf = cpu_to_le32(RMSG_Q_LEN);
- ab->msg_pool = cpu_to_le32(host->shm_dma + RBUF_LEN);
- ab->n_msg = cpu_to_le32(CARM_Q_LEN);
- ab->sg[0].start = cpu_to_le32(host->shm_dma + (PDC_SHM_SIZE >> 1));
- ab->sg[0].len = cpu_to_le32(65536);
-
- return sizeof(struct carm_msg_allocbuf);
-}
-
-static unsigned int carm_fill_scan_channels(struct carm_host *host,
- unsigned int idx, void *mem)
-{
- struct carm_msg_ioctl *ioc = mem;
- u32 msg_data = (u32) (carm_ref_msg_dma(host, idx) +
- IOC_SCAN_CHAN_OFFSET);
-
- memset(ioc, 0, sizeof(*ioc));
- ioc->type = CARM_MSG_IOCTL;
- ioc->subtype = CARM_IOC_SCAN_CHAN;
- ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
- ioc->data_addr = cpu_to_le32(msg_data);
-
- /* fill output data area with "no device" default values */
- mem += IOC_SCAN_CHAN_OFFSET;
- memset(mem, IOC_SCAN_CHAN_NODEV, CARM_MAX_PORTS);
-
- return IOC_SCAN_CHAN_OFFSET + CARM_MAX_PORTS;
-}
-
-static unsigned int carm_fill_get_fw_ver(struct carm_host *host,
- unsigned int idx, void *mem)
-{
- struct carm_msg_get_fw_ver *ioc = mem;
- u32 msg_data = (u32) (carm_ref_msg_dma(host, idx) + sizeof(*ioc));
-
- memset(ioc, 0, sizeof(*ioc));
- ioc->type = CARM_MSG_MISC;
- ioc->subtype = MISC_GET_FW_VER;
- ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
- ioc->data_addr = cpu_to_le32(msg_data);
-
- return sizeof(struct carm_msg_get_fw_ver) +
- sizeof(struct carm_fw_ver);
-}
-
-static inline void carm_push_q (struct carm_host *host, struct request_queue *q)
-{
- unsigned int idx = host->wait_q_prod % CARM_MAX_WAIT_Q;
-
- blk_mq_stop_hw_queues(q);
- VPRINTK("STOPPED QUEUE %p\n", q);
-
- host->wait_q[idx] = q;
- host->wait_q_prod++;
- BUG_ON(host->wait_q_prod == host->wait_q_cons); /* overrun */
-}
-
-static inline struct request_queue *carm_pop_q(struct carm_host *host)
-{
- unsigned int idx;
-
- if (host->wait_q_prod == host->wait_q_cons)
- return NULL;
-
- idx = host->wait_q_cons % CARM_MAX_WAIT_Q;
- host->wait_q_cons++;
-
- return host->wait_q[idx];
-}
-
-static inline void carm_round_robin(struct carm_host *host)
-{
- struct request_queue *q = carm_pop_q(host);
- if (q) {
- blk_mq_start_hw_queues(q);
- VPRINTK("STARTED QUEUE %p\n", q);
- }
-}
-
-static inline enum dma_data_direction carm_rq_dir(struct request *rq)
-{
- return op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
-}
-
-static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx,
- const struct blk_mq_queue_data *bd)
-{
- struct request_queue *q = hctx->queue;
- struct request *rq = bd->rq;
- struct carm_port *port = q->queuedata;
- struct carm_host *host = port->host;
- struct carm_request *crq = blk_mq_rq_to_pdu(rq);
- struct carm_msg_rw *msg;
- struct scatterlist *sg;
- int i, n_elem = 0, rc;
- unsigned int msg_size;
- u32 tmp;
-
- crq->n_elem = 0;
- sg_init_table(crq->sg, CARM_MAX_REQ_SG);
-
- blk_mq_start_request(rq);
-
- spin_lock_irq(&host->lock);
- if (req_op(rq) == REQ_OP_DRV_OUT)
- goto send_msg;
-
- /* get scatterlist from block layer */
- sg = &crq->sg[0];
- n_elem = blk_rq_map_sg(q, rq, sg);
- if (n_elem <= 0)
- goto out_ioerr;
-
- /* map scatterlist to PCI bus addresses */
- n_elem = dma_map_sg(&host->pdev->dev, sg, n_elem, carm_rq_dir(rq));
- if (n_elem <= 0)
- goto out_ioerr;
-
- /* obey global hardware limit on S/G entries */
- if (host->hw_sg_used >= CARM_MAX_HOST_SG - n_elem)
- goto out_resource;
-
- crq->n_elem = n_elem;
- host->hw_sg_used += n_elem;
-
- /*
- * build read/write message
- */
-
- VPRINTK("build msg\n");
- msg = (struct carm_msg_rw *) carm_ref_msg(host, rq->tag);
-
- if (rq_data_dir(rq) == WRITE) {
- msg->type = CARM_MSG_WRITE;
- crq->msg_type = CARM_MSG_WRITE;
- } else {
- msg->type = CARM_MSG_READ;
- crq->msg_type = CARM_MSG_READ;
- }
-
- msg->id = port->port_no;
- msg->sg_count = n_elem;
- msg->sg_type = SGT_32BIT;
- msg->handle = cpu_to_le32(TAG_ENCODE(rq->tag));
- msg->lba = cpu_to_le32(blk_rq_pos(rq) & 0xffffffff);
- tmp = (blk_rq_pos(rq) >> 16) >> 16;
- msg->lba_high = cpu_to_le16( (u16) tmp );
- msg->lba_count = cpu_to_le16(blk_rq_sectors(rq));
-
- msg_size = sizeof(struct carm_msg_rw) - sizeof(msg->sg);
- for (i = 0; i < n_elem; i++) {
- struct carm_msg_sg *carm_sg = &msg->sg[i];
- carm_sg->start = cpu_to_le32(sg_dma_address(&crq->sg[i]));
- carm_sg->len = cpu_to_le32(sg_dma_len(&crq->sg[i]));
- msg_size += sizeof(struct carm_msg_sg);
- }
-
- rc = carm_lookup_bucket(msg_size);
- BUG_ON(rc < 0);
- crq->msg_bucket = (u32) rc;
-send_msg:
- /*
- * queue read/write message to hardware
- */
- VPRINTK("send msg, tag == %u\n", rq->tag);
- rc = carm_send_msg(host, crq, rq->tag);
- if (rc) {
- host->hw_sg_used -= n_elem;
- goto out_resource;
- }
-
- spin_unlock_irq(&host->lock);
- return BLK_STS_OK;
-out_resource:
- dma_unmap_sg(&host->pdev->dev, &crq->sg[0], n_elem, carm_rq_dir(rq));
- carm_push_q(host, q);
- spin_unlock_irq(&host->lock);
- return BLK_STS_DEV_RESOURCE;
-out_ioerr:
- carm_round_robin(host);
- spin_unlock_irq(&host->lock);
- return BLK_STS_IOERR;
-}
-
-static void carm_handle_array_info(struct carm_host *host,
- struct carm_request *crq, u8 *mem,
- blk_status_t error)
-{
- struct carm_port *port;
- u8 *msg_data = mem + sizeof(struct carm_array_info);
- struct carm_array_info *desc = (struct carm_array_info *) msg_data;
- u64 lo, hi;
- int cur_port;
- size_t slen;
-
- DPRINTK("ENTER\n");
-
- if (error)
- goto out;
- if (le32_to_cpu(desc->array_status) & ARRAY_NO_EXIST)
- goto out;
-
- cur_port = host->cur_scan_dev;
-
- /* should never occur */
- if ((cur_port < 0) || (cur_port >= CARM_MAX_PORTS)) {
- printk(KERN_ERR PFX "BUG: cur_scan_dev==%d, array_id==%d\n",
- cur_port, (int) desc->array_id);
- goto out;
- }
-
- port = &host->port[cur_port];
-
- lo = (u64) le32_to_cpu(desc->size);
- hi = (u64) le16_to_cpu(desc->size_hi);
-
- port->capacity = lo | (hi << 32);
- port->dev_geom_head = le16_to_cpu(desc->head);
- port->dev_geom_sect = le16_to_cpu(desc->sect);
- port->dev_geom_cyl = le16_to_cpu(desc->cyl);
-
- host->dev_active |= (1 << cur_port);
-
- strncpy(port->name, desc->name, sizeof(port->name));
- port->name[sizeof(port->name) - 1] = 0;
- slen = strlen(port->name);
- while (slen && (port->name[slen - 1] == ' ')) {
- port->name[slen - 1] = 0;
- slen--;
- }
-
- printk(KERN_INFO DRV_NAME "(%s): port %u device %Lu sectors\n",
- pci_name(host->pdev), port->port_no,
- (unsigned long long) port->capacity);
- printk(KERN_INFO DRV_NAME "(%s): port %u device \"%s\"\n",
- pci_name(host->pdev), port->port_no, port->name);
-
-out:
- assert(host->state == HST_DEV_SCAN);
- schedule_work(&host->fsm_task);
-}
-
-static void carm_handle_scan_chan(struct carm_host *host,
- struct carm_request *crq, u8 *mem,
- blk_status_t error)
-{
- u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET;
- unsigned int i, dev_count = 0;
- int new_state = HST_DEV_SCAN_START;
-
- DPRINTK("ENTER\n");
-
- if (error) {
- new_state = HST_ERROR;
- goto out;
- }
-
- /* TODO: scan and support non-disk devices */
- for (i = 0; i < 8; i++)
- if (msg_data[i] == 0) { /* direct-access device (disk) */
- host->dev_present |= (1 << i);
- dev_count++;
- }
-
- printk(KERN_INFO DRV_NAME "(%s): found %u interesting devices\n",
- pci_name(host->pdev), dev_count);
-
-out:
- assert(host->state == HST_PORT_SCAN);
- host->state = new_state;
- schedule_work(&host->fsm_task);
-}
-
-static void carm_handle_generic(struct carm_host *host,
- struct carm_request *crq, blk_status_t error,
- int cur_state, int next_state)
-{
- DPRINTK("ENTER\n");
-
- assert(host->state == cur_state);
- if (error)
- host->state = HST_ERROR;
- else
- host->state = next_state;
- schedule_work(&host->fsm_task);
-}
-
-static inline void carm_handle_resp(struct carm_host *host,
- __le32 ret_handle_le, u32 status)
-{
- u32 handle = le32_to_cpu(ret_handle_le);
- unsigned int msg_idx;
- struct request *rq;
- struct carm_request *crq;
- blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR;
- u8 *mem;
-
- VPRINTK("ENTER, handle == 0x%x\n", handle);
-
- if (unlikely(!TAG_VALID(handle))) {
- printk(KERN_ERR DRV_NAME "(%s): BUG: invalid tag 0x%x\n",
- pci_name(host->pdev), handle);
- return;
- }
-
- msg_idx = TAG_DECODE(handle);
- VPRINTK("tag == %u\n", msg_idx);
-
- rq = blk_mq_tag_to_rq(host->tag_set.tags[0], msg_idx);
- crq = blk_mq_rq_to_pdu(rq);
-
- /* fast path */
- if (likely(crq->msg_type == CARM_MSG_READ ||
- crq->msg_type == CARM_MSG_WRITE)) {
- dma_unmap_sg(&host->pdev->dev, &crq->sg[0], crq->n_elem,
- carm_rq_dir(rq));
- goto done;
- }
-
- mem = carm_ref_msg(host, msg_idx);
-
- switch (crq->msg_type) {
- case CARM_MSG_IOCTL: {
- switch (crq->msg_subtype) {
- case CARM_IOC_SCAN_CHAN:
- carm_handle_scan_chan(host, crq, mem, error);
- goto done;
- default:
- /* unknown / invalid response */
- goto err_out;
- }
- break;
- }
-
- case CARM_MSG_MISC: {
- switch (crq->msg_subtype) {
- case MISC_ALLOC_MEM:
- carm_handle_generic(host, crq, error,
- HST_ALLOC_BUF, HST_SYNC_TIME);
- goto done;
- case MISC_SET_TIME:
- carm_handle_generic(host, crq, error,
- HST_SYNC_TIME, HST_GET_FW_VER);
- goto done;
- case MISC_GET_FW_VER: {
- struct carm_fw_ver *ver = (struct carm_fw_ver *)
- (mem + sizeof(struct carm_msg_get_fw_ver));
- if (!error) {
- host->fw_ver = le32_to_cpu(ver->version);
- host->flags |= (ver->features & FL_FW_VER_MASK);
- }
- carm_handle_generic(host, crq, error,
- HST_GET_FW_VER, HST_PORT_SCAN);
- goto done;
- }
- default:
- /* unknown / invalid response */
- goto err_out;
- }
- break;
- }
-
- case CARM_MSG_ARRAY: {
- switch (crq->msg_subtype) {
- case CARM_ARRAY_INFO:
- carm_handle_array_info(host, crq, mem, error);
- break;
- default:
- /* unknown / invalid response */
- goto err_out;
- }
- break;
- }
-
- default:
- /* unknown / invalid response */
- goto err_out;
- }
-
- return;
-
-err_out:
- printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
- pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
- error = BLK_STS_IOERR;
-done:
- host->hw_sg_used -= crq->n_elem;
- blk_mq_end_request(blk_mq_rq_from_pdu(crq), error);
-
- if (host->hw_sg_used <= CARM_SG_LOW_WATER)
- carm_round_robin(host);
-}
-
-static inline void carm_handle_responses(struct carm_host *host)
-{
- void __iomem *mmio = host->mmio;
- struct carm_response *resp = (struct carm_response *) host->shm;
- unsigned int work = 0;
- unsigned int idx = host->resp_idx % RMSG_Q_LEN;
-
- while (1) {
- u32 status = le32_to_cpu(resp[idx].status);
-
- if (status == 0xffffffff) {
- VPRINTK("ending response on index %u\n", idx);
- writel(idx << 3, mmio + CARM_RESP_IDX);
- break;
- }
-
- /* response to a message we sent */
- else if ((status & (1 << 31)) == 0) {
- VPRINTK("handling msg response on index %u\n", idx);
- carm_handle_resp(host, resp[idx].ret_handle, status);
- resp[idx].status = cpu_to_le32(0xffffffff);
- }
-
- /* asynchronous events the hardware throws our way */
- else if ((status & 0xff000000) == (1 << 31)) {
- u8 *evt_type_ptr = (u8 *) &resp[idx];
- u8 evt_type = *evt_type_ptr;
- printk(KERN_WARNING DRV_NAME "(%s): unhandled event type %d\n",
- pci_name(host->pdev), (int) evt_type);
- resp[idx].status = cpu_to_le32(0xffffffff);
- }
-
- idx = NEXT_RESP(idx);
- work++;
- }
-
- VPRINTK("EXIT, work==%u\n", work);
- host->resp_idx += work;
-}
-
-static irqreturn_t carm_interrupt(int irq, void *__host)
-{
- struct carm_host *host = __host;
- void __iomem *mmio;
- u32 mask;
- int handled = 0;
- unsigned long flags;
-
- if (!host) {
- VPRINTK("no host\n");
- return IRQ_NONE;
- }
-
- spin_lock_irqsave(&host->lock, flags);
-
- mmio = host->mmio;
-
- /* reading should also clear interrupts */
- mask = readl(mmio + CARM_INT_STAT);
-
- if (mask == 0 || mask == 0xffffffff) {
- VPRINTK("no work, mask == 0x%x\n", mask);
- goto out;
- }
-
- if (mask & INT_ACK_MASK)
- writel(mask, mmio + CARM_INT_STAT);
-
- if (unlikely(host->state == HST_INVALID)) {
- VPRINTK("not initialized yet, mask = 0x%x\n", mask);
- goto out;
- }
-
- if (mask & CARM_HAVE_RESP) {
- handled = 1;
- carm_handle_responses(host);
- }
-
-out:
- spin_unlock_irqrestore(&host->lock, flags);
- VPRINTK("EXIT\n");
- return IRQ_RETVAL(handled);
-}
-
-static void carm_fsm_task (struct work_struct *work)
-{
- struct carm_host *host =
- container_of(work, struct carm_host, fsm_task);
- unsigned long flags;
- unsigned int state;
- int rc, i, next_dev;
- int reschedule = 0;
- int new_state = HST_INVALID;
-
- spin_lock_irqsave(&host->lock, flags);
- state = host->state;
- spin_unlock_irqrestore(&host->lock, flags);
-
- DPRINTK("ENTER, state == %s\n", state_name[state]);
-
- switch (state) {
- case HST_PROBE_START:
- new_state = HST_ALLOC_BUF;
- reschedule = 1;
- break;
-
- case HST_ALLOC_BUF:
- rc = carm_send_special(host, carm_fill_alloc_buf);
- if (rc) {
- new_state = HST_ERROR;
- reschedule = 1;
- }
- break;
-
- case HST_SYNC_TIME:
- rc = carm_send_special(host, carm_fill_sync_time);
- if (rc) {
- new_state = HST_ERROR;
- reschedule = 1;
- }
- break;
-
- case HST_GET_FW_VER:
- rc = carm_send_special(host, carm_fill_get_fw_ver);
- if (rc) {
- new_state = HST_ERROR;
- reschedule = 1;
- }
- break;
-
- case HST_PORT_SCAN:
- rc = carm_send_special(host, carm_fill_scan_channels);
- if (rc) {
- new_state = HST_ERROR;
- reschedule = 1;
- }
- break;
-
- case HST_DEV_SCAN_START:
- host->cur_scan_dev = -1;
- new_state = HST_DEV_SCAN;
- reschedule = 1;
- break;
-
- case HST_DEV_SCAN:
- next_dev = -1;
- for (i = host->cur_scan_dev + 1; i < CARM_MAX_PORTS; i++)
- if (host->dev_present & (1 << i)) {
- next_dev = i;
- break;
- }
-
- if (next_dev >= 0) {
- host->cur_scan_dev = next_dev;
- rc = carm_array_info(host, next_dev);
- if (rc) {
- new_state = HST_ERROR;
- reschedule = 1;
- }
- } else {
- new_state = HST_DEV_ACTIVATE;
- reschedule = 1;
- }
- break;
-
- case HST_DEV_ACTIVATE: {
- int activated = 0;
- for (i = 0; i < CARM_MAX_PORTS; i++)
- if (host->dev_active & (1 << i)) {
- struct carm_port *port = &host->port[i];
- struct gendisk *disk = port->disk;
-
- set_capacity(disk, port->capacity);
- add_disk(disk);
- activated++;
- }
-
- printk(KERN_INFO DRV_NAME "(%s): %d ports activated\n",
- pci_name(host->pdev), activated);
-
- new_state = HST_PROBE_FINISHED;
- reschedule = 1;
- break;
- }
-
- case HST_PROBE_FINISHED:
- complete(&host->probe_comp);
- break;
-
- case HST_ERROR:
- /* FIXME: TODO */
- break;
-
- default:
- /* should never occur */
- printk(KERN_ERR PFX "BUG: unknown state %d\n", state);
- assert(0);
- break;
- }
-
- if (new_state != HST_INVALID) {
- spin_lock_irqsave(&host->lock, flags);
- host->state = new_state;
- spin_unlock_irqrestore(&host->lock, flags);
- }
- if (reschedule)
- schedule_work(&host->fsm_task);
-}
-
-static int carm_init_wait(void __iomem *mmio, u32 bits, unsigned int test_bit)
-{
- unsigned int i;
-
- for (i = 0; i < 50000; i++) {
- u32 tmp = readl(mmio + CARM_LMUC);
- udelay(100);
-
- if (test_bit) {
- if ((tmp & bits) == bits)
- return 0;
- } else {
- if ((tmp & bits) == 0)
- return 0;
- }
-
- cond_resched();
- }
-
- printk(KERN_ERR PFX "carm_init_wait timeout, bits == 0x%x, test_bit == %s\n",
- bits, test_bit ? "yes" : "no");
- return -EBUSY;
-}
-
-static void carm_init_responses(struct carm_host *host)
-{
- void __iomem *mmio = host->mmio;
- unsigned int i;
- struct carm_response *resp = (struct carm_response *) host->shm;
-
- for (i = 0; i < RMSG_Q_LEN; i++)
- resp[i].status = cpu_to_le32(0xffffffff);
-
- writel(0, mmio + CARM_RESP_IDX);
-}
-
-static int carm_init_host(struct carm_host *host)
-{
- void __iomem *mmio = host->mmio;
- u32 tmp;
- u8 tmp8;
- int rc;
-
- DPRINTK("ENTER\n");
-
- writel(0, mmio + CARM_INT_MASK);
-
- tmp8 = readb(mmio + CARM_INITC);
- if (tmp8 & 0x01) {
- tmp8 &= ~0x01;
- writeb(tmp8, mmio + CARM_INITC);
- readb(mmio + CARM_INITC); /* flush */
-
- DPRINTK("snooze...\n");
- msleep(5000);
- }
-
- tmp = readl(mmio + CARM_HMUC);
- if (tmp & CARM_CME) {
- DPRINTK("CME bit present, waiting\n");
- rc = carm_init_wait(mmio, CARM_CME, 1);
- if (rc) {
- DPRINTK("EXIT, carm_init_wait 1 failed\n");
- return rc;
- }
- }
- if (tmp & CARM_RME) {
- DPRINTK("RME bit present, waiting\n");
- rc = carm_init_wait(mmio, CARM_RME, 1);
- if (rc) {
- DPRINTK("EXIT, carm_init_wait 2 failed\n");
- return rc;
- }
- }
-
- tmp &= ~(CARM_RME | CARM_CME);
- writel(tmp, mmio + CARM_HMUC);
- readl(mmio + CARM_HMUC); /* flush */
-
- rc = carm_init_wait(mmio, CARM_RME | CARM_CME, 0);
- if (rc) {
- DPRINTK("EXIT, carm_init_wait 3 failed\n");
- return rc;
- }
-
- carm_init_buckets(mmio);
-
- writel(host->shm_dma & 0xffffffff, mmio + RBUF_ADDR_LO);
- writel((host->shm_dma >> 16) >> 16, mmio + RBUF_ADDR_HI);
- writel(RBUF_LEN, mmio + RBUF_BYTE_SZ);
-
- tmp = readl(mmio + CARM_HMUC);
- tmp |= (CARM_RME | CARM_CME | CARM_WZBC);
- writel(tmp, mmio + CARM_HMUC);
- readl(mmio + CARM_HMUC); /* flush */
-
- rc = carm_init_wait(mmio, CARM_RME | CARM_CME, 1);
- if (rc) {
- DPRINTK("EXIT, carm_init_wait 4 failed\n");
- return rc;
- }
-
- writel(0, mmio + CARM_HMPHA);
- writel(INT_DEF_MASK, mmio + CARM_INT_MASK);
-
- carm_init_responses(host);
-
- /* start initialization, probing state machine */
- spin_lock_irq(&host->lock);
- assert(host->state == HST_INVALID);
- host->state = HST_PROBE_START;
- spin_unlock_irq(&host->lock);
- schedule_work(&host->fsm_task);
-
- DPRINTK("EXIT\n");
- return 0;
-}
-
-static const struct blk_mq_ops carm_mq_ops = {
- .queue_rq = carm_queue_rq,
-};
-
-static int carm_init_disk(struct carm_host *host, unsigned int port_no)
-{
- struct carm_port *port = &host->port[port_no];
- struct gendisk *disk;
- struct request_queue *q;
-
- port->host = host;
- port->port_no = port_no;
-
- disk = alloc_disk(CARM_MINORS_PER_MAJOR);
- if (!disk)
- return -ENOMEM;
-
- port->disk = disk;
- sprintf(disk->disk_name, DRV_NAME "/%u",
- (unsigned int)host->id * CARM_MAX_PORTS + port_no);
- disk->major = host->major;
- disk->first_minor = port_no * CARM_MINORS_PER_MAJOR;
- disk->fops = &carm_bd_ops;
- disk->private_data = port;
-
- q = blk_mq_init_queue(&host->tag_set);
- if (IS_ERR(q))
- return PTR_ERR(q);
-
- blk_queue_max_segments(q, CARM_MAX_REQ_SG);
- blk_queue_segment_boundary(q, CARM_SG_BOUNDARY);
-
- q->queuedata = port;
- disk->queue = q;
- return 0;
-}
-
-static void carm_free_disk(struct carm_host *host, unsigned int port_no)
-{
- struct carm_port *port = &host->port[port_no];
- struct gendisk *disk = port->disk;
-
- if (!disk)
- return;
-
- if (disk->flags & GENHD_FL_UP)
- del_gendisk(disk);
- if (disk->queue)
- blk_cleanup_queue(disk->queue);
- put_disk(disk);
-}
-
-static int carm_init_shm(struct carm_host *host)
-{
- host->shm = dma_alloc_coherent(&host->pdev->dev, CARM_SHM_SIZE,
- &host->shm_dma, GFP_KERNEL);
- if (!host->shm)
- return -ENOMEM;
-
- host->msg_base = host->shm + RBUF_LEN;
- host->msg_dma = host->shm_dma + RBUF_LEN;
-
- memset(host->shm, 0xff, RBUF_LEN);
- memset(host->msg_base, 0, PDC_SHM_SIZE - RBUF_LEN);
-
- return 0;
-}
-
-static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
-{
- struct carm_host *host;
- int rc;
- struct request_queue *q;
- unsigned int i;
-
- printk_once(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");
-
- rc = pci_enable_device(pdev);
- if (rc)
- return rc;
-
- rc = pci_request_regions(pdev, DRV_NAME);
- if (rc)
- goto err_out;
-
- rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
- if (rc) {
- printk(KERN_ERR DRV_NAME "(%s): DMA mask failure\n",
- pci_name(pdev));
- goto err_out_regions;
- }
-
- host = kzalloc(sizeof(*host), GFP_KERNEL);
- if (!host) {
- printk(KERN_ERR DRV_NAME "(%s): memory alloc failure\n",
- pci_name(pdev));
- rc = -ENOMEM;
- goto err_out_regions;
- }
-
- host->pdev = pdev;
- spin_lock_init(&host->lock);
- INIT_WORK(&host->fsm_task, carm_fsm_task);
- init_completion(&host->probe_comp);
-
- host->mmio = ioremap(pci_resource_start(pdev, 0),
- pci_resource_len(pdev, 0));
- if (!host->mmio) {
- printk(KERN_ERR DRV_NAME "(%s): MMIO alloc failure\n",
- pci_name(pdev));
- rc = -ENOMEM;
- goto err_out_kfree;
- }
-
- rc = carm_init_shm(host);
- if (rc) {
- printk(KERN_ERR DRV_NAME "(%s): DMA SHM alloc failure\n",
- pci_name(pdev));
- goto err_out_iounmap;
- }
-
- memset(&host->tag_set, 0, sizeof(host->tag_set));
- host->tag_set.ops = &carm_mq_ops;
- host->tag_set.cmd_size = sizeof(struct carm_request);
- host->tag_set.nr_hw_queues = 1;
- host->tag_set.nr_maps = 1;
- host->tag_set.queue_depth = max_queue;
- host->tag_set.numa_node = NUMA_NO_NODE;
- host->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
-
- rc = blk_mq_alloc_tag_set(&host->tag_set);
- if (rc)
- goto err_out_dma_free;
-
- q = blk_mq_init_queue(&host->tag_set);
- if (IS_ERR(q)) {
- rc = PTR_ERR(q);
- blk_mq_free_tag_set(&host->tag_set);
- goto err_out_dma_free;
- }
-
- host->oob_q = q;
- q->queuedata = host;
-
- /*
- * Figure out which major to use: 160, 161, or dynamic
- */
- if (!test_and_set_bit(0, &carm_major_alloc))
- host->major = 160;
- else if (!test_and_set_bit(1, &carm_major_alloc))
- host->major = 161;
- else
- host->flags |= FL_DYN_MAJOR;
-
- host->id = carm_host_id;
- sprintf(host->name, DRV_NAME "%d", carm_host_id);
-
- rc = register_blkdev(host->major, host->name);
- if (rc < 0)
- goto err_out_free_majors;
- if (host->flags & FL_DYN_MAJOR)
- host->major = rc;
-
- for (i = 0; i < CARM_MAX_PORTS; i++) {
- rc = carm_init_disk(host, i);
- if (rc)
- goto err_out_blkdev_disks;
- }
-
- pci_set_master(pdev);
-
- rc = request_irq(pdev->irq, carm_interrupt, IRQF_SHARED, DRV_NAME, host);
- if (rc) {
- printk(KERN_ERR DRV_NAME "(%s): irq alloc failure\n",
- pci_name(pdev));
- goto err_out_blkdev_disks;
- }
-
- rc = carm_init_host(host);
- if (rc)
- goto err_out_free_irq;
-
- DPRINTK("waiting for probe_comp\n");
- wait_for_completion(&host->probe_comp);
-
- printk(KERN_INFO "%s: pci %s, ports %d, io %llx, irq %u, major %d\n",
- host->name, pci_name(pdev), (int) CARM_MAX_PORTS,
- (unsigned long long)pci_resource_start(pdev, 0),
- pdev->irq, host->major);
-
- carm_host_id++;
- pci_set_drvdata(pdev, host);
- return 0;
-
-err_out_free_irq:
- free_irq(pdev->irq, host);
-err_out_blkdev_disks:
- for (i = 0; i < CARM_MAX_PORTS; i++)
- carm_free_disk(host, i);
- unregister_blkdev(host->major, host->name);
-err_out_free_majors:
- if (host->major == 160)
- clear_bit(0, &carm_major_alloc);
- else if (host->major == 161)
- clear_bit(1, &carm_major_alloc);
- blk_cleanup_queue(host->oob_q);
- blk_mq_free_tag_set(&host->tag_set);
-err_out_dma_free:
- dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma);
-err_out_iounmap:
- iounmap(host->mmio);
-err_out_kfree:
- kfree(host);
-err_out_regions:
- pci_release_regions(pdev);
-err_out:
- pci_disable_device(pdev);
- return rc;
-}
-
-static void carm_remove_one (struct pci_dev *pdev)
-{
- struct carm_host *host = pci_get_drvdata(pdev);
- unsigned int i;
-
- if (!host) {
- printk(KERN_ERR PFX "BUG: no host data for PCI(%s)\n",
- pci_name(pdev));
- return;
- }
-
- free_irq(pdev->irq, host);
- for (i = 0; i < CARM_MAX_PORTS; i++)
- carm_free_disk(host, i);
- unregister_blkdev(host->major, host->name);
- if (host->major == 160)
- clear_bit(0, &carm_major_alloc);
- else if (host->major == 161)
- clear_bit(1, &carm_major_alloc);
- blk_cleanup_queue(host->oob_q);
- blk_mq_free_tag_set(&host->tag_set);
- dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma);
- iounmap(host->mmio);
- kfree(host);
- pci_release_regions(pdev);
- pci_disable_device(pdev);
-}
-
-module_pci_driver(carm_driver);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 9b3ea86c20e5..3afc07b59477 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -1063,14 +1063,15 @@ static int virtblk_freeze(struct virtio_device *vdev)
{
struct virtio_blk *vblk = vdev->priv;
+ /* Ensure no requests in virtqueues before deleting vqs. */
+ blk_mq_freeze_queue(vblk->disk->queue);
+
/* Ensure we don't receive any more interrupts */
vdev->config->reset(vdev);
/* Make sure no work handler is accessing the device. */
flush_work(&vblk->config_work);
- blk_mq_quiesce_queue(vblk->disk->queue);
-
vdev->config->del_vqs(vdev);
kfree(vblk->vqs);
@@ -1088,7 +1089,7 @@ static int virtblk_restore(struct virtio_device *vdev)
virtio_device_ready(vdev);
- blk_mq_unquiesce_queue(vblk->disk->queue);
+ blk_mq_unfreeze_queue(vblk->disk->queue);
return 0;
}
#endif
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index d0538c03f033..da67621ebc21 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -779,7 +779,8 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
ring_req->u.rw.handle = info->handle;
ring_req->operation = rq_data_dir(req) ?
BLKIF_OP_WRITE : BLKIF_OP_READ;
- if (req_op(req) == REQ_OP_FLUSH || req->cmd_flags & REQ_FUA) {
+ if (req_op(req) == REQ_OP_FLUSH ||
+ (req_op(req) == REQ_OP_WRITE && (req->cmd_flags & REQ_FUA))) {
/*
* Ideally we can do an unordered flush-to-disk.
* In case the backend onlysupports barriers, use that.