aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/rbd.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/rbd.c')
-rw-r--r--drivers/block/rbd.c537
1 files changed, 299 insertions, 238 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 55745d6953f0..fd0cdced8024 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -626,9 +626,8 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
static int rbd_dev_refresh(struct rbd_device *rbd_dev);
-static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev);
-static int rbd_dev_header_info(struct rbd_device *rbd_dev);
-static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev);
+static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header);
static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
u64 snap_id);
static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
@@ -1098,15 +1097,24 @@ static void rbd_init_layout(struct rbd_device *rbd_dev)
RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL);
}
+static void rbd_image_header_cleanup(struct rbd_image_header *header)
+{
+ kfree(header->object_prefix);
+ ceph_put_snap_context(header->snapc);
+ kfree(header->snap_sizes);
+ kfree(header->snap_names);
+
+ memset(header, 0, sizeof(*header));
+}
+
/*
* Fill an rbd image header with information from the given format 1
* on-disk header.
*/
-static int rbd_header_from_disk(struct rbd_device *rbd_dev,
- struct rbd_image_header_ondisk *ondisk)
+static int rbd_header_from_disk(struct rbd_image_header *header,
+ struct rbd_image_header_ondisk *ondisk,
+ bool first_time)
{
- struct rbd_image_header *header = &rbd_dev->header;
- bool first_time = header->object_prefix == NULL;
struct ceph_snap_context *snapc;
char *object_prefix = NULL;
char *snap_names = NULL;
@@ -1173,11 +1181,6 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev,
if (first_time) {
header->object_prefix = object_prefix;
header->obj_order = ondisk->options.order;
- rbd_init_layout(rbd_dev);
- } else {
- ceph_put_snap_context(header->snapc);
- kfree(header->snap_names);
- kfree(header->snap_sizes);
}
/* The remaining fields always get updated (when we refresh) */
@@ -1493,14 +1496,30 @@ static bool rbd_obj_is_tail(struct rbd_obj_request *obj_req)
/*
* Must be called after rbd_obj_calc_img_extents().
*/
-static bool rbd_obj_copyup_enabled(struct rbd_obj_request *obj_req)
+static void rbd_obj_set_copyup_enabled(struct rbd_obj_request *obj_req)
{
- if (!obj_req->num_img_extents ||
- (rbd_obj_is_entire(obj_req) &&
- !obj_req->img_request->snapc->num_snaps))
- return false;
+ rbd_assert(obj_req->img_request->snapc);
- return true;
+ if (obj_req->img_request->op_type == OBJ_OP_DISCARD) {
+ dout("%s %p objno %llu discard\n", __func__, obj_req,
+ obj_req->ex.oe_objno);
+ return;
+ }
+
+ if (!obj_req->num_img_extents) {
+ dout("%s %p objno %llu not overlapping\n", __func__, obj_req,
+ obj_req->ex.oe_objno);
+ return;
+ }
+
+ if (rbd_obj_is_entire(obj_req) &&
+ !obj_req->img_request->snapc->num_snaps) {
+ dout("%s %p objno %llu entire\n", __func__, obj_req,
+ obj_req->ex.oe_objno);
+ return;
+ }
+
+ obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
}
static u64 rbd_obj_img_extents_bytes(struct rbd_obj_request *obj_req)
@@ -1599,6 +1618,7 @@ __rbd_obj_add_osd_request(struct rbd_obj_request *obj_req,
static struct ceph_osd_request *
rbd_obj_add_osd_request(struct rbd_obj_request *obj_req, int num_ops)
{
+ rbd_assert(obj_req->img_request->snapc);
return __rbd_obj_add_osd_request(obj_req, obj_req->img_request->snapc,
num_ops);
}
@@ -1727,11 +1747,14 @@ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev)
* Caller is responsible for filling in the list of object requests
* that comprises the image request, and the Linux request pointer
* (if there is one).
+ *
+ * Only snap_id is captured here, for reads. For writes, snapshot
+ * context is captured in rbd_img_object_requests() after exclusive
+ * lock is ensured to be held.
*/
static struct rbd_img_request *rbd_img_request_create(
struct rbd_device *rbd_dev,
- enum obj_operation_type op_type,
- struct ceph_snap_context *snapc)
+ enum obj_operation_type op_type)
{
struct rbd_img_request *img_request;
@@ -1743,8 +1766,6 @@ static struct rbd_img_request *rbd_img_request_create(
img_request->op_type = op_type;
if (!rbd_img_is_write(img_request))
img_request->snap_id = rbd_dev->spec->snap_id;
- else
- img_request->snapc = snapc;
if (rbd_dev_parent_get(rbd_dev))
img_request_layered_set(img_request);
@@ -2087,7 +2108,7 @@ static int rbd_object_map_update_finish(struct rbd_obj_request *obj_req,
struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
struct ceph_osd_data *osd_data;
u64 objno;
- u8 state, new_state, uninitialized_var(current_state);
+ u8 state, new_state, current_state;
bool has_current_state;
void *p;
@@ -2389,9 +2410,6 @@ static int rbd_obj_init_write(struct rbd_obj_request *obj_req)
if (ret)
return ret;
- if (rbd_obj_copyup_enabled(obj_req))
- obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
-
obj_req->write_state = RBD_OBJ_WRITE_START;
return 0;
}
@@ -2497,8 +2515,6 @@ static int rbd_obj_init_zeroout(struct rbd_obj_request *obj_req)
if (ret)
return ret;
- if (rbd_obj_copyup_enabled(obj_req))
- obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
if (!obj_req->num_img_extents) {
obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT;
if (rbd_obj_is_entire(obj_req))
@@ -2935,7 +2951,7 @@ static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req)
int ret;
child_img_req = rbd_img_request_create(img_req->rbd_dev->parent,
- OBJ_OP_READ, NULL);
+ OBJ_OP_READ);
if (!child_img_req)
return -ENOMEM;
@@ -3439,6 +3455,7 @@ again:
case RBD_OBJ_WRITE_START:
rbd_assert(!*result);
+ rbd_obj_set_copyup_enabled(obj_req);
if (rbd_obj_write_is_noop(obj_req))
return true;
@@ -3586,14 +3603,15 @@ static bool rbd_lock_add_request(struct rbd_img_request *img_req)
static void rbd_lock_del_request(struct rbd_img_request *img_req)
{
struct rbd_device *rbd_dev = img_req->rbd_dev;
- bool need_wakeup;
+ bool need_wakeup = false;
lockdep_assert_held(&rbd_dev->lock_rwsem);
spin_lock(&rbd_dev->lock_lists_lock);
- rbd_assert(!list_empty(&img_req->lock_item));
- list_del_init(&img_req->lock_item);
- need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING &&
- list_empty(&rbd_dev->running_list));
+ if (!list_empty(&img_req->lock_item)) {
+ list_del_init(&img_req->lock_item);
+ need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING &&
+ list_empty(&rbd_dev->running_list));
+ }
spin_unlock(&rbd_dev->lock_lists_lock);
if (need_wakeup)
complete(&rbd_dev->releasing_wait);
@@ -3625,9 +3643,19 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
static void rbd_img_object_requests(struct rbd_img_request *img_req)
{
+ struct rbd_device *rbd_dev = img_req->rbd_dev;
struct rbd_obj_request *obj_req;
rbd_assert(!img_req->pending.result && !img_req->pending.num_pending);
+ rbd_assert(!need_exclusive_lock(img_req) ||
+ __rbd_is_lock_owner(rbd_dev));
+
+ if (rbd_img_is_write(img_req)) {
+ rbd_assert(!img_req->snapc);
+ down_read(&rbd_dev->header_rwsem);
+ img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
+ up_read(&rbd_dev->header_rwsem);
+ }
for_each_obj_request(img_req, obj_req) {
int result = 0;
@@ -3645,7 +3673,6 @@ static void rbd_img_object_requests(struct rbd_img_request *img_req)
static bool rbd_img_advance(struct rbd_img_request *img_req, int *result)
{
- struct rbd_device *rbd_dev = img_req->rbd_dev;
int ret;
again:
@@ -3666,9 +3693,6 @@ again:
if (*result)
return true;
- rbd_assert(!need_exclusive_lock(img_req) ||
- __rbd_is_lock_owner(rbd_dev));
-
rbd_img_object_requests(img_req);
if (!img_req->pending.num_pending) {
*result = img_req->pending.result;
@@ -3974,14 +3998,19 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result)
return;
}
- list_for_each_entry(img_req, &rbd_dev->acquiring_list, lock_item) {
+ while (!list_empty(&rbd_dev->acquiring_list)) {
+ img_req = list_first_entry(&rbd_dev->acquiring_list,
+ struct rbd_img_request, lock_item);
mutex_lock(&img_req->state_mutex);
rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK);
+ if (!result)
+ list_move_tail(&img_req->lock_item,
+ &rbd_dev->running_list);
+ else
+ list_del_init(&img_req->lock_item);
rbd_img_schedule(img_req, result);
mutex_unlock(&img_req->state_mutex);
}
-
- list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
}
static int get_lock_owner_info(struct rbd_device *rbd_dev,
@@ -4130,6 +4159,10 @@ static int rbd_post_acquire_action(struct rbd_device *rbd_dev)
{
int ret;
+ ret = rbd_dev_refresh(rbd_dev);
+ if (ret)
+ return ret;
+
if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) {
ret = rbd_object_map_open(rbd_dev);
if (ret)
@@ -4788,7 +4821,6 @@ static void rbd_queue_workfn(struct work_struct *work)
struct request *rq = blk_mq_rq_from_pdu(work);
struct rbd_device *rbd_dev = rq->q->queuedata;
struct rbd_img_request *img_request;
- struct ceph_snap_context *snapc = NULL;
u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT;
u64 length = blk_rq_bytes(rq);
enum obj_operation_type op_type;
@@ -4853,10 +4885,6 @@ static void rbd_queue_workfn(struct work_struct *work)
down_read(&rbd_dev->header_rwsem);
mapping_size = rbd_dev->mapping.size;
- if (op_type != OBJ_OP_READ) {
- snapc = rbd_dev->header.snapc;
- ceph_get_snap_context(snapc);
- }
up_read(&rbd_dev->header_rwsem);
if (offset + length > mapping_size) {
@@ -4866,13 +4894,12 @@ static void rbd_queue_workfn(struct work_struct *work)
goto err_rq;
}
- img_request = rbd_img_request_create(rbd_dev, op_type, snapc);
+ img_request = rbd_img_request_create(rbd_dev, op_type);
if (!img_request) {
result = -ENOMEM;
goto err_rq;
}
img_request->rq = rq;
- snapc = NULL; /* img_request consumes a ref */
dout("%s rbd_dev %p img_req %p %s %llu~%llu\n", __func__, rbd_dev,
img_request, obj_op_name(op_type), offset, length);
@@ -4894,7 +4921,6 @@ err_rq:
if (result)
rbd_warn(rbd_dev, "%s %llx at %llx result %d",
obj_op_name(op_type), length, offset, result);
- ceph_put_snap_context(snapc);
err:
blk_mq_end_request(rq, errno_to_blk_status(result));
}
@@ -4966,7 +4992,9 @@ out_req:
* return, the rbd_dev->header field will contain up-to-date
* information about the image.
*/
-static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
+static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header,
+ bool first_time)
{
struct rbd_image_header_ondisk *ondisk = NULL;
u32 snap_count = 0;
@@ -5014,7 +5042,7 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
snap_count = le32_to_cpu(ondisk->snap_count);
} while (snap_count != want_count);
- ret = rbd_header_from_disk(rbd_dev, ondisk);
+ ret = rbd_header_from_disk(header, ondisk, first_time);
out:
kfree(ondisk);
@@ -5058,43 +5086,6 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev)
}
}
-static int rbd_dev_refresh(struct rbd_device *rbd_dev)
-{
- u64 mapping_size;
- int ret;
-
- down_write(&rbd_dev->header_rwsem);
- mapping_size = rbd_dev->mapping.size;
-
- ret = rbd_dev_header_info(rbd_dev);
- if (ret)
- goto out;
-
- /*
- * If there is a parent, see if it has disappeared due to the
- * mapped image getting flattened.
- */
- if (rbd_dev->parent) {
- ret = rbd_dev_v2_parent_info(rbd_dev);
- if (ret)
- goto out;
- }
-
- if (rbd_dev->spec->snap_id == CEPH_NOSNAP) {
- rbd_dev->mapping.size = rbd_dev->header.image_size;
- } else {
- /* validate mapped snapshot's EXISTS flag */
- rbd_exists_validate(rbd_dev);
- }
-
-out:
- up_write(&rbd_dev->header_rwsem);
- if (!ret && mapping_size != rbd_dev->mapping.size)
- rbd_dev_update_size(rbd_dev);
-
- return ret;
-}
-
static int rbd_init_request(struct blk_mq_tag_set *set, struct request *rq,
unsigned int hctx_idx, unsigned int numa_node)
{
@@ -5529,8 +5520,7 @@ static void rbd_dev_release(struct device *dev)
module_put(THIS_MODULE);
}
-static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
- struct rbd_spec *spec)
+static struct rbd_device *__rbd_dev_create(struct rbd_spec *spec)
{
struct rbd_device *rbd_dev;
@@ -5575,9 +5565,6 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
rbd_dev->dev.parent = &rbd_root_dev;
device_initialize(&rbd_dev->dev);
- rbd_dev->rbd_client = rbdc;
- rbd_dev->spec = spec;
-
return rbd_dev;
}
@@ -5590,12 +5577,10 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
{
struct rbd_device *rbd_dev;
- rbd_dev = __rbd_dev_create(rbdc, spec);
+ rbd_dev = __rbd_dev_create(spec);
if (!rbd_dev)
return NULL;
- rbd_dev->opts = opts;
-
/* get an id and fill in device name */
rbd_dev->dev_id = ida_simple_get(&rbd_dev_id_ida, 0,
minor_to_rbd_dev_id(1 << MINORBITS),
@@ -5612,6 +5597,10 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
/* we have a ref from do_rbd_add() */
__module_get(THIS_MODULE);
+ rbd_dev->rbd_client = rbdc;
+ rbd_dev->spec = spec;
+ rbd_dev->opts = opts;
+
dout("%s rbd_dev %p dev_id %d\n", __func__, rbd_dev, rbd_dev->dev_id);
return rbd_dev;
@@ -5666,17 +5655,12 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
return 0;
}
-static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
-{
- return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
- &rbd_dev->header.obj_order,
- &rbd_dev->header.image_size);
-}
-
-static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev,
+ char **pobject_prefix)
{
size_t size;
void *reply_buf;
+ char *object_prefix;
int ret;
void *p;
@@ -5694,16 +5678,16 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
goto out;
p = reply_buf;
- rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
- p + ret, NULL, GFP_NOIO);
+ object_prefix = ceph_extract_encoded_string(&p, p + ret, NULL,
+ GFP_NOIO);
+ if (IS_ERR(object_prefix)) {
+ ret = PTR_ERR(object_prefix);
+ goto out;
+ }
ret = 0;
- if (IS_ERR(rbd_dev->header.object_prefix)) {
- ret = PTR_ERR(rbd_dev->header.object_prefix);
- rbd_dev->header.object_prefix = NULL;
- } else {
- dout(" object_prefix = %s\n", rbd_dev->header.object_prefix);
- }
+ *pobject_prefix = object_prefix;
+ dout(" object_prefix = %s\n", object_prefix);
out:
kfree(reply_buf);
@@ -5748,12 +5732,6 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
return 0;
}
-static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
-{
- return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
- &rbd_dev->header.features);
-}
-
/*
* These are generic image flags, but since they are used only for
* object map, store them in rbd_dev->object_map_flags.
@@ -5790,6 +5768,14 @@ struct parent_image_info {
u64 overlap;
};
+static void rbd_parent_info_cleanup(struct parent_image_info *pii)
+{
+ kfree(pii->pool_ns);
+ kfree(pii->image_id);
+
+ memset(pii, 0, sizeof(*pii));
+}
+
/*
* The caller is responsible for @pii.
*/
@@ -5859,6 +5845,9 @@ static int __get_parent_info(struct rbd_device *rbd_dev,
if (pii->has_overlap)
ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
+ dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
+ __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
+ pii->has_overlap, pii->overlap);
return 0;
e_inval:
@@ -5897,14 +5886,17 @@ static int __get_parent_info_legacy(struct rbd_device *rbd_dev,
pii->has_overlap = true;
ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
+ dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
+ __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
+ pii->has_overlap, pii->overlap);
return 0;
e_inval:
return -EINVAL;
}
-static int get_parent_info(struct rbd_device *rbd_dev,
- struct parent_image_info *pii)
+static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev,
+ struct parent_image_info *pii)
{
struct page *req_page, *reply_page;
void *p;
@@ -5932,7 +5924,7 @@ static int get_parent_info(struct rbd_device *rbd_dev,
return ret;
}
-static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
+static int rbd_dev_setup_parent(struct rbd_device *rbd_dev)
{
struct rbd_spec *parent_spec;
struct parent_image_info pii = { 0 };
@@ -5942,37 +5934,12 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
if (!parent_spec)
return -ENOMEM;
- ret = get_parent_info(rbd_dev, &pii);
+ ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
if (ret)
goto out_err;
- dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
- __func__, pii.pool_id, pii.pool_ns, pii.image_id, pii.snap_id,
- pii.has_overlap, pii.overlap);
-
- if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) {
- /*
- * Either the parent never existed, or we have
- * record of it but the image got flattened so it no
- * longer has a parent. When the parent of a
- * layered image disappears we immediately set the
- * overlap to 0. The effect of this is that all new
- * requests will be treated as if the image had no
- * parent.
- *
- * If !pii.has_overlap, the parent image spec is not
- * applicable. It's there to avoid duplication in each
- * snapshot record.
- */
- if (rbd_dev->parent_overlap) {
- rbd_dev->parent_overlap = 0;
- rbd_dev_parent_put(rbd_dev);
- pr_info("%s: clone image has been flattened\n",
- rbd_dev->disk->disk_name);
- }
-
+ if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap)
goto out; /* No parent? No problem. */
- }
/* The ceph file layout needs to fit pool id in 32 bits */
@@ -5984,58 +5951,46 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
}
/*
- * The parent won't change (except when the clone is
- * flattened, already handled that). So we only need to
- * record the parent spec we have not already done so.
+ * The parent won't change except when the clone is flattened,
+ * so we only need to record the parent image spec once.
*/
- if (!rbd_dev->parent_spec) {
- parent_spec->pool_id = pii.pool_id;
- if (pii.pool_ns && *pii.pool_ns) {
- parent_spec->pool_ns = pii.pool_ns;
- pii.pool_ns = NULL;
- }
- parent_spec->image_id = pii.image_id;
- pii.image_id = NULL;
- parent_spec->snap_id = pii.snap_id;
-
- rbd_dev->parent_spec = parent_spec;
- parent_spec = NULL; /* rbd_dev now owns this */
+ parent_spec->pool_id = pii.pool_id;
+ if (pii.pool_ns && *pii.pool_ns) {
+ parent_spec->pool_ns = pii.pool_ns;
+ pii.pool_ns = NULL;
}
+ parent_spec->image_id = pii.image_id;
+ pii.image_id = NULL;
+ parent_spec->snap_id = pii.snap_id;
+
+ rbd_assert(!rbd_dev->parent_spec);
+ rbd_dev->parent_spec = parent_spec;
+ parent_spec = NULL; /* rbd_dev now owns this */
/*
- * We always update the parent overlap. If it's zero we issue
- * a warning, as we will proceed as if there was no parent.
+ * Record the parent overlap. If it's zero, issue a warning as
+ * we will proceed as if there is no parent.
*/
- if (!pii.overlap) {
- if (parent_spec) {
- /* refresh, careful to warn just once */
- if (rbd_dev->parent_overlap)
- rbd_warn(rbd_dev,
- "clone now standalone (overlap became 0)");
- } else {
- /* initial probe */
- rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
- }
- }
+ if (!pii.overlap)
+ rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
rbd_dev->parent_overlap = pii.overlap;
out:
ret = 0;
out_err:
- kfree(pii.pool_ns);
- kfree(pii.image_id);
+ rbd_parent_info_cleanup(&pii);
rbd_spec_put(parent_spec);
return ret;
}
-static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev,
+ u64 *stripe_unit, u64 *stripe_count)
{
struct {
__le64 stripe_unit;
__le64 stripe_count;
} __attribute__ ((packed)) striping_info_buf = { 0 };
size_t size = sizeof (striping_info_buf);
- void *p;
int ret;
ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
@@ -6047,27 +6002,33 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
if (ret < size)
return -ERANGE;
- p = &striping_info_buf;
- rbd_dev->header.stripe_unit = ceph_decode_64(&p);
- rbd_dev->header.stripe_count = ceph_decode_64(&p);
+ *stripe_unit = le64_to_cpu(striping_info_buf.stripe_unit);
+ *stripe_count = le64_to_cpu(striping_info_buf.stripe_count);
+ dout(" stripe_unit = %llu stripe_count = %llu\n", *stripe_unit,
+ *stripe_count);
+
return 0;
}
-static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev, s64 *data_pool_id)
{
- __le64 data_pool_id;
+ __le64 data_pool_buf;
int ret;
ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
&rbd_dev->header_oloc, "get_data_pool",
- NULL, 0, &data_pool_id, sizeof(data_pool_id));
+ NULL, 0, &data_pool_buf,
+ sizeof(data_pool_buf));
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
if (ret < 0)
return ret;
- if (ret < sizeof(data_pool_id))
+ if (ret < sizeof(data_pool_buf))
return -EBADMSG;
- rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id);
- WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL);
+ *data_pool_id = le64_to_cpu(data_pool_buf);
+ dout(" data_pool_id = %lld\n", *data_pool_id);
+ WARN_ON(*data_pool_id == CEPH_NOPOOL);
+
return 0;
}
@@ -6259,7 +6220,8 @@ out_err:
return ret;
}
-static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev,
+ struct ceph_snap_context **psnapc)
{
size_t size;
int ret;
@@ -6320,9 +6282,7 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
for (i = 0; i < snap_count; i++)
snapc->snaps[i] = ceph_decode_64(&p);
- ceph_put_snap_context(rbd_dev->header.snapc);
- rbd_dev->header.snapc = snapc;
-
+ *psnapc = snapc;
dout(" snap context seq = %llu, snap_count = %u\n",
(unsigned long long)seq, (unsigned int)snap_count);
out:
@@ -6371,38 +6331,42 @@ out:
return snap_name;
}
-static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header,
+ bool first_time)
{
- bool first_time = rbd_dev->header.object_prefix == NULL;
int ret;
- ret = rbd_dev_v2_image_size(rbd_dev);
+ ret = _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
+ first_time ? &header->obj_order : NULL,
+ &header->image_size);
if (ret)
return ret;
if (first_time) {
- ret = rbd_dev_v2_header_onetime(rbd_dev);
+ ret = rbd_dev_v2_header_onetime(rbd_dev, header);
if (ret)
return ret;
}
- ret = rbd_dev_v2_snap_context(rbd_dev);
- if (ret && first_time) {
- kfree(rbd_dev->header.object_prefix);
- rbd_dev->header.object_prefix = NULL;
- }
+ ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc);
+ if (ret)
+ return ret;
- return ret;
+ return 0;
}
-static int rbd_dev_header_info(struct rbd_device *rbd_dev)
+static int rbd_dev_header_info(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header,
+ bool first_time)
{
rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ rbd_assert(!header->object_prefix && !header->snapc);
if (rbd_dev->image_format == 1)
- return rbd_dev_v1_header_info(rbd_dev);
+ return rbd_dev_v1_header_info(rbd_dev, header, first_time);
- return rbd_dev_v2_header_info(rbd_dev);
+ return rbd_dev_v2_header_info(rbd_dev, header, first_time);
}
/*
@@ -6752,60 +6716,49 @@ out:
*/
static void rbd_dev_unprobe(struct rbd_device *rbd_dev)
{
- struct rbd_image_header *header;
-
rbd_dev_parent_put(rbd_dev);
rbd_object_map_free(rbd_dev);
rbd_dev_mapping_clear(rbd_dev);
/* Free dynamic fields from the header, then zero it out */
- header = &rbd_dev->header;
- ceph_put_snap_context(header->snapc);
- kfree(header->snap_sizes);
- kfree(header->snap_names);
- kfree(header->object_prefix);
- memset(header, 0, sizeof (*header));
+ rbd_image_header_cleanup(&rbd_dev->header);
}
-static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header)
{
int ret;
- ret = rbd_dev_v2_object_prefix(rbd_dev);
+ ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix);
if (ret)
- goto out_err;
+ return ret;
/*
* Get the and check features for the image. Currently the
* features are assumed to never change.
*/
- ret = rbd_dev_v2_features(rbd_dev);
+ ret = _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
+ &header->features);
if (ret)
- goto out_err;
+ return ret;
/* If the image supports fancy striping, get its parameters */
- if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) {
- ret = rbd_dev_v2_striping_info(rbd_dev);
- if (ret < 0)
- goto out_err;
+ if (header->features & RBD_FEATURE_STRIPINGV2) {
+ ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit,
+ &header->stripe_count);
+ if (ret)
+ return ret;
}
- if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) {
- ret = rbd_dev_v2_data_pool(rbd_dev);
+ if (header->features & RBD_FEATURE_DATA_POOL) {
+ ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id);
if (ret)
- goto out_err;
+ return ret;
}
- rbd_init_layout(rbd_dev);
return 0;
-
-out_err:
- rbd_dev->header.features = 0;
- kfree(rbd_dev->header.object_prefix);
- rbd_dev->header.object_prefix = NULL;
- return ret;
}
/*
@@ -6827,7 +6780,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
goto out_err;
}
- parent = __rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec);
+ parent = __rbd_dev_create(rbd_dev->parent_spec);
if (!parent) {
ret = -ENOMEM;
goto out_err;
@@ -6837,8 +6790,8 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
* Images related by parent/child relationships always share
* rbd_client and spec/parent_spec, so bump their refcounts.
*/
- __rbd_get_client(rbd_dev->rbd_client);
- rbd_spec_get(rbd_dev->parent_spec);
+ parent->rbd_client = __rbd_get_client(rbd_dev->rbd_client);
+ parent->spec = rbd_spec_get(rbd_dev->parent_spec);
ret = rbd_dev_image_probe(parent, depth);
if (ret < 0)
@@ -6983,10 +6936,12 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
if (!depth)
down_write(&rbd_dev->header_rwsem);
- ret = rbd_dev_header_info(rbd_dev);
+ ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true);
if (ret)
goto err_out_probe;
+ rbd_init_layout(rbd_dev);
+
/*
* If this image is the one being mapped, we have pool name and
* id, image name and id, and snap name - need to fill snap id.
@@ -7020,7 +6975,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
}
if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
- ret = rbd_dev_v2_parent_info(rbd_dev);
+ ret = rbd_dev_setup_parent(rbd_dev);
if (ret)
goto err_out_probe;
}
@@ -7046,6 +7001,112 @@ err_out_format:
return ret;
}
+static void rbd_dev_update_header(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header)
+{
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ rbd_assert(rbd_dev->header.object_prefix); /* !first_time */
+
+ if (rbd_dev->header.image_size != header->image_size) {
+ rbd_dev->header.image_size = header->image_size;
+
+ if (rbd_dev->spec->snap_id == CEPH_NOSNAP) {
+ rbd_dev->mapping.size = header->image_size;
+ rbd_dev_update_size(rbd_dev);
+ }
+ }
+
+ if (rbd_dev->spec->snap_id != CEPH_NOSNAP) {
+ /* validate mapped snapshot's EXISTS flag */
+ rbd_exists_validate(rbd_dev);
+ }
+
+ ceph_put_snap_context(rbd_dev->header.snapc);
+ rbd_dev->header.snapc = header->snapc;
+ header->snapc = NULL;
+
+ if (rbd_dev->image_format == 1) {
+ kfree(rbd_dev->header.snap_names);
+ rbd_dev->header.snap_names = header->snap_names;
+ header->snap_names = NULL;
+
+ kfree(rbd_dev->header.snap_sizes);
+ rbd_dev->header.snap_sizes = header->snap_sizes;
+ header->snap_sizes = NULL;
+ }
+}
+
+static void rbd_dev_update_parent(struct rbd_device *rbd_dev,
+ struct parent_image_info *pii)
+{
+ if (pii->pool_id == CEPH_NOPOOL || !pii->has_overlap) {
+ /*
+ * Either the parent never existed, or we have
+ * record of it but the image got flattened so it no
+ * longer has a parent. When the parent of a
+ * layered image disappears we immediately set the
+ * overlap to 0. The effect of this is that all new
+ * requests will be treated as if the image had no
+ * parent.
+ *
+ * If !pii.has_overlap, the parent image spec is not
+ * applicable. It's there to avoid duplication in each
+ * snapshot record.
+ */
+ if (rbd_dev->parent_overlap) {
+ rbd_dev->parent_overlap = 0;
+ rbd_dev_parent_put(rbd_dev);
+ pr_info("%s: clone has been flattened\n",
+ rbd_dev->disk->disk_name);
+ }
+ } else {
+ rbd_assert(rbd_dev->parent_spec);
+
+ /*
+ * Update the parent overlap. If it became zero, issue
+ * a warning as we will proceed as if there is no parent.
+ */
+ if (!pii->overlap && rbd_dev->parent_overlap)
+ rbd_warn(rbd_dev,
+ "clone has become standalone (overlap 0)");
+ rbd_dev->parent_overlap = pii->overlap;
+ }
+}
+
+static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+{
+ struct rbd_image_header header = { 0 };
+ struct parent_image_info pii = { 0 };
+ int ret;
+
+ dout("%s rbd_dev %p\n", __func__, rbd_dev);
+
+ ret = rbd_dev_header_info(rbd_dev, &header, false);
+ if (ret)
+ goto out;
+
+ /*
+ * If there is a parent, see if it has disappeared due to the
+ * mapped image getting flattened.
+ */
+ if (rbd_dev->parent) {
+ ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
+ if (ret)
+ goto out;
+ }
+
+ down_write(&rbd_dev->header_rwsem);
+ rbd_dev_update_header(rbd_dev, &header);
+ if (rbd_dev->parent)
+ rbd_dev_update_parent(rbd_dev, &pii);
+ up_write(&rbd_dev->header_rwsem);
+
+out:
+ rbd_parent_info_cleanup(&pii);
+ rbd_image_header_cleanup(&header);
+ return ret;
+}
+
static ssize_t do_rbd_add(struct bus_type *bus,
const char *buf,
size_t count)