Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r--  block/blk-cgroup.c  295
1 file changed, 132 insertions(+), 163 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c630e02836a8..2bed5725aa03 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -76,14 +76,42 @@ static void blkg_free(struct blkcg_gq *blkg)
if (blkg->pd[i])
blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
- if (blkg->blkcg != &blkcg_root)
- blk_exit_rl(blkg->q, &blkg->rl);
-
blkg_rwstat_exit(&blkg->stat_ios);
blkg_rwstat_exit(&blkg->stat_bytes);
kfree(blkg);
}
+static void __blkg_release(struct rcu_head *rcu)
+{
+ struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
+
+ percpu_ref_exit(&blkg->refcnt);
+
+ /* release the blkcg and parent blkg refs this blkg has been holding */
+ css_put(&blkg->blkcg->css);
+ if (blkg->parent)
+ blkg_put(blkg->parent);
+
+ wb_congested_put(blkg->wb_congested);
+
+ blkg_free(blkg);
+}
+
+/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid. For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+static void blkg_release(struct percpu_ref *ref)
+{
+ struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);
+
+ call_rcu(&blkg->rcu_head, __blkg_release);
+}
+
/**
* blkg_alloc - allocate a blkg
* @blkcg: block cgroup the new blkg is associated with
@@ -110,14 +138,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
blkg->q = q;
INIT_LIST_HEAD(&blkg->q_node);
blkg->blkcg = blkcg;
- atomic_set(&blkg->refcnt, 1);
-
- /* root blkg uses @q->root_rl, init rl only for !root blkgs */
- if (blkcg != &blkcg_root) {
- if (blk_init_rl(&blkg->rl, q, gfp_mask))
- goto err_free;
- blkg->rl.blkg = blkg;
- }
for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
@@ -157,7 +177,7 @@ struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
if (blkg && blkg->q == q) {
if (update_hint) {
- lockdep_assert_held(q->queue_lock);
+ lockdep_assert_held(&q->queue_lock);
rcu_assign_pointer(blkcg->blkg_hint, blkg);
}
return blkg;
@@ -180,7 +200,13 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
int i, ret;
WARN_ON_ONCE(!rcu_read_lock_held());
- lockdep_assert_held(q->queue_lock);
+ lockdep_assert_held(&q->queue_lock);
+
+ /* request_queue is dying, do not create/recreate a blkg */
+ if (blk_queue_dying(q)) {
+ ret = -ENODEV;
+ goto err_free_blkg;
+ }
/* blkg holds a reference to blkcg */
if (!css_tryget_online(&blkcg->css)) {
@@ -217,6 +243,11 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
blkg_get(blkg->parent);
}
+ ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0,
+ GFP_NOWAIT | __GFP_NOWARN);
+ if (ret)
+ goto err_cancel_ref;
+
/* invoke per-policy init */
for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
@@ -249,6 +280,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
blkg_put(blkg);
return ERR_PTR(ret);
+err_cancel_ref:
+ percpu_ref_exit(&blkg->refcnt);
err_put_congested:
wb_congested_put(wb_congested);
err_put_css:
@@ -259,7 +292,7 @@ err_free_blkg:
}
/**
- * blkg_lookup_create - lookup blkg, try to create one if not there
+ * __blkg_lookup_create - lookup blkg, try to create one if not there
* @blkcg: blkcg of interest
* @q: request_queue of interest
*
@@ -268,24 +301,16 @@ err_free_blkg:
* that all non-root blkg's have access to the parent blkg. This function
* should be called under RCU read lock and @q->queue_lock.
*
- * Returns pointer to the looked up or created blkg on success, ERR_PTR()
- * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not
- * dead and bypassing, returns ERR_PTR(-EBUSY).
+ * Returns the blkg or the closest blkg if blkg_create() fails as it walks
+ * down from root.
*/
-struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
- struct request_queue *q)
+struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
+ struct request_queue *q)
{
struct blkcg_gq *blkg;
WARN_ON_ONCE(!rcu_read_lock_held());
- lockdep_assert_held(q->queue_lock);
-
- /*
- * This could be the first entry point of blkcg implementation and
- * we shouldn't allow anything to go through for a bypassing queue.
- */
- if (unlikely(blk_queue_bypass(q)))
- return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
+ lockdep_assert_held(&q->queue_lock);
blkg = __blkg_lookup(blkcg, q, true);
if (blkg)
@@ -293,30 +318,64 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
/*
* Create blkgs walking down from blkcg_root to @blkcg, so that all
- * non-root blkgs have access to their parents.
+ * non-root blkgs have access to their parents. Returns the closest
+ * blkg to the intended blkg should blkg_create() fail.
*/
while (true) {
struct blkcg *pos = blkcg;
struct blkcg *parent = blkcg_parent(blkcg);
-
- while (parent && !__blkg_lookup(parent, q, false)) {
+ struct blkcg_gq *ret_blkg = q->root_blkg;
+
+ while (parent) {
+ blkg = __blkg_lookup(parent, q, false);
+ if (blkg) {
+ /* remember closest blkg */
+ ret_blkg = blkg;
+ break;
+ }
pos = parent;
parent = blkcg_parent(parent);
}
blkg = blkg_create(pos, q, NULL);
- if (pos == blkcg || IS_ERR(blkg))
+ if (IS_ERR(blkg))
+ return ret_blkg;
+ if (pos == blkcg)
return blkg;
}
}
+/**
+ * blkg_lookup_create - find or create a blkg
+ * @blkcg: target block cgroup
+ * @q: target request_queue
+ *
+ * This looks up or creates the blkg representing the unique pair
+ * of the blkcg and the request_queue.
+ */
+struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
+ struct request_queue *q)
+{
+ struct blkcg_gq *blkg = blkg_lookup(blkcg, q);
+
+ if (unlikely(!blkg)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&q->queue_lock, flags);
+ blkg = __blkg_lookup_create(blkcg, q);
+ spin_unlock_irqrestore(&q->queue_lock, flags);
+ }
+
+ return blkg;
+}
+
static void blkg_destroy(struct blkcg_gq *blkg)
{
struct blkcg *blkcg = blkg->blkcg;
struct blkcg_gq *parent = blkg->parent;
int i;
- lockdep_assert_held(blkg->q->queue_lock);
+ lockdep_assert_held(&blkg->q->queue_lock);
lockdep_assert_held(&blkcg->lock);
/* Something wrong if we are trying to remove same group twice */
@@ -353,7 +412,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
* Put the reference taken at the time of creation so that when all
* queues are gone, group can be destroyed.
*/
- blkg_put(blkg);
+ percpu_ref_kill(&blkg->refcnt);
}
/**
@@ -366,8 +425,7 @@ static void blkg_destroy_all(struct request_queue *q)
{
struct blkcg_gq *blkg, *n;
- lockdep_assert_held(q->queue_lock);
-
+ spin_lock_irq(&q->queue_lock);
list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
struct blkcg *blkcg = blkg->blkcg;
@@ -377,65 +435,7 @@ static void blkg_destroy_all(struct request_queue *q)
}
q->root_blkg = NULL;
- q->root_rl.blkg = NULL;
-}
-
-/*
- * A group is RCU protected, but having an rcu lock does not mean that one
- * can access all the fields of blkg and assume these are valid. For
- * example, don't try to follow throtl_data and request queue links.
- *
- * Having a reference to blkg under an rcu allows accesses to only values
- * local to groups like group stats and group rate limits.
- */
-void __blkg_release_rcu(struct rcu_head *rcu_head)
-{
- struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
-
- /* release the blkcg and parent blkg refs this blkg has been holding */
- css_put(&blkg->blkcg->css);
- if (blkg->parent)
- blkg_put(blkg->parent);
-
- wb_congested_put(blkg->wb_congested);
-
- blkg_free(blkg);
-}
-EXPORT_SYMBOL_GPL(__blkg_release_rcu);
-
-/*
- * The next function used by blk_queue_for_each_rl(). It's a bit tricky
- * because the root blkg uses @q->root_rl instead of its own rl.
- */
-struct request_list *__blk_queue_next_rl(struct request_list *rl,
- struct request_queue *q)
-{
- struct list_head *ent;
- struct blkcg_gq *blkg;
-
- /*
- * Determine the current blkg list_head. The first entry is
- * root_rl which is off @q->blkg_list and mapped to the head.
- */
- if (rl == &q->root_rl) {
- ent = &q->blkg_list;
- /* There are no more block groups, hence no request lists */
- if (list_empty(ent))
- return NULL;
- } else {
- blkg = container_of(rl, struct blkcg_gq, rl);
- ent = &blkg->q_node;
- }
-
- /* walk to the next list_head, skip root blkcg */
- ent = ent->next;
- if (ent == &q->root_blkg->q_node)
- ent = ent->next;
- if (ent == &q->blkg_list)
- return NULL;
-
- blkg = container_of(ent, struct blkcg_gq, q_node);
- return &blkg->rl;
+ spin_unlock_irq(&q->queue_lock);
}
static int blkcg_reset_stats(struct cgroup_subsys_state *css,
@@ -477,7 +477,6 @@ const char *blkg_dev_name(struct blkcg_gq *blkg)
return dev_name(blkg->q->backing_dev_info->dev);
return NULL;
}
-EXPORT_SYMBOL_GPL(blkg_dev_name);
/**
* blkcg_print_blkgs - helper for printing per-blkg data
@@ -508,10 +507,10 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
rcu_read_lock();
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
- spin_lock_irq(blkg->q->queue_lock);
+ spin_lock_irq(&blkg->q->queue_lock);
if (blkcg_policy_enabled(blkg->q, pol))
total += prfill(sf, blkg->pd[pol->plid], data);
- spin_unlock_irq(blkg->q->queue_lock);
+ spin_unlock_irq(&blkg->q->queue_lock);
}
rcu_read_unlock();
@@ -709,7 +708,7 @@ u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
struct cgroup_subsys_state *pos_css;
u64 sum = 0;
- lockdep_assert_held(blkg->q->queue_lock);
+ lockdep_assert_held(&blkg->q->queue_lock);
rcu_read_lock();
blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
@@ -752,7 +751,7 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
struct blkg_rwstat sum = { };
int i;
- lockdep_assert_held(blkg->q->queue_lock);
+ lockdep_assert_held(&blkg->q->queue_lock);
rcu_read_lock();
blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
@@ -783,18 +782,10 @@ static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
struct request_queue *q)
{
WARN_ON_ONCE(!rcu_read_lock_held());
- lockdep_assert_held(q->queue_lock);
+ lockdep_assert_held(&q->queue_lock);
if (!blkcg_policy_enabled(q, pol))
return ERR_PTR(-EOPNOTSUPP);
-
- /*
- * This could be the first entry point of blkcg implementation and
- * we shouldn't allow anything to go through for a bypassing queue.
- */
- if (unlikely(blk_queue_bypass(q)))
- return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
-
return __blkg_lookup(blkcg, q, true /* update_hint */);
}
@@ -812,7 +803,7 @@ static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
*/
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
char *input, struct blkg_conf_ctx *ctx)
- __acquires(rcu) __acquires(disk->queue->queue_lock)
+ __acquires(rcu) __acquires(&disk->queue->queue_lock)
{
struct gendisk *disk;
struct request_queue *q;
@@ -840,7 +831,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
q = disk->queue;
rcu_read_lock();
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
blkg = blkg_lookup_check(blkcg, pol, q);
if (IS_ERR(blkg)) {
@@ -867,7 +858,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
}
/* Drop locks to do new blkg allocation with GFP_KERNEL. */
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
rcu_read_unlock();
new_blkg = blkg_alloc(pos, q, GFP_KERNEL);
@@ -877,7 +868,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
}
rcu_read_lock();
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
blkg = blkg_lookup_check(pos, pol, q);
if (IS_ERR(blkg)) {
@@ -905,7 +896,7 @@ success:
return 0;
fail_unlock:
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
rcu_read_unlock();
fail:
put_disk_and_module(disk);
@@ -921,7 +912,6 @@ fail:
}
return ret;
}
-EXPORT_SYMBOL_GPL(blkg_conf_prep);
/**
* blkg_conf_finish - finish up per-blkg config update
@@ -931,13 +921,12 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
* with blkg_conf_prep().
*/
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
- __releases(ctx->disk->queue->queue_lock) __releases(rcu)
+ __releases(&ctx->disk->queue->queue_lock) __releases(rcu)
{
- spin_unlock_irq(ctx->disk->queue->queue_lock);
+ spin_unlock_irq(&ctx->disk->queue->queue_lock);
rcu_read_unlock();
put_disk_and_module(ctx->disk);
}
-EXPORT_SYMBOL_GPL(blkg_conf_finish);
static int blkcg_print_stat(struct seq_file *sf, void *v)
{
@@ -967,7 +956,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
*/
off += scnprintf(buf+off, size-off, "%s ", dname);
- spin_lock_irq(blkg->q->queue_lock);
+ spin_lock_irq(&blkg->q->queue_lock);
rwstat = blkg_rwstat_recursive_sum(blkg, NULL,
offsetof(struct blkcg_gq, stat_bytes));
@@ -981,7 +970,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
wios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]);
dios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]);
- spin_unlock_irq(blkg->q->queue_lock);
+ spin_unlock_irq(&blkg->q->queue_lock);
if (rbytes || wbytes || rios || wios) {
has_stats = true;
@@ -1102,9 +1091,9 @@ void blkcg_destroy_blkgs(struct blkcg *blkcg)
struct blkcg_gq, blkcg_node);
struct request_queue *q = blkg->q;
- if (spin_trylock(q->queue_lock)) {
+ if (spin_trylock(&q->queue_lock)) {
blkg_destroy(blkg);
- spin_unlock(q->queue_lock);
+ spin_unlock(&q->queue_lock);
} else {
spin_unlock_irq(&blkcg->lock);
cpu_relax();
@@ -1225,36 +1214,31 @@ int blkcg_init_queue(struct request_queue *q)
/* Make sure the root blkg exists. */
rcu_read_lock();
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
blkg = blkg_create(&blkcg_root, q, new_blkg);
if (IS_ERR(blkg))
goto err_unlock;
q->root_blkg = blkg;
- q->root_rl.blkg = blkg;
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
rcu_read_unlock();
if (preloaded)
radix_tree_preload_end();
ret = blk_iolatency_init(q);
- if (ret) {
- spin_lock_irq(q->queue_lock);
- blkg_destroy_all(q);
- spin_unlock_irq(q->queue_lock);
- return ret;
- }
+ if (ret)
+ goto err_destroy_all;
ret = blk_throtl_init(q);
- if (ret) {
- spin_lock_irq(q->queue_lock);
- blkg_destroy_all(q);
- spin_unlock_irq(q->queue_lock);
- }
- return ret;
+ if (ret)
+ goto err_destroy_all;
+ return 0;
+err_destroy_all:
+ blkg_destroy_all(q);
+ return ret;
err_unlock:
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
rcu_read_unlock();
if (preloaded)
radix_tree_preload_end();
@@ -1269,7 +1253,7 @@ err_unlock:
*/
void blkcg_drain_queue(struct request_queue *q)
{
- lockdep_assert_held(q->queue_lock);
+ lockdep_assert_held(&q->queue_lock);
/*
* @q could be exiting and already have destroyed all blkgs as
@@ -1289,10 +1273,7 @@ void blkcg_drain_queue(struct request_queue *q)
*/
void blkcg_exit_queue(struct request_queue *q)
{
- spin_lock_irq(q->queue_lock);
blkg_destroy_all(q);
- spin_unlock_irq(q->queue_lock);
-
blk_throtl_exit(q);
}
@@ -1396,10 +1377,8 @@ int blkcg_activate_policy(struct request_queue *q,
if (blkcg_policy_enabled(q, pol))
return 0;
- if (q->mq_ops)
+ if (queue_is_mq(q))
blk_mq_freeze_queue(q);
- else
- blk_queue_bypass_start(q);
pd_prealloc:
if (!pd_prealloc) {
pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
@@ -1409,7 +1388,7 @@ pd_prealloc:
}
}
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
list_for_each_entry(blkg, &q->blkg_list, q_node) {
struct blkg_policy_data *pd;
@@ -1421,7 +1400,7 @@ pd_prealloc:
if (!pd)
swap(pd, pd_prealloc);
if (!pd) {
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
goto pd_prealloc;
}
@@ -1435,12 +1414,10 @@ pd_prealloc:
__set_bit(pol->plid, q->blkcg_pols);
ret = 0;
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
out_bypass_end:
- if (q->mq_ops)
+ if (queue_is_mq(q))
blk_mq_unfreeze_queue(q);
- else
- blk_queue_bypass_end(q);
if (pd_prealloc)
pol->pd_free_fn(pd_prealloc);
return ret;
@@ -1463,12 +1440,10 @@ void blkcg_deactivate_policy(struct request_queue *q,
if (!blkcg_policy_enabled(q, pol))
return;
- if (q->mq_ops)
+ if (queue_is_mq(q))
blk_mq_freeze_queue(q);
- else
- blk_queue_bypass_start(q);
- spin_lock_irq(q->queue_lock);
+ spin_lock_irq(&q->queue_lock);
__clear_bit(pol->plid, q->blkcg_pols);
@@ -1481,12 +1456,10 @@ void blkcg_deactivate_policy(struct request_queue *q,
}
}
- spin_unlock_irq(q->queue_lock);
+ spin_unlock_irq(&q->queue_lock);
- if (q->mq_ops)
+ if (queue_is_mq(q))
blk_mq_unfreeze_queue(q);
- else
- blk_queue_bypass_end(q);
}
EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
@@ -1748,8 +1721,7 @@ void blkcg_maybe_throttle_current(void)
blkg = blkg_lookup(blkcg, q);
if (!blkg)
goto out;
- blkg = blkg_try_get(blkg);
- if (!blkg)
+ if (!blkg_tryget(blkg))
goto out;
rcu_read_unlock();
@@ -1761,7 +1733,6 @@ out:
rcu_read_unlock();
blk_put_queue(q);
}
-EXPORT_SYMBOL_GPL(blkcg_maybe_throttle_current);
/**
* blkcg_schedule_throttle - this task needs to check for throttling
@@ -1795,7 +1766,6 @@ void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay)
current->use_memdelay = use_memdelay;
set_notify_resume(current);
}
-EXPORT_SYMBOL_GPL(blkcg_schedule_throttle);
/**
* blkcg_add_delay - add delay to this blkg
@@ -1810,7 +1780,6 @@ void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
blkcg_scale_delay(blkg, now);
atomic64_add(delta, &blkg->delay_nsec);
}
-EXPORT_SYMBOL_GPL(blkcg_add_delay);
module_param(blkcg_debug_stats, bool, 0644);
MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");