aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cgroup
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup')
-rw-r--r--kernel/cgroup/cgroup-v1.c3
-rw-r--r--kernel/cgroup/cgroup.c80
-rw-r--r--kernel/cgroup/cpuset.c3
-rw-r--r--kernel/cgroup/freezer.c9
-rw-r--r--kernel/cgroup/pids.c11
-rw-r--r--kernel/cgroup/rstat.c16
6 files changed, 75 insertions, 47 deletions
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 88006be40ea3..9dc41608b013 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -492,6 +492,7 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
*/
p++;
if (p >= end) {
+ (*pos)++;
return NULL;
} else {
*pos = *p;
@@ -802,7 +803,7 @@ void cgroup1_release_agent(struct work_struct *work)
pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
- if (!pathbuf || !agentbuf)
+ if (!pathbuf || !agentbuf || !strlen(agentbuf))
goto out;
spin_lock_irq(&css_set_lock);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index d2cba714d3ee..fb24e7424411 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2119,11 +2119,12 @@ int cgroup_do_get_tree(struct fs_context *fc)
nsdentry = kernfs_node_dentry(cgrp->kn, sb);
dput(fc->root);
- fc->root = nsdentry;
if (IS_ERR(nsdentry)) {
- ret = PTR_ERR(nsdentry);
deactivate_locked_super(sb);
+ ret = PTR_ERR(nsdentry);
+ nsdentry = NULL;
}
+ fc->root = nsdentry;
}
if (!ctx->kfc.new_sb_created)
@@ -3067,8 +3068,6 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp)
for_each_subsys(ss, ssid) {
struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
- WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
-
if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
continue;
@@ -3078,6 +3077,8 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp)
return PTR_ERR(css);
}
+ WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt));
+
if (css_visible(css)) {
ret = css_populate_dir(css);
if (ret)
@@ -3113,11 +3114,11 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp)
for_each_subsys(ss, ssid) {
struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
- WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
-
if (!css)
continue;
+ WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt));
+
if (css->parent &&
!(cgroup_ss_mask(dsct) & (1 << ss->id))) {
kill_css(css);
@@ -3404,7 +3405,8 @@ static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf,
if (strcmp(strstrip(buf), "threaded"))
return -EINVAL;
- cgrp = cgroup_kn_lock_live(of->kn, false);
+ /* drain dying csses before we re-apply (threaded) subtree control */
+ cgrp = cgroup_kn_lock_live(of->kn, true);
if (!cgrp)
return -ENOENT;
@@ -4415,12 +4417,16 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
}
} while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));
- if (!list_empty(&cset->tasks))
+ if (!list_empty(&cset->tasks)) {
it->task_pos = cset->tasks.next;
- else if (!list_empty(&cset->mg_tasks))
+ it->cur_tasks_head = &cset->tasks;
+ } else if (!list_empty(&cset->mg_tasks)) {
it->task_pos = cset->mg_tasks.next;
- else
+ it->cur_tasks_head = &cset->mg_tasks;
+ } else {
it->task_pos = cset->dying_tasks.next;
+ it->cur_tasks_head = &cset->dying_tasks;
+ }
it->tasks_head = &cset->tasks;
it->mg_tasks_head = &cset->mg_tasks;
@@ -4478,10 +4484,14 @@ repeat:
else
it->task_pos = it->task_pos->next;
- if (it->task_pos == it->tasks_head)
+ if (it->task_pos == it->tasks_head) {
it->task_pos = it->mg_tasks_head->next;
- if (it->task_pos == it->mg_tasks_head)
+ it->cur_tasks_head = it->mg_tasks_head;
+ }
+ if (it->task_pos == it->mg_tasks_head) {
it->task_pos = it->dying_tasks_head->next;
+ it->cur_tasks_head = it->dying_tasks_head;
+ }
if (it->task_pos == it->dying_tasks_head)
css_task_iter_advance_css_set(it);
} else {
@@ -4500,11 +4510,12 @@ repeat:
goto repeat;
/* and dying leaders w/o live member threads */
- if (!atomic_read(&task->signal->live))
+ if (it->cur_tasks_head == it->dying_tasks_head &&
+ !atomic_read(&task->signal->live))
goto repeat;
} else {
/* skip all dying ones */
- if (task->flags & PF_EXITING)
+ if (it->cur_tasks_head == it->dying_tasks_head)
goto repeat;
}
}
@@ -4613,6 +4624,9 @@ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
struct kernfs_open_file *of = s->private;
struct css_task_iter *it = of->priv;
+ if (pos)
+ (*pos)++;
+
return css_task_iter_next(it);
}
@@ -4628,7 +4642,7 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
* from position 0, so we can simply keep iterating on !0 *pos.
*/
if (!it) {
- if (WARN_ON_ONCE((*pos)++))
+ if (WARN_ON_ONCE((*pos)))
return ERR_PTR(-EINVAL);
it = kzalloc(sizeof(*it), GFP_KERNEL);
@@ -4636,10 +4650,11 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
return ERR_PTR(-ENOMEM);
of->priv = it;
css_task_iter_start(&cgrp->self, iter_flags, it);
- } else if (!(*pos)++) {
+ } else if (!(*pos)) {
css_task_iter_end(it);
css_task_iter_start(&cgrp->self, iter_flags, it);
- }
+ } else
+ return it->cur_task;
return cgroup_procs_next(s, NULL, NULL);
}
@@ -6276,19 +6291,14 @@ void cgroup_sk_alloc_disable(void)
void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
{
- if (cgroup_sk_alloc_disabled)
+ if (cgroup_sk_alloc_disabled) {
+ skcd->no_refcnt = 1;
return;
+ }
- /* Socket clone path */
- if (skcd->val) {
- /*
- * We might be cloning a socket which is left in an empty
- * cgroup and the cgroup might have already been rmdir'd.
- * Don't use cgroup_get_live().
- */
- cgroup_get(sock_cgroup_ptr(skcd));
+ /* Don't associate the sock with unrelated interrupted task's cgroup. */
+ if (in_interrupt())
return;
- }
rcu_read_lock();
@@ -6306,8 +6316,24 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
rcu_read_unlock();
}
+void cgroup_sk_clone(struct sock_cgroup_data *skcd)
+{
+ /* Socket clone path */
+ if (skcd->val) {
+ /*
+ * We might be cloning a socket which is left in an empty
+ * cgroup and the cgroup might have already been rmdir'd.
+ * Don't use cgroup_get_live().
+ */
+ cgroup_get(sock_cgroup_ptr(skcd));
+ }
+}
+
void cgroup_sk_free(struct sock_cgroup_data *skcd)
{
+ if (skcd->no_refcnt)
+ return;
+
cgroup_put(sock_cgroup_ptr(skcd));
}
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 515525ff1cfd..e843e6351561 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -839,7 +839,8 @@ static int generate_sched_domains(cpumask_var_t **domains,
cpumask_subset(cp->cpus_allowed, top_cpuset.effective_cpus))
continue;
- if (is_sched_load_balance(cp))
+ if (is_sched_load_balance(cp) &&
+ !cpumask_empty(cp->effective_cpus))
csa[csn++] = cp;
/* skip @cp's subtree if not a partition root */
diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c
index 8cf010680678..3984dd6b8ddb 100644
--- a/kernel/cgroup/freezer.c
+++ b/kernel/cgroup/freezer.c
@@ -231,6 +231,15 @@ void cgroup_freezer_migrate_task(struct task_struct *task,
return;
/*
+ * It's not necessary to do changes if both of the src and dst cgroups
+ * are not freezing and task is not frozen.
+ */
+ if (!test_bit(CGRP_FREEZE, &src->flags) &&
+ !test_bit(CGRP_FREEZE, &dst->flags) &&
+ !task->frozen)
+ return;
+
+ /*
* Adjust counters of freezing and frozen tasks.
* Note, that if the task is frozen, but the destination cgroup is not
* frozen, we bump both counters to keep them balanced.
diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c
index 8e513a573fe9..138059eb730d 100644
--- a/kernel/cgroup/pids.c
+++ b/kernel/cgroup/pids.c
@@ -45,7 +45,7 @@ struct pids_cgroup {
* %PIDS_MAX = (%PID_MAX_LIMIT + 1).
*/
atomic64_t counter;
- int64_t limit;
+ atomic64_t limit;
/* Handle for "pids.events" */
struct cgroup_file events_file;
@@ -73,8 +73,8 @@ pids_css_alloc(struct cgroup_subsys_state *parent)
if (!pids)
return ERR_PTR(-ENOMEM);
- pids->limit = PIDS_MAX;
atomic64_set(&pids->counter, 0);
+ atomic64_set(&pids->limit, PIDS_MAX);
atomic64_set(&pids->events_limit, 0);
return &pids->css;
}
@@ -146,13 +146,14 @@ static int pids_try_charge(struct pids_cgroup *pids, int num)
for (p = pids; parent_pids(p); p = parent_pids(p)) {
int64_t new = atomic64_add_return(num, &p->counter);
+ int64_t limit = atomic64_read(&p->limit);
/*
* Since new is capped to the maximum number of pid_t, if
* p->limit is %PIDS_MAX then we know that this test will never
* fail.
*/
- if (new > p->limit)
+ if (new > limit)
goto revert;
}
@@ -277,7 +278,7 @@ set_limit:
* Limit updates don't need to be mutex'd, since it isn't
* critical that any racing fork()s follow the new limit.
*/
- pids->limit = limit;
+ atomic64_set(&pids->limit, limit);
return nbytes;
}
@@ -285,7 +286,7 @@ static int pids_max_show(struct seq_file *sf, void *v)
{
struct cgroup_subsys_state *css = seq_css(sf);
struct pids_cgroup *pids = css_pids(css);
- int64_t limit = pids->limit;
+ int64_t limit = atomic64_read(&pids->limit);
if (limit >= PIDS_MAX)
seq_printf(sf, "%s\n", PIDS_MAX_STR);
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index ca19b4c8acf5..4a942d4e9763 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -33,12 +33,9 @@ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
return;
/*
- * Paired with the one in cgroup_rstat_cpu_pop_upated(). Either we
- * see NULL updated_next or they see our updated stat.
- */
- smp_mb();
-
- /*
+ * Speculative already-on-list test. This may race leading to
+ * temporary inaccuracies, which is fine.
+ *
* Because @parent's updated_children is terminated with @parent
* instead of NULL, we can tell whether @cgrp is on the list by
* testing the next pointer for NULL.
@@ -134,13 +131,6 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
*nextp = rstatc->updated_next;
rstatc->updated_next = NULL;
- /*
- * Paired with the one in cgroup_rstat_cpu_updated().
- * Either they see NULL updated_next or we see their
- * updated stat.
- */
- smp_mb();
-
return pos;
}