aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/memcontrol.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/memcontrol.h')
-rw-r--r--include/linux/memcontrol.h236
1 files changed, 159 insertions, 77 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 69966c461d1c..365d5079d1cb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -48,13 +48,13 @@ enum memcg_stat_item {
MEMCG_NR_STAT,
};
-/* Cgroup-specific events, on top of universal VM events */
-enum memcg_event_item {
- MEMCG_LOW = NR_VM_EVENT_ITEMS,
+enum memcg_memory_event {
+ MEMCG_LOW,
MEMCG_HIGH,
MEMCG_MAX,
MEMCG_OOM,
- MEMCG_NR_EVENTS,
+ MEMCG_OOM_KILL,
+ MEMCG_NR_MEMORY_EVENTS,
};
struct mem_cgroup_reclaim_cookie {
@@ -88,7 +88,7 @@ enum mem_cgroup_events_target {
struct mem_cgroup_stat_cpu {
long count[MEMCG_NR_STAT];
- unsigned long events[MEMCG_NR_EVENTS];
+ unsigned long events[NR_VM_EVENT_ITEMS];
unsigned long nr_page_events;
unsigned long targets[MEM_CGROUP_NTARGETS];
};
@@ -108,7 +108,10 @@ struct lruvec_stat {
*/
struct mem_cgroup_per_node {
struct lruvec lruvec;
- struct lruvec_stat __percpu *lruvec_stat;
+
+ struct lruvec_stat __percpu *lruvec_stat_cpu;
+ atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS];
+
unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1];
@@ -153,6 +156,15 @@ enum memcg_kmem_state {
KMEM_ONLINE,
};
+#if defined(CONFIG_SMP)
+struct memcg_padding {
+ char x[0];
+} ____cacheline_internodealigned_in_smp;
+#define MEMCG_PADDING(name) struct memcg_padding name;
+#else
+#define MEMCG_PADDING(name)
+#endif
+
/*
* The memory controller data structure. The memory controller controls both
* page cache and RSS per cgroup. We would eventually like to provide
@@ -199,7 +211,7 @@ struct mem_cgroup {
/* OOM-Killer disable */
int oom_kill_disable;
- /* handle for "memory.events" */
+ /* memory.events */
struct cgroup_file events_file;
/* protect arrays of thresholds */
@@ -219,18 +231,26 @@ struct mem_cgroup {
* mem_cgroup ? And what type of charges should we move ?
*/
unsigned long move_charge_at_immigrate;
- /*
- * set > 0 if pages under this cgroup are moving to other cgroup.
- */
- atomic_t moving_account;
/* taken only while moving_account > 0 */
spinlock_t move_lock;
- struct task_struct *move_lock_task;
unsigned long move_lock_flags;
+
+ MEMCG_PADDING(_pad1_);
+
/*
- * percpu counter.
+ * set > 0 if pages under this cgroup are moving to other cgroup.
*/
- struct mem_cgroup_stat_cpu __percpu *stat;
+ atomic_t moving_account;
+ struct task_struct *move_lock_task;
+
+ /* memory.stat */
+ struct mem_cgroup_stat_cpu __percpu *stat_cpu;
+
+ MEMCG_PADDING(_pad2_);
+
+ atomic_long_t stat[MEMCG_NR_STAT];
+ atomic_long_t events[NR_VM_EVENT_ITEMS];
+ atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
unsigned long socket_pressure;
@@ -265,6 +285,12 @@ struct mem_cgroup {
/* WARNING: nodeinfo must be the last member here */
};
+/*
+ * size of first charge trial. "32" comes from vmscan.c's magic value.
+ * TODO: maybe necessary to use big numbers in big irons.
+ */
+#define MEMCG_CHARGE_BATCH 32U
+
extern struct mem_cgroup *root_mem_cgroup;
static inline bool mem_cgroup_disabled(void)
@@ -272,13 +298,6 @@ static inline bool mem_cgroup_disabled(void)
return !cgroup_subsys_enabled(memory_cgrp_subsys);
}
-static inline void mem_cgroup_event(struct mem_cgroup *memcg,
- enum memcg_event_item event)
-{
- this_cpu_inc(memcg->stat->events[event]);
- cgroup_file_notify(&memcg->events_file);
-}
-
bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
@@ -488,36 +507,47 @@ struct mem_cgroup *lock_page_memcg(struct page *page);
void __unlock_page_memcg(struct mem_cgroup *memcg);
void unlock_page_memcg(struct page *page);
-/* idx can be of type enum memcg_stat_item or node_stat_item */
+/*
+ * idx can be of type enum memcg_stat_item or node_stat_item.
+ * Keep in sync with memcg_exact_page_state().
+ */
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
int idx)
{
- long val = 0;
- int cpu;
-
- for_each_possible_cpu(cpu)
- val += per_cpu(memcg->stat->count[idx], cpu);
-
- if (val < 0)
- val = 0;
-
- return val;
+ long x = atomic_long_read(&memcg->stat[idx]);
+#ifdef CONFIG_SMP
+ if (x < 0)
+ x = 0;
+#endif
+ return x;
}
/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void __mod_memcg_state(struct mem_cgroup *memcg,
int idx, int val)
{
- if (!mem_cgroup_disabled())
- __this_cpu_add(memcg->stat->count[idx], val);
+ long x;
+
+ if (mem_cgroup_disabled())
+ return;
+
+ x = val + __this_cpu_read(memcg->stat_cpu->count[idx]);
+ if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+ atomic_long_add(x, &memcg->stat[idx]);
+ x = 0;
+ }
+ __this_cpu_write(memcg->stat_cpu->count[idx], x);
}
/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void mod_memcg_state(struct mem_cgroup *memcg,
int idx, int val)
{
- if (!mem_cgroup_disabled())
- this_cpu_add(memcg->stat->count[idx], val);
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __mod_memcg_state(memcg, idx, val);
+ local_irq_restore(flags);
}
/**
@@ -555,89 +585,116 @@ static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
enum node_stat_item idx)
{
struct mem_cgroup_per_node *pn;
- long val = 0;
- int cpu;
+ long x;
if (mem_cgroup_disabled())
return node_page_state(lruvec_pgdat(lruvec), idx);
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- for_each_possible_cpu(cpu)
- val += per_cpu(pn->lruvec_stat->count[idx], cpu);
-
- if (val < 0)
- val = 0;
-
- return val;
+ x = atomic_long_read(&pn->lruvec_stat[idx]);
+#ifdef CONFIG_SMP
+ if (x < 0)
+ x = 0;
+#endif
+ return x;
}
static inline void __mod_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx, int val)
{
struct mem_cgroup_per_node *pn;
+ long x;
+ /* Update node */
__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+
if (mem_cgroup_disabled())
return;
+
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+
+ /* Update memcg */
__mod_memcg_state(pn->memcg, idx, val);
- __this_cpu_add(pn->lruvec_stat->count[idx], val);
+
+ /* Update lruvec */
+ x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
+ if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+ atomic_long_add(x, &pn->lruvec_stat[idx]);
+ x = 0;
+ }
+ __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
}
static inline void mod_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx, int val)
{
- struct mem_cgroup_per_node *pn;
+ unsigned long flags;
- mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
- if (mem_cgroup_disabled())
- return;
- pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- mod_memcg_state(pn->memcg, idx, val);
- this_cpu_add(pn->lruvec_stat->count[idx], val);
+ local_irq_save(flags);
+ __mod_lruvec_state(lruvec, idx, val);
+ local_irq_restore(flags);
}
static inline void __mod_lruvec_page_state(struct page *page,
enum node_stat_item idx, int val)
{
- struct mem_cgroup_per_node *pn;
+ pg_data_t *pgdat = page_pgdat(page);
+ struct lruvec *lruvec;
- __mod_node_page_state(page_pgdat(page), idx, val);
- if (mem_cgroup_disabled() || !page->mem_cgroup)
+ /* Untracked pages have no memcg, no lruvec. Update only the node */
+ if (!page->mem_cgroup) {
+ __mod_node_page_state(pgdat, idx, val);
return;
- __mod_memcg_state(page->mem_cgroup, idx, val);
- pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
- __this_cpu_add(pn->lruvec_stat->count[idx], val);
+ }
+
+ lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup);
+ __mod_lruvec_state(lruvec, idx, val);
}
static inline void mod_lruvec_page_state(struct page *page,
enum node_stat_item idx, int val)
{
- struct mem_cgroup_per_node *pn;
+ unsigned long flags;
- mod_node_page_state(page_pgdat(page), idx, val);
- if (mem_cgroup_disabled() || !page->mem_cgroup)
- return;
- mod_memcg_state(page->mem_cgroup, idx, val);
- pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
- this_cpu_add(pn->lruvec_stat->count[idx], val);
+ local_irq_save(flags);
+ __mod_lruvec_page_state(page, idx, val);
+ local_irq_restore(flags);
}
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
unsigned long *total_scanned);
+static inline void __count_memcg_events(struct mem_cgroup *memcg,
+ enum vm_event_item idx,
+ unsigned long count)
+{
+ unsigned long x;
+
+ if (mem_cgroup_disabled())
+ return;
+
+ x = count + __this_cpu_read(memcg->stat_cpu->events[idx]);
+ if (unlikely(x > MEMCG_CHARGE_BATCH)) {
+ atomic_long_add(x, &memcg->events[idx]);
+ x = 0;
+ }
+ __this_cpu_write(memcg->stat_cpu->events[idx], x);
+}
+
static inline void count_memcg_events(struct mem_cgroup *memcg,
enum vm_event_item idx,
unsigned long count)
{
- if (!mem_cgroup_disabled())
- this_cpu_add(memcg->stat->events[idx], count);
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __count_memcg_events(memcg, idx, count);
+ local_irq_restore(flags);
}
-/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void count_memcg_page_event(struct page *page,
- int idx)
+ enum vm_event_item idx)
{
if (page->mem_cgroup)
count_memcg_events(page->mem_cgroup, idx, 1);
@@ -653,13 +710,33 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
rcu_read_lock();
memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
- if (likely(memcg)) {
- this_cpu_inc(memcg->stat->events[idx]);
- if (idx == OOM_KILL)
- cgroup_file_notify(&memcg->events_file);
- }
+ if (likely(memcg))
+ count_memcg_events(memcg, idx, 1);
+ rcu_read_unlock();
+}
+
+static inline void memcg_memory_event(struct mem_cgroup *memcg,
+ enum memcg_memory_event event)
+{
+ atomic_long_inc(&memcg->memory_events[event]);
+ cgroup_file_notify(&memcg->events_file);
+}
+
+static inline void memcg_memory_event_mm(struct mm_struct *mm,
+ enum memcg_memory_event event)
+{
+ struct mem_cgroup *memcg;
+
+ if (mem_cgroup_disabled())
+ return;
+
+ rcu_read_lock();
+ memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
+ if (likely(memcg))
+ memcg_memory_event(memcg, event);
rcu_read_unlock();
}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void mem_cgroup_split_huge_fixup(struct page *head);
#endif
@@ -676,8 +753,13 @@ static inline bool mem_cgroup_disabled(void)
return true;
}
-static inline void mem_cgroup_event(struct mem_cgroup *memcg,
- enum memcg_event_item event)
+static inline void memcg_memory_event(struct mem_cgroup *memcg,
+ enum memcg_memory_event event)
+{
+}
+
+static inline void memcg_memory_event_mm(struct mm_struct *mm,
+ enum memcg_memory_event event)
{
}