Diffstat (limited to 'include/linux/memcontrol.h')
-rw-r--r-- | include/linux/memcontrol.h | 236
1 file changed, 159 insertions(+), 77 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 69966c461d1c..365d5079d1cb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -48,13 +48,13 @@ enum memcg_stat_item {
 	MEMCG_NR_STAT,
 };
 
-/* Cgroup-specific events, on top of universal VM events */
-enum memcg_event_item {
-	MEMCG_LOW = NR_VM_EVENT_ITEMS,
+enum memcg_memory_event {
+	MEMCG_LOW,
 	MEMCG_HIGH,
 	MEMCG_MAX,
 	MEMCG_OOM,
-	MEMCG_NR_EVENTS,
+	MEMCG_OOM_KILL,
+	MEMCG_NR_MEMORY_EVENTS,
 };
 
 struct mem_cgroup_reclaim_cookie {
@@ -88,7 +88,7 @@ enum mem_cgroup_events_target {
 
 struct mem_cgroup_stat_cpu {
 	long count[MEMCG_NR_STAT];
-	unsigned long events[MEMCG_NR_EVENTS];
+	unsigned long events[NR_VM_EVENT_ITEMS];
 	unsigned long nr_page_events;
 	unsigned long targets[MEM_CGROUP_NTARGETS];
 };
@@ -108,7 +108,10 @@ struct lruvec_stat {
  */
 struct mem_cgroup_per_node {
 	struct lruvec		lruvec;
-	struct lruvec_stat __percpu *lruvec_stat;
+
+	struct lruvec_stat __percpu *lruvec_stat_cpu;
+	atomic_long_t		lruvec_stat[NR_VM_NODE_STAT_ITEMS];
+
 	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 
 	struct mem_cgroup_reclaim_iter	iter[DEF_PRIORITY + 1];
@@ -153,6 +156,15 @@ enum memcg_kmem_state {
 	KMEM_ONLINE,
 };
 
+#if defined(CONFIG_SMP)
+struct memcg_padding {
+	char x[0];
+} ____cacheline_internodealigned_in_smp;
+#define MEMCG_PADDING(name)	struct memcg_padding name;
+#else
+#define MEMCG_PADDING(name)
+#endif
+
 /*
  * The memory controller data structure. The memory controller controls both
  * page cache and RSS per cgroup. We would eventually like to provide
@@ -199,7 +211,7 @@ struct mem_cgroup {
 	/* OOM-Killer disable */
 	int		oom_kill_disable;
 
-	/* handle for "memory.events" */
+	/* memory.events */
 	struct cgroup_file events_file;
 
 	/* protect arrays of thresholds */
@@ -219,18 +231,26 @@ struct mem_cgroup {
 	 * mem_cgroup ? And what type of charges should we move ?
 	 */
 	unsigned long move_charge_at_immigrate;
-	/*
-	 * set > 0 if pages under this cgroup are moving to other cgroup.
-	 */
-	atomic_t		moving_account;
 	/* taken only while moving_account > 0 */
 	spinlock_t		move_lock;
-	struct task_struct	*move_lock_task;
 	unsigned long		move_lock_flags;
+
+	MEMCG_PADDING(_pad1_);
+
 	/*
-	 * percpu counter.
+	 * set > 0 if pages under this cgroup are moving to other cgroup.
 	 */
-	struct mem_cgroup_stat_cpu __percpu *stat;
+	atomic_t		moving_account;
+	struct task_struct	*move_lock_task;
+
+	/* memory.stat */
+	struct mem_cgroup_stat_cpu __percpu *stat_cpu;
+
+	MEMCG_PADDING(_pad2_);
+
+	atomic_long_t		stat[MEMCG_NR_STAT];
+	atomic_long_t		events[NR_VM_EVENT_ITEMS];
+	atomic_long_t		memory_events[MEMCG_NR_MEMORY_EVENTS];
 
 	unsigned long		socket_pressure;
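Note on MEMCG_PADDING(): the zero-size, cacheline-aligned struct forces whatever follows it onto a fresh cache line, so the fields written on every charge no longer false-share a line with the fields protected by move_lock, and on !SMP builds the macro compiles away entirely. A minimal user-space sketch of the same trick, assuming 64-byte cache lines (the struct and field names are illustrative, not from this patch):

/* pad_sketch.c: keep a hot, frequently-written field off the cache
 * line of the read-mostly fields, analogous to MEMCG_PADDING(). */
#include <stdalign.h>
#include <stddef.h>
#include <stdio.h>

struct counters {
        long limit;                     /* read-mostly */
        long high;                      /* read-mostly */
        alignas(64) long usage;         /* written constantly: own line */
};

int main(void)
{
        printf("limit @ %zu, usage @ %zu\n",
               offsetof(struct counters, limit),
               offsetof(struct counters, usage));      /* 0 and 64 */
        return 0;
}

The kernel expresses the alignment through a zero-length aligned member rather than an alignas on the next field, so a single #define can be dropped between any two fields without touching their declarations.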
@@ -265,6 +285,12 @@ struct mem_cgroup {
 	/* WARNING: nodeinfo must be the last member here */
 };
 
+/*
+ * size of first charge trial. "32" comes from vmscan.c's magic value.
+ * TODO: maybe necessary to use big numbers in big irons.
+ */
+#define MEMCG_CHARGE_BATCH 32U
+
 extern struct mem_cgroup *root_mem_cgroup;
 
 static inline bool mem_cgroup_disabled(void)
@@ -272,13 +298,6 @@ static inline bool mem_cgroup_disabled(void)
 	return !cgroup_subsys_enabled(memory_cgrp_subsys);
 }
 
-static inline void mem_cgroup_event(struct mem_cgroup *memcg,
-				    enum memcg_event_item event)
-{
-	this_cpu_inc(memcg->stat->events[event]);
-	cgroup_file_notify(&memcg->events_file);
-}
-
 bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
 
 int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
@@ -488,36 +507,47 @@ struct mem_cgroup *lock_page_memcg(struct page *page);
 void __unlock_page_memcg(struct mem_cgroup *memcg);
 void unlock_page_memcg(struct page *page);
 
-/* idx can be of type enum memcg_stat_item or node_stat_item */
+/*
+ * idx can be of type enum memcg_stat_item or node_stat_item.
+ * Keep in sync with memcg_exact_page_state().
+ */
 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
 					     int idx)
 {
-	long val = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		val += per_cpu(memcg->stat->count[idx], cpu);
-
-	if (val < 0)
-		val = 0;
-
-	return val;
+	long x = atomic_long_read(&memcg->stat[idx]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
 }
 
 /* idx can be of type enum memcg_stat_item or node_stat_item */
 static inline void __mod_memcg_state(struct mem_cgroup *memcg,
 				     int idx, int val)
 {
-	if (!mem_cgroup_disabled())
-		__this_cpu_add(memcg->stat->count[idx], val);
+	long x;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	x = val + __this_cpu_read(memcg->stat_cpu->count[idx]);
+	if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+		atomic_long_add(x, &memcg->stat[idx]);
+		x = 0;
+	}
+	__this_cpu_write(memcg->stat_cpu->count[idx], x);
 }
 
 /* idx can be of type enum memcg_stat_item or node_stat_item */
 static inline void mod_memcg_state(struct mem_cgroup *memcg,
 				   int idx, int val)
 {
-	if (!mem_cgroup_disabled())
-		this_cpu_add(memcg->stat->count[idx], val);
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__mod_memcg_state(memcg, idx, val);
+	local_irq_restore(flags);
 }
 
 /**
@@ -555,89 +585,116 @@ static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
 					      enum node_stat_item idx)
 {
 	struct mem_cgroup_per_node *pn;
-	long val = 0;
-	int cpu;
+	long x;
 
 	if (mem_cgroup_disabled())
 		return node_page_state(lruvec_pgdat(lruvec), idx);
 
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	for_each_possible_cpu(cpu)
-		val += per_cpu(pn->lruvec_stat->count[idx], cpu);
-
-	if (val < 0)
-		val = 0;
-
-	return val;
+	x = atomic_long_read(&pn->lruvec_stat[idx]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
 }
 
 static inline void __mod_lruvec_state(struct lruvec *lruvec,
 				      enum node_stat_item idx, int val)
 {
 	struct mem_cgroup_per_node *pn;
+	long x;
 
+	/* Update node */
 	__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+
 	if (mem_cgroup_disabled())
 		return;
+
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+
+	/* Update memcg */
 	__mod_memcg_state(pn->memcg, idx, val);
-	__this_cpu_add(pn->lruvec_stat->count[idx], val);
+
+	/* Update lruvec */
+	x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
+	if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+		atomic_long_add(x, &pn->lruvec_stat[idx]);
+		x = 0;
+	}
+	__this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
 }
 
 static inline void mod_lruvec_state(struct lruvec *lruvec,
 				    enum node_stat_item idx, int val)
 {
-	struct mem_cgroup_per_node *pn;
+	unsigned long flags;
 
-	mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
-	if (mem_cgroup_disabled())
-		return;
-	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	mod_memcg_state(pn->memcg, idx, val);
-	this_cpu_add(pn->lruvec_stat->count[idx], val);
+	local_irq_save(flags);
+	__mod_lruvec_state(lruvec, idx, val);
+	local_irq_restore(flags);
 }
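The scheme above replaces the old for_each_possible_cpu() summation: each CPU accumulates deltas in its stat_cpu counter and folds them into the shared atomic_long only once the pending value exceeds MEMCG_CHARGE_BATCH, so the common write path touches only a per-cpu cacheline and readers pay a single atomic_long_read(). The #ifdef CONFIG_SMP clamp exists because unflushed per-cpu deltas can make the shared counter transiently negative. A user-space model of the same idea, with threads standing in for CPUs (the thread count, names, and final-flush convention are illustrative assumptions):

/* batch_sketch.c: per-thread batching into a shared atomic counter,
 * modeled on __mod_memcg_state() above. Build with -pthread. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define CHARGE_BATCH 32

static atomic_long stat;                /* shared, authoritative */
static _Thread_local long stat_cpu;     /* pending per-thread delta */

static void mod_stat(long val)
{
        long x = val + stat_cpu;

        if (labs(x) > CHARGE_BATCH) {   /* rare: flush the batch */
                atomic_fetch_add(&stat, x);
                x = 0;
        }
        stat_cpu = x;                   /* common: local update only */
}

/* Readers may be off by up to CHARGE_BATCH per thread and can even
 * observe a transiently negative sum, hence the clamp. */
static long read_stat(void)
{
        long x = atomic_load(&stat);

        return x < 0 ? 0 : x;
}

static void *worker(void *arg)
{
        (void)arg;
        for (int i = 0; i < 100000; i++)
                mod_stat(1);
        atomic_fetch_add(&stat, stat_cpu);      /* flush remainder */
        stat_cpu = 0;
        return NULL;
}

int main(void)
{
        pthread_t t[4];

        for (int i = 0; i < 4; i++)
                pthread_create(&t[i], NULL, worker, NULL);
        for (int i = 0; i < 4; i++)
                pthread_join(t[i], NULL);
        printf("%ld\n", read_stat());           /* prints 400000 */
        return 0;
}

The kernel variant cannot rely on thread-locality alone: __mod_memcg_state() may race with interrupt context on the same CPU, which is why the plain mod_memcg_state() wrapper brackets it with local_irq_save()/local_irq_restore() instead of taking a lock.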
 
 static inline void __mod_lruvec_page_state(struct page *page,
 					   enum node_stat_item idx, int val)
 {
-	struct mem_cgroup_per_node *pn;
+	pg_data_t *pgdat = page_pgdat(page);
+	struct lruvec *lruvec;
 
-	__mod_node_page_state(page_pgdat(page), idx, val);
-	if (mem_cgroup_disabled() || !page->mem_cgroup)
+	/* Untracked pages have no memcg, no lruvec. Update only the node */
+	if (!page->mem_cgroup) {
+		__mod_node_page_state(pgdat, idx, val);
 		return;
-	__mod_memcg_state(page->mem_cgroup, idx, val);
-	pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
-	__this_cpu_add(pn->lruvec_stat->count[idx], val);
+	}
+
+	lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup);
+	__mod_lruvec_state(lruvec, idx, val);
 }
 
 static inline void mod_lruvec_page_state(struct page *page,
 					 enum node_stat_item idx, int val)
 {
-	struct mem_cgroup_per_node *pn;
+	unsigned long flags;
 
-	mod_node_page_state(page_pgdat(page), idx, val);
-	if (mem_cgroup_disabled() || !page->mem_cgroup)
-		return;
-	mod_memcg_state(page->mem_cgroup, idx, val);
-	pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
-	this_cpu_add(pn->lruvec_stat->count[idx], val);
+	local_irq_save(flags);
+	__mod_lruvec_page_state(page, idx, val);
+	local_irq_restore(flags);
 }
 
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 						gfp_t gfp_mask,
 						unsigned long *total_scanned);
 
+static inline void __count_memcg_events(struct mem_cgroup *memcg,
+					enum vm_event_item idx,
+					unsigned long count)
+{
+	unsigned long x;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	x = count + __this_cpu_read(memcg->stat_cpu->events[idx]);
+	if (unlikely(x > MEMCG_CHARGE_BATCH)) {
+		atomic_long_add(x, &memcg->events[idx]);
+		x = 0;
+	}
+	__this_cpu_write(memcg->stat_cpu->events[idx], x);
+}
+
 static inline void count_memcg_events(struct mem_cgroup *memcg,
 				      enum vm_event_item idx,
 				      unsigned long count)
 {
-	if (!mem_cgroup_disabled())
-		this_cpu_add(memcg->stat->events[idx], count);
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__count_memcg_events(memcg, idx, count);
+	local_irq_restore(flags);
 }
 
-/* idx can be of type enum memcg_stat_item or node_stat_item */
 static inline void count_memcg_page_event(struct page *page,
-					  int idx)
+					  enum vm_event_item idx)
 {
 	if (page->mem_cgroup)
 		count_memcg_events(page->mem_cgroup, idx, 1);
@@ -653,13 +710,33 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
 
 	rcu_read_lock();
 	memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
-	if (likely(memcg)) {
-		this_cpu_inc(memcg->stat->events[idx]);
-		if (idx == OOM_KILL)
-			cgroup_file_notify(&memcg->events_file);
-	}
+	if (likely(memcg))
+		count_memcg_events(memcg, idx, 1);
+	rcu_read_unlock();
+}
+
+static inline void memcg_memory_event(struct mem_cgroup *memcg,
+				      enum memcg_memory_event event)
+{
+	atomic_long_inc(&memcg->memory_events[event]);
+	cgroup_file_notify(&memcg->events_file);
+}
+
+static inline void memcg_memory_event_mm(struct mm_struct *mm,
+					 enum memcg_memory_event event)
+{
+	struct mem_cgroup *memcg;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	rcu_read_lock();
+	memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
+	if (likely(memcg))
+		memcg_memory_event(memcg, event);
 	rcu_read_unlock();
 }
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void mem_cgroup_split_huge_fixup(struct page *head);
 #endif
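memcg_memory_event() pairs a lockless atomic_long_inc() with cgroup_file_notify(), so each MEMCG_LOW/HIGH/MAX/OOM/OOM_KILL occurrence both updates the counters exported in memory.events and wakes any process waiting on that file. On the consumer side, cgroup2 control files signal such changes through poll()/inotify; a sketch of a watcher, assuming a cgroup2 mount at /sys/fs/cgroup and a hypothetical group named mygroup:

/* events_watch.c: block until memory.events changes, then re-read it. */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        const char *path = "/sys/fs/cgroup/mygroup/memory.events";
        char buf[512];
        int fd = open(path, O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        for (;;) {
                struct pollfd pfd = { .fd = fd, .events = POLLPRI };

                if (poll(&pfd, 1, -1) < 0)
                        break;
                /* The file regenerates its content; read from offset 0. */
                ssize_t n = pread(fd, buf, sizeof(buf) - 1, 0);

                if (n <= 0)
                        break;
                buf[n] = '\0';
                /* e.g. "low 0\nhigh 0\nmax 0\noom 1\noom_kill 1\n" */
                fputs(buf, stdout);
        }
        close(fd);
        return 0;
}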
@@ -676,8 +753,13 @@ static inline bool mem_cgroup_disabled(void)
 	return true;
 }
 
-static inline void mem_cgroup_event(struct mem_cgroup *memcg,
-				    enum memcg_event_item event)
+static inline void memcg_memory_event(struct mem_cgroup *memcg,
+				      enum memcg_memory_event event)
+{
+}
+
+static inline void memcg_memory_event_mm(struct mm_struct *mm,
+					 enum memcg_memory_event event)
 {
 }
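The empty bodies in this last hunk are the usual config-stub pattern: when CONFIG_MEMCG is off, the header supplies static inline no-ops with the same signatures, so call sites compile unchanged and the optimizer discards the calls. A generic sketch of the pattern (FEATURE_FOO and all names are illustrative, not kernel symbols):

/* stub_sketch.h */
#ifndef STUB_SKETCH_H
#define STUB_SKETCH_H

struct foo;
enum foo_event { FOO_LOW, FOO_HIGH };

#ifdef FEATURE_FOO
/* real implementation lives in foo.c */
void foo_event(struct foo *f, enum foo_event event);
#else
/* feature compiled out: no-op keeps callers free of #ifdefs */
static inline void foo_event(struct foo *f, enum foo_event event)
{
        (void)f;
        (void)event;
}
#endif

#endif /* STUB_SKETCH_H */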