diff options
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 221 |
1 files changed, 119 insertions, 102 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d8df500e0eea..5e8b8e1b7d90 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -542,39 +542,10 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz) return mz; } -/* - * Return page count for single (non recursive) @memcg. - * - * Implementation Note: reading percpu statistics for memcg. - * - * Both of vmstat[] and percpu_counter has threshold and do periodic - * synchronization to implement "quick" read. There are trade-off between - * reading cost and precision of value. Then, we may have a chance to implement - * a periodic synchronization of counter in memcg's counter. - * - * But this _read() function is used for user interface now. The user accounts - * memory usage by memory cgroup and he _always_ requires exact value because - * he accounts memory. Even if we provide quick-and-fuzzy read, we always - * have to visit all online cpus and make sum. So, for now, unnecessary - * synchronization is not implemented. (just implemented for cpu hotplug) - * - * If there are kernel internal actions which can make use of some not-exact - * value, and reading all cpu value can be performance bottleneck in some - * common workload, threshold and synchronization as vmstat[] should be - * implemented. - * - * The parameter idx can be of type enum memcg_event_item or vm_event_item. - */ - static unsigned long memcg_sum_events(struct mem_cgroup *memcg, int event) { - unsigned long val = 0; - int cpu; - - for_each_possible_cpu(cpu) - val += per_cpu(memcg->stat->events[event], cpu); - return val; + return atomic_long_read(&memcg->events[event]); } static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, @@ -586,27 +557,27 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, * counted as CACHE even if it's on ANON LRU. */ if (PageAnon(page)) - __this_cpu_add(memcg->stat->count[MEMCG_RSS], nr_pages); + __mod_memcg_state(memcg, MEMCG_RSS, nr_pages); else { - __this_cpu_add(memcg->stat->count[MEMCG_CACHE], nr_pages); + __mod_memcg_state(memcg, MEMCG_CACHE, nr_pages); if (PageSwapBacked(page)) - __this_cpu_add(memcg->stat->count[NR_SHMEM], nr_pages); + __mod_memcg_state(memcg, NR_SHMEM, nr_pages); } if (compound) { VM_BUG_ON_PAGE(!PageTransHuge(page), page); - __this_cpu_add(memcg->stat->count[MEMCG_RSS_HUGE], nr_pages); + __mod_memcg_state(memcg, MEMCG_RSS_HUGE, nr_pages); } /* pagein of a big page is an event. So, ignore page size */ if (nr_pages > 0) - __this_cpu_inc(memcg->stat->events[PGPGIN]); + __count_memcg_events(memcg, PGPGIN, 1); else { - __this_cpu_inc(memcg->stat->events[PGPGOUT]); + __count_memcg_events(memcg, PGPGOUT, 1); nr_pages = -nr_pages; /* for event */ } - __this_cpu_add(memcg->stat->nr_page_events, nr_pages); + __this_cpu_add(memcg->stat_cpu->nr_page_events, nr_pages); } unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, @@ -642,8 +613,8 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, { unsigned long val, next; - val = __this_cpu_read(memcg->stat->nr_page_events); - next = __this_cpu_read(memcg->stat->targets[target]); + val = __this_cpu_read(memcg->stat_cpu->nr_page_events); + next = __this_cpu_read(memcg->stat_cpu->targets[target]); /* from time_after() in jiffies.h */ if ((long)(next - val) < 0) { switch (target) { @@ -659,7 +630,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, default: break; } - __this_cpu_write(memcg->stat->targets[target], next); + __this_cpu_write(memcg->stat_cpu->targets[target], next); return true; } return false; @@ -1726,11 +1697,6 @@ void unlock_page_memcg(struct page *page) } EXPORT_SYMBOL(unlock_page_memcg); -/* - * size of first charge trial. "32" comes from vmscan.c's magic value. - * TODO: maybe necessary to use big numbers in big irons. - */ -#define CHARGE_BATCH 32U struct memcg_stock_pcp { struct mem_cgroup *cached; /* this never be root cgroup */ unsigned int nr_pages; @@ -1758,7 +1724,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) unsigned long flags; bool ret = false; - if (nr_pages > CHARGE_BATCH) + if (nr_pages > MEMCG_CHARGE_BATCH) return ret; local_irq_save(flags); @@ -1827,7 +1793,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) } stock->nr_pages += nr_pages; - if (stock->nr_pages > CHARGE_BATCH) + if (stock->nr_pages > MEMCG_CHARGE_BATCH) drain_stock(stock); local_irq_restore(flags); @@ -1877,9 +1843,44 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) static int memcg_hotplug_cpu_dead(unsigned int cpu) { struct memcg_stock_pcp *stock; + struct mem_cgroup *memcg; stock = &per_cpu(memcg_stock, cpu); drain_stock(stock); + + for_each_mem_cgroup(memcg) { + int i; + + for (i = 0; i < MEMCG_NR_STAT; i++) { + int nid; + long x; + + x = this_cpu_xchg(memcg->stat_cpu->count[i], 0); + if (x) + atomic_long_add(x, &memcg->stat[i]); + + if (i >= NR_VM_NODE_STAT_ITEMS) + continue; + + for_each_node(nid) { + struct mem_cgroup_per_node *pn; + + pn = mem_cgroup_nodeinfo(memcg, nid); + x = this_cpu_xchg(pn->lruvec_stat_cpu->count[i], 0); + if (x) + atomic_long_add(x, &pn->lruvec_stat[i]); + } + } + + for (i = 0; i < NR_VM_EVENT_ITEMS; i++) { + long x; + + x = this_cpu_xchg(memcg->stat_cpu->events[i], 0); + if (x) + atomic_long_add(x, &memcg->events[i]); + } + } + return 0; } @@ -1890,7 +1891,7 @@ static void reclaim_high(struct mem_cgroup *memcg, do { if (page_counter_read(&memcg->memory) <= memcg->high) continue; - mem_cgroup_event(memcg, MEMCG_HIGH); + memcg_memory_event(memcg, MEMCG_HIGH); try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true); } while ((memcg = parent_mem_cgroup(memcg))); } @@ -1900,7 +1901,7 @@ static void high_work_func(struct work_struct *work) struct mem_cgroup *memcg; memcg = container_of(work, struct mem_cgroup, high_work); - reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL); + reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL); } /* @@ -1924,7 +1925,7 @@ void mem_cgroup_handle_over_high(void) static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, unsigned int nr_pages) { - unsigned int batch = max(CHARGE_BATCH, nr_pages); + unsigned int batch = max(MEMCG_CHARGE_BATCH, nr_pages); int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; struct mem_cgroup *mem_over_limit; struct page_counter *counter; @@ -1981,7 +1982,7 @@ retry: if (!gfpflags_allow_blocking(gfp_mask)) goto nomem; - mem_cgroup_event(mem_over_limit, MEMCG_MAX); + memcg_memory_event(mem_over_limit, MEMCG_MAX); nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages, gfp_mask, may_swap); @@ -2024,7 +2025,7 @@ retry: if (fatal_signal_pending(current)) goto force; - mem_cgroup_event(mem_over_limit, MEMCG_OOM); + memcg_memory_event(mem_over_limit, MEMCG_OOM); mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(nr_pages * PAGE_SIZE)); @@ -2444,18 +2445,11 @@ void mem_cgroup_split_huge_fixup(struct page *head) for (i = 1; i < HPAGE_PMD_NR; i++) head[i].mem_cgroup = head->mem_cgroup; - __this_cpu_sub(head->mem_cgroup->stat->count[MEMCG_RSS_HUGE], - HPAGE_PMD_NR); + __mod_memcg_state(head->mem_cgroup, MEMCG_RSS_HUGE, -HPAGE_PMD_NR); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_MEMCG_SWAP -static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, - int nr_entries) -{ - this_cpu_add(memcg->stat->count[MEMCG_SWAP], nr_entries); -} - /** * mem_cgroup_move_swap_account - move swap charge and swap_cgroup's record. * @entry: swap entry to be moved @@ -2479,8 +2473,8 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry, new_id = mem_cgroup_id(to); if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) { - mem_cgroup_swap_statistics(from, -1); - mem_cgroup_swap_statistics(to, 1); + mod_memcg_state(from, MEMCG_SWAP, -1); + mod_memcg_state(to, MEMCG_SWAP, 1); return 0; } return -EINVAL; @@ -2796,10 +2790,10 @@ static void tree_events(struct mem_cgroup *memcg, unsigned long *events) struct mem_cgroup *iter; int i; - memset(events, 0, sizeof(*events) * MEMCG_NR_EVENTS); + memset(events, 0, sizeof(*events) * NR_VM_EVENT_ITEMS); for_each_mem_cgroup_tree(iter, memcg) { - for (i = 0; i < MEMCG_NR_EVENTS; i++) + for (i = 0; i < NR_VM_EVENT_ITEMS; i++) events[i] += memcg_sum_events(iter, i); } } @@ -3654,7 +3648,8 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v) seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable); seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom); - seq_printf(sf, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL)); + seq_printf(sf, "oom_kill %lu\n", + atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL])); return 0; } @@ -3706,6 +3701,22 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb) return &memcg->cgwb_domain; } +/* + * idx can be of type enum memcg_stat_item or node_stat_item. + * Keep in sync with memcg_exact_page(). + */ +static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx) +{ + long x = atomic_long_read(&memcg->stat[idx]); + int cpu; + + for_each_online_cpu(cpu) + x += per_cpu_ptr(memcg->stat_cpu, cpu)->count[idx]; + if (x < 0) + x = 0; + return x; +} + /** * mem_cgroup_wb_stats - retrieve writeback related stats from its memcg * @wb: bdi_writeback in question @@ -3731,10 +3742,10 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css); struct mem_cgroup *parent; - *pdirty = memcg_page_state(memcg, NR_FILE_DIRTY); + *pdirty = memcg_exact_page_state(memcg, NR_FILE_DIRTY); /* this should eventually include NR_UNSTABLE_NFS */ - *pwriteback = memcg_page_state(memcg, NR_WRITEBACK); + *pwriteback = memcg_exact_page_state(memcg, NR_WRITEBACK); *pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) | (1 << LRU_ACTIVE_FILE)); *pheadroom = PAGE_COUNTER_MAX; @@ -4210,8 +4221,8 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) if (!pn) return 1; - pn->lruvec_stat = alloc_percpu(struct lruvec_stat); - if (!pn->lruvec_stat) { + pn->lruvec_stat_cpu = alloc_percpu(struct lruvec_stat); + if (!pn->lruvec_stat_cpu) { kfree(pn); return 1; } @@ -4232,7 +4243,7 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) if (!pn) return; - free_percpu(pn->lruvec_stat); + free_percpu(pn->lruvec_stat_cpu); kfree(pn); } @@ -4242,7 +4253,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) for_each_node(node) free_mem_cgroup_per_node_info(memcg, node); - free_percpu(memcg->stat); + free_percpu(memcg->stat_cpu); kfree(memcg); } @@ -4271,8 +4282,8 @@ static struct mem_cgroup *mem_cgroup_alloc(void) if (memcg->id.id < 0) goto fail; - memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu); - if (!memcg->stat) + memcg->stat_cpu = alloc_percpu(struct mem_cgroup_stat_cpu); + if (!memcg->stat_cpu) goto fail; for_each_node(node) @@ -4514,7 +4525,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, struct page *page = NULL; swp_entry_t ent = pte_to_swp_entry(ptent); - if (!(mc.flags & MOVE_ANON) || non_swap_entry(ent)) + if (!(mc.flags & MOVE_ANON)) return NULL; /* @@ -4533,6 +4544,9 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, return page; } + if (non_swap_entry(ent)) + return NULL; + /* * Because lookup_swap_cache() updates some statistics counter, * we call find_get_page() with swapper_space directly. @@ -4629,8 +4643,8 @@ static int mem_cgroup_move_account(struct page *page, spin_lock_irqsave(&from->move_lock, flags); if (!anon && page_mapped(page)) { - __this_cpu_sub(from->stat->count[NR_FILE_MAPPED], nr_pages); - __this_cpu_add(to->stat->count[NR_FILE_MAPPED], nr_pages); + __mod_memcg_state(from, NR_FILE_MAPPED, -nr_pages); + __mod_memcg_state(to, NR_FILE_MAPPED, nr_pages); } /* @@ -4642,16 +4656,14 @@ static int mem_cgroup_move_account(struct page *page, struct address_space *mapping = page_mapping(page); if (mapping_cap_account_dirty(mapping)) { - __this_cpu_sub(from->stat->count[NR_FILE_DIRTY], - nr_pages); - __this_cpu_add(to->stat->count[NR_FILE_DIRTY], - nr_pages); + __mod_memcg_state(from, NR_FILE_DIRTY, -nr_pages); + __mod_memcg_state(to, NR_FILE_DIRTY, nr_pages); } } if (PageWriteback(page)) { - __this_cpu_sub(from->stat->count[NR_WRITEBACK], nr_pages); - __this_cpu_add(to->stat->count[NR_WRITEBACK], nr_pages); + __mod_memcg_state(from, NR_WRITEBACK, -nr_pages); + __mod_memcg_state(to, NR_WRITEBACK, nr_pages); } /* @@ -4883,7 +4895,6 @@ static void __mem_cgroup_clear_mc(void) if (!mem_cgroup_is_root(mc.to)) page_counter_uncharge(&mc.to->memory, mc.moved_swap); - mem_cgroup_id_get_many(mc.to, mc.moved_swap); css_put_many(&mc.to->css, mc.moved_swap); mc.moved_swap = 0; @@ -5074,7 +5085,8 @@ put: /* get_mctgt_type() gets the page */ ent = target.ent; if (!mem_cgroup_move_swap_account(ent, mc.from, mc.to)) { mc.precharge--; - /* we fixup refcnts and charges later. */ + mem_cgroup_id_get_many(mc.to, 1); + /* we fixup other refcnts and charges later. */ mc.moved_swap++; } break; @@ -5304,7 +5316,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, continue; } - mem_cgroup_event(memcg, MEMCG_OOM); + memcg_memory_event(memcg, MEMCG_OOM); if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0)) break; } @@ -5317,11 +5329,16 @@ static int memory_events_show(struct seq_file *m, void *v) { struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); - seq_printf(m, "low %lu\n", memcg_sum_events(memcg, MEMCG_LOW)); - seq_printf(m, "high %lu\n", memcg_sum_events(memcg, MEMCG_HIGH)); - seq_printf(m, "max %lu\n", memcg_sum_events(memcg, MEMCG_MAX)); - seq_printf(m, "oom %lu\n", memcg_sum_events(memcg, MEMCG_OOM)); - seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL)); + seq_printf(m, "low %lu\n", + atomic_long_read(&memcg->memory_events[MEMCG_LOW])); + seq_printf(m, "high %lu\n", + atomic_long_read(&memcg->memory_events[MEMCG_HIGH])); + seq_printf(m, "max %lu\n", + atomic_long_read(&memcg->memory_events[MEMCG_MAX])); + seq_printf(m, "oom %lu\n", + atomic_long_read(&memcg->memory_events[MEMCG_OOM])); + seq_printf(m, "oom_kill %lu\n", + atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL])); return 0; } @@ -5330,7 +5347,7 @@ static int memory_stat_show(struct seq_file *m, void *v) { struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); unsigned long stat[MEMCG_NR_STAT]; - unsigned long events[MEMCG_NR_EVENTS]; + unsigned long events[NR_VM_EVENT_ITEMS]; int i; /* @@ -5687,12 +5704,12 @@ static void uncharge_batch(const struct uncharge_gather *ug) } local_irq_save(flags); - __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS], ug->nr_anon); - __this_cpu_sub(ug->memcg->stat->count[MEMCG_CACHE], ug->nr_file); - __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS_HUGE], ug->nr_huge); - __this_cpu_sub(ug->memcg->stat->count[NR_SHMEM], ug->nr_shmem); - __this_cpu_add(ug->memcg->stat->events[PGPGOUT], ug->pgpgout); - __this_cpu_add(ug->memcg->stat->nr_page_events, nr_pages); + __mod_memcg_state(ug->memcg, MEMCG_RSS, -ug->nr_anon); + __mod_memcg_state(ug->memcg, MEMCG_CACHE, -ug->nr_file); + __mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge); + __mod_memcg_state(ug->memcg, NR_SHMEM, -ug->nr_shmem); + __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout); + __this_cpu_add(ug->memcg->stat_cpu->nr_page_events, nr_pages); memcg_check_events(ug->memcg, ug->dummy_page); local_irq_restore(flags); @@ -5923,7 +5940,7 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) if (in_softirq()) gfp_mask = GFP_NOWAIT; - this_cpu_add(memcg->stat->count[MEMCG_SOCK], nr_pages); + mod_memcg_state(memcg, MEMCG_SOCK, nr_pages); if (try_charge(memcg, gfp_mask, nr_pages) == 0) return true; @@ -5944,7 +5961,7 @@ void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) return; } - this_cpu_sub(memcg->stat->count[MEMCG_SOCK], nr_pages); + mod_memcg_state(memcg, MEMCG_SOCK, -nr_pages); refill_stock(memcg, nr_pages); } @@ -6068,7 +6085,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg), nr_entries); VM_BUG_ON_PAGE(oldid, page); - mem_cgroup_swap_statistics(swap_memcg, nr_entries); + mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries); page->mem_cgroup = NULL; @@ -6134,7 +6151,7 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) mem_cgroup_id_get_many(memcg, nr_pages - 1); oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg), nr_pages); VM_BUG_ON_PAGE(oldid, page); - mem_cgroup_swap_statistics(memcg, nr_pages); + mod_memcg_state(memcg, MEMCG_SWAP, nr_pages); return 0; } @@ -6162,7 +6179,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) else page_counter_uncharge(&memcg->memsw, nr_pages); } - mem_cgroup_swap_statistics(memcg, -nr_pages); + mod_memcg_state(memcg, MEMCG_SWAP, -nr_pages); mem_cgroup_id_put_many(memcg, nr_pages); } rcu_read_unlock(); |