aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c221
1 files changed, 119 insertions, 102 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d8df500e0eea..5e8b8e1b7d90 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -542,39 +542,10 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
return mz;
}
-/*
- * Return page count for single (non recursive) @memcg.
- *
- * Implementation Note: reading percpu statistics for memcg.
- *
- * Both of vmstat[] and percpu_counter has threshold and do periodic
- * synchronization to implement "quick" read. There are trade-off between
- * reading cost and precision of value. Then, we may have a chance to implement
- * a periodic synchronization of counter in memcg's counter.
- *
- * But this _read() function is used for user interface now. The user accounts
- * memory usage by memory cgroup and he _always_ requires exact value because
- * he accounts memory. Even if we provide quick-and-fuzzy read, we always
- * have to visit all online cpus and make sum. So, for now, unnecessary
- * synchronization is not implemented. (just implemented for cpu hotplug)
- *
- * If there are kernel internal actions which can make use of some not-exact
- * value, and reading all cpu value can be performance bottleneck in some
- * common workload, threshold and synchronization as vmstat[] should be
- * implemented.
- *
- * The parameter idx can be of type enum memcg_event_item or vm_event_item.
- */
-
static unsigned long memcg_sum_events(struct mem_cgroup *memcg,
int event)
{
- unsigned long val = 0;
- int cpu;
-
- for_each_possible_cpu(cpu)
- val += per_cpu(memcg->stat->events[event], cpu);
- return val;
+ return atomic_long_read(&memcg->events[event]);
}
static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
@@ -586,27 +557,27 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
* counted as CACHE even if it's on ANON LRU.
*/
if (PageAnon(page))
- __this_cpu_add(memcg->stat->count[MEMCG_RSS], nr_pages);
+ __mod_memcg_state(memcg, MEMCG_RSS, nr_pages);
else {
- __this_cpu_add(memcg->stat->count[MEMCG_CACHE], nr_pages);
+ __mod_memcg_state(memcg, MEMCG_CACHE, nr_pages);
if (PageSwapBacked(page))
- __this_cpu_add(memcg->stat->count[NR_SHMEM], nr_pages);
+ __mod_memcg_state(memcg, NR_SHMEM, nr_pages);
}
if (compound) {
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
- __this_cpu_add(memcg->stat->count[MEMCG_RSS_HUGE], nr_pages);
+ __mod_memcg_state(memcg, MEMCG_RSS_HUGE, nr_pages);
}
/* pagein of a big page is an event. So, ignore page size */
if (nr_pages > 0)
- __this_cpu_inc(memcg->stat->events[PGPGIN]);
+ __count_memcg_events(memcg, PGPGIN, 1);
else {
- __this_cpu_inc(memcg->stat->events[PGPGOUT]);
+ __count_memcg_events(memcg, PGPGOUT, 1);
nr_pages = -nr_pages; /* for event */
}
- __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
+ __this_cpu_add(memcg->stat_cpu->nr_page_events, nr_pages);
}
unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
@@ -642,8 +613,8 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
{
unsigned long val, next;
- val = __this_cpu_read(memcg->stat->nr_page_events);
- next = __this_cpu_read(memcg->stat->targets[target]);
+ val = __this_cpu_read(memcg->stat_cpu->nr_page_events);
+ next = __this_cpu_read(memcg->stat_cpu->targets[target]);
/* from time_after() in jiffies.h */
if ((long)(next - val) < 0) {
switch (target) {
@@ -659,7 +630,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
default:
break;
}
- __this_cpu_write(memcg->stat->targets[target], next);
+ __this_cpu_write(memcg->stat_cpu->targets[target], next);
return true;
}
return false;
@@ -1726,11 +1697,6 @@ void unlock_page_memcg(struct page *page)
}
EXPORT_SYMBOL(unlock_page_memcg);
-/*
- * size of first charge trial. "32" comes from vmscan.c's magic value.
- * TODO: maybe necessary to use big numbers in big irons.
- */
-#define CHARGE_BATCH 32U
struct memcg_stock_pcp {
struct mem_cgroup *cached; /* this never be root cgroup */
unsigned int nr_pages;
@@ -1758,7 +1724,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
unsigned long flags;
bool ret = false;
- if (nr_pages > CHARGE_BATCH)
+ if (nr_pages > MEMCG_CHARGE_BATCH)
return ret;
local_irq_save(flags);
@@ -1827,7 +1793,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
}
stock->nr_pages += nr_pages;
- if (stock->nr_pages > CHARGE_BATCH)
+ if (stock->nr_pages > MEMCG_CHARGE_BATCH)
drain_stock(stock);
local_irq_restore(flags);
@@ -1877,9 +1843,44 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
static int memcg_hotplug_cpu_dead(unsigned int cpu)
{
struct memcg_stock_pcp *stock;
+ struct mem_cgroup *memcg;
stock = &per_cpu(memcg_stock, cpu);
drain_stock(stock);
+
+ for_each_mem_cgroup(memcg) {
+ int i;
+
+ for (i = 0; i < MEMCG_NR_STAT; i++) {
+ int nid;
+ long x;
+
+ x = this_cpu_xchg(memcg->stat_cpu->count[i], 0);
+ if (x)
+ atomic_long_add(x, &memcg->stat[i]);
+
+ if (i >= NR_VM_NODE_STAT_ITEMS)
+ continue;
+
+ for_each_node(nid) {
+ struct mem_cgroup_per_node *pn;
+
+ pn = mem_cgroup_nodeinfo(memcg, nid);
+ x = this_cpu_xchg(pn->lruvec_stat_cpu->count[i], 0);
+ if (x)
+ atomic_long_add(x, &pn->lruvec_stat[i]);
+ }
+ }
+
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
+ long x;
+
+ x = this_cpu_xchg(memcg->stat_cpu->events[i], 0);
+ if (x)
+ atomic_long_add(x, &memcg->events[i]);
+ }
+ }
+
return 0;
}
@@ -1890,7 +1891,7 @@ static void reclaim_high(struct mem_cgroup *memcg,
do {
if (page_counter_read(&memcg->memory) <= memcg->high)
continue;
- mem_cgroup_event(memcg, MEMCG_HIGH);
+ memcg_memory_event(memcg, MEMCG_HIGH);
try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
} while ((memcg = parent_mem_cgroup(memcg)));
}
@@ -1900,7 +1901,7 @@ static void high_work_func(struct work_struct *work)
struct mem_cgroup *memcg;
memcg = container_of(work, struct mem_cgroup, high_work);
- reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL);
+ reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL);
}
/*
@@ -1924,7 +1925,7 @@ void mem_cgroup_handle_over_high(void)
static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
unsigned int nr_pages)
{
- unsigned int batch = max(CHARGE_BATCH, nr_pages);
+ unsigned int batch = max(MEMCG_CHARGE_BATCH, nr_pages);
int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
struct mem_cgroup *mem_over_limit;
struct page_counter *counter;
@@ -1981,7 +1982,7 @@ retry:
if (!gfpflags_allow_blocking(gfp_mask))
goto nomem;
- mem_cgroup_event(mem_over_limit, MEMCG_MAX);
+ memcg_memory_event(mem_over_limit, MEMCG_MAX);
nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
gfp_mask, may_swap);
@@ -2024,7 +2025,7 @@ retry:
if (fatal_signal_pending(current))
goto force;
- mem_cgroup_event(mem_over_limit, MEMCG_OOM);
+ memcg_memory_event(mem_over_limit, MEMCG_OOM);
mem_cgroup_oom(mem_over_limit, gfp_mask,
get_order(nr_pages * PAGE_SIZE));
@@ -2444,18 +2445,11 @@ void mem_cgroup_split_huge_fixup(struct page *head)
for (i = 1; i < HPAGE_PMD_NR; i++)
head[i].mem_cgroup = head->mem_cgroup;
- __this_cpu_sub(head->mem_cgroup->stat->count[MEMCG_RSS_HUGE],
- HPAGE_PMD_NR);
+ __mod_memcg_state(head->mem_cgroup, MEMCG_RSS_HUGE, -HPAGE_PMD_NR);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#ifdef CONFIG_MEMCG_SWAP
-static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
- int nr_entries)
-{
- this_cpu_add(memcg->stat->count[MEMCG_SWAP], nr_entries);
-}
-
/**
* mem_cgroup_move_swap_account - move swap charge and swap_cgroup's record.
* @entry: swap entry to be moved
@@ -2479,8 +2473,8 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
new_id = mem_cgroup_id(to);
if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
- mem_cgroup_swap_statistics(from, -1);
- mem_cgroup_swap_statistics(to, 1);
+ mod_memcg_state(from, MEMCG_SWAP, -1);
+ mod_memcg_state(to, MEMCG_SWAP, 1);
return 0;
}
return -EINVAL;
@@ -2796,10 +2790,10 @@ static void tree_events(struct mem_cgroup *memcg, unsigned long *events)
struct mem_cgroup *iter;
int i;
- memset(events, 0, sizeof(*events) * MEMCG_NR_EVENTS);
+ memset(events, 0, sizeof(*events) * NR_VM_EVENT_ITEMS);
for_each_mem_cgroup_tree(iter, memcg) {
- for (i = 0; i < MEMCG_NR_EVENTS; i++)
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
events[i] += memcg_sum_events(iter, i);
}
}
@@ -3654,7 +3648,8 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable);
seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
- seq_printf(sf, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
+ seq_printf(sf, "oom_kill %lu\n",
+ atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
return 0;
}
@@ -3706,6 +3701,22 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
return &memcg->cgwb_domain;
}
+/*
+ * idx can be of type enum memcg_stat_item or node_stat_item.
+ * Keep in sync with memcg_exact_page().
+ */
+static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx)
+{
+ long x = atomic_long_read(&memcg->stat[idx]);
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ x += per_cpu_ptr(memcg->stat_cpu, cpu)->count[idx];
+ if (x < 0)
+ x = 0;
+ return x;
+}
+
/**
* mem_cgroup_wb_stats - retrieve writeback related stats from its memcg
* @wb: bdi_writeback in question
@@ -3731,10 +3742,10 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
struct mem_cgroup *parent;
- *pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
+ *pdirty = memcg_exact_page_state(memcg, NR_FILE_DIRTY);
/* this should eventually include NR_UNSTABLE_NFS */
- *pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
+ *pwriteback = memcg_exact_page_state(memcg, NR_WRITEBACK);
*pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) |
(1 << LRU_ACTIVE_FILE));
*pheadroom = PAGE_COUNTER_MAX;
@@ -4210,8 +4221,8 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
if (!pn)
return 1;
- pn->lruvec_stat = alloc_percpu(struct lruvec_stat);
- if (!pn->lruvec_stat) {
+ pn->lruvec_stat_cpu = alloc_percpu(struct lruvec_stat);
+ if (!pn->lruvec_stat_cpu) {
kfree(pn);
return 1;
}
@@ -4232,7 +4243,7 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
if (!pn)
return;
- free_percpu(pn->lruvec_stat);
+ free_percpu(pn->lruvec_stat_cpu);
kfree(pn);
}
@@ -4242,7 +4253,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
- free_percpu(memcg->stat);
+ free_percpu(memcg->stat_cpu);
kfree(memcg);
}
@@ -4271,8 +4282,8 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
if (memcg->id.id < 0)
goto fail;
- memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
- if (!memcg->stat)
+ memcg->stat_cpu = alloc_percpu(struct mem_cgroup_stat_cpu);
+ if (!memcg->stat_cpu)
goto fail;
for_each_node(node)
@@ -4514,7 +4525,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
struct page *page = NULL;
swp_entry_t ent = pte_to_swp_entry(ptent);
- if (!(mc.flags & MOVE_ANON) || non_swap_entry(ent))
+ if (!(mc.flags & MOVE_ANON))
return NULL;
/*
@@ -4533,6 +4544,9 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
return page;
}
+ if (non_swap_entry(ent))
+ return NULL;
+
/*
* Because lookup_swap_cache() updates some statistics counter,
* we call find_get_page() with swapper_space directly.
@@ -4629,8 +4643,8 @@ static int mem_cgroup_move_account(struct page *page,
spin_lock_irqsave(&from->move_lock, flags);
if (!anon && page_mapped(page)) {
- __this_cpu_sub(from->stat->count[NR_FILE_MAPPED], nr_pages);
- __this_cpu_add(to->stat->count[NR_FILE_MAPPED], nr_pages);
+ __mod_memcg_state(from, NR_FILE_MAPPED, -nr_pages);
+ __mod_memcg_state(to, NR_FILE_MAPPED, nr_pages);
}
/*
@@ -4642,16 +4656,14 @@ static int mem_cgroup_move_account(struct page *page,
struct address_space *mapping = page_mapping(page);
if (mapping_cap_account_dirty(mapping)) {
- __this_cpu_sub(from->stat->count[NR_FILE_DIRTY],
- nr_pages);
- __this_cpu_add(to->stat->count[NR_FILE_DIRTY],
- nr_pages);
+ __mod_memcg_state(from, NR_FILE_DIRTY, -nr_pages);
+ __mod_memcg_state(to, NR_FILE_DIRTY, nr_pages);
}
}
if (PageWriteback(page)) {
- __this_cpu_sub(from->stat->count[NR_WRITEBACK], nr_pages);
- __this_cpu_add(to->stat->count[NR_WRITEBACK], nr_pages);
+ __mod_memcg_state(from, NR_WRITEBACK, -nr_pages);
+ __mod_memcg_state(to, NR_WRITEBACK, nr_pages);
}
/*
@@ -4883,7 +4895,6 @@ static void __mem_cgroup_clear_mc(void)
if (!mem_cgroup_is_root(mc.to))
page_counter_uncharge(&mc.to->memory, mc.moved_swap);
- mem_cgroup_id_get_many(mc.to, mc.moved_swap);
css_put_many(&mc.to->css, mc.moved_swap);
mc.moved_swap = 0;
@@ -5074,7 +5085,8 @@ put: /* get_mctgt_type() gets the page */
ent = target.ent;
if (!mem_cgroup_move_swap_account(ent, mc.from, mc.to)) {
mc.precharge--;
- /* we fixup refcnts and charges later. */
+ mem_cgroup_id_get_many(mc.to, 1);
+ /* we fixup other refcnts and charges later. */
mc.moved_swap++;
}
break;
@@ -5304,7 +5316,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
continue;
}
- mem_cgroup_event(memcg, MEMCG_OOM);
+ memcg_memory_event(memcg, MEMCG_OOM);
if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0))
break;
}
@@ -5317,11 +5329,16 @@ static int memory_events_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
- seq_printf(m, "low %lu\n", memcg_sum_events(memcg, MEMCG_LOW));
- seq_printf(m, "high %lu\n", memcg_sum_events(memcg, MEMCG_HIGH));
- seq_printf(m, "max %lu\n", memcg_sum_events(memcg, MEMCG_MAX));
- seq_printf(m, "oom %lu\n", memcg_sum_events(memcg, MEMCG_OOM));
- seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
+ seq_printf(m, "low %lu\n",
+ atomic_long_read(&memcg->memory_events[MEMCG_LOW]));
+ seq_printf(m, "high %lu\n",
+ atomic_long_read(&memcg->memory_events[MEMCG_HIGH]));
+ seq_printf(m, "max %lu\n",
+ atomic_long_read(&memcg->memory_events[MEMCG_MAX]));
+ seq_printf(m, "oom %lu\n",
+ atomic_long_read(&memcg->memory_events[MEMCG_OOM]));
+ seq_printf(m, "oom_kill %lu\n",
+ atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
return 0;
}
@@ -5330,7 +5347,7 @@ static int memory_stat_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
unsigned long stat[MEMCG_NR_STAT];
- unsigned long events[MEMCG_NR_EVENTS];
+ unsigned long events[NR_VM_EVENT_ITEMS];
int i;
/*
@@ -5687,12 +5704,12 @@ static void uncharge_batch(const struct uncharge_gather *ug)
}
local_irq_save(flags);
- __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS], ug->nr_anon);
- __this_cpu_sub(ug->memcg->stat->count[MEMCG_CACHE], ug->nr_file);
- __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS_HUGE], ug->nr_huge);
- __this_cpu_sub(ug->memcg->stat->count[NR_SHMEM], ug->nr_shmem);
- __this_cpu_add(ug->memcg->stat->events[PGPGOUT], ug->pgpgout);
- __this_cpu_add(ug->memcg->stat->nr_page_events, nr_pages);
+ __mod_memcg_state(ug->memcg, MEMCG_RSS, -ug->nr_anon);
+ __mod_memcg_state(ug->memcg, MEMCG_CACHE, -ug->nr_file);
+ __mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge);
+ __mod_memcg_state(ug->memcg, NR_SHMEM, -ug->nr_shmem);
+ __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout);
+ __this_cpu_add(ug->memcg->stat_cpu->nr_page_events, nr_pages);
memcg_check_events(ug->memcg, ug->dummy_page);
local_irq_restore(flags);
@@ -5923,7 +5940,7 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
if (in_softirq())
gfp_mask = GFP_NOWAIT;
- this_cpu_add(memcg->stat->count[MEMCG_SOCK], nr_pages);
+ mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
if (try_charge(memcg, gfp_mask, nr_pages) == 0)
return true;
@@ -5944,7 +5961,7 @@ void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
return;
}
- this_cpu_sub(memcg->stat->count[MEMCG_SOCK], nr_pages);
+ mod_memcg_state(memcg, MEMCG_SOCK, -nr_pages);
refill_stock(memcg, nr_pages);
}
@@ -6068,7 +6085,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg),
nr_entries);
VM_BUG_ON_PAGE(oldid, page);
- mem_cgroup_swap_statistics(swap_memcg, nr_entries);
+ mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
page->mem_cgroup = NULL;
@@ -6134,7 +6151,7 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
mem_cgroup_id_get_many(memcg, nr_pages - 1);
oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg), nr_pages);
VM_BUG_ON_PAGE(oldid, page);
- mem_cgroup_swap_statistics(memcg, nr_pages);
+ mod_memcg_state(memcg, MEMCG_SWAP, nr_pages);
return 0;
}
@@ -6162,7 +6179,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
else
page_counter_uncharge(&memcg->memsw, nr_pages);
}
- mem_cgroup_swap_statistics(memcg, -nr_pages);
+ mod_memcg_state(memcg, MEMCG_SWAP, -nr_pages);
mem_cgroup_id_put_many(memcg, nr_pages);
}
rcu_read_unlock();