Diffstat (limited to 'mm/page_alloc.c')
 mm/page_alloc.c | 196
 1 file changed, 141 insertions(+), 55 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 31ba80502df9..c8e172c805fa 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -61,6 +61,7 @@
#include <linux/hugetlb.h>
#include <linux/sched/rt.h>
#include <linux/sched/mm.h>
+#include <linux/locallock.h>
#include <linux/page_owner.h>
#include <linux/kthread.h>
#include <linux/memcontrol.h>
@@ -291,6 +292,18 @@ EXPORT_SYMBOL(nr_node_ids);
EXPORT_SYMBOL(nr_online_nodes);
#endif
+static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
+
+#ifdef CONFIG_PREEMPT_RT_BASE
+# define cpu_lock_irqsave(cpu, flags) \
+ local_lock_irqsave_on(pa_lock, flags, cpu)
+# define cpu_unlock_irqrestore(cpu, flags) \
+ local_unlock_irqrestore_on(pa_lock, flags, cpu)
+#else
+# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
+# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
+#endif
+
int page_group_by_mobility_disabled __read_mostly;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
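
The hunk above introduces pa_lock and the cpu_lock_irqsave() wrappers. The sketch below is not part of the patch (the function name is made up for illustration); it shows how a converted page-allocator fast path is expected to look: on PREEMPT_RT the local lock is a per-CPU lock that can be preempted, while on !RT local_lock_irqsave() falls back to plain local_irq_save(), so behaviour there is unchanged.

static void example_pcp_section(struct zone *zone)
{
	unsigned long flags;
	struct per_cpu_pages *pcp;

	local_lock_irqsave(pa_lock, flags);		/* was: local_irq_save(flags) */
	pcp = &this_cpu_ptr(zone->pageset)->pcp;
	/* ... operate on the per-CPU pcp lists ... */
	local_unlock_irqrestore(pa_lock, flags);	/* was: local_irq_restore(flags) */
}
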
@@ -1095,7 +1108,7 @@ static inline void prefetch_buddy(struct page *page)
}
/*
- * Frees a number of pages from the PCP lists
+ * Frees a number of pages which have been collected from the pcp lists.
* Assumes all pages on list are in same zone, and of same order.
* count is the number of pages to free.
*
@@ -1105,15 +1118,57 @@ static inline void prefetch_buddy(struct page *page)
* And clear the zone's pages_scanned counter, to hold off the "all pages are
* pinned" detection logic.
*/
-static void free_pcppages_bulk(struct zone *zone, int count,
- struct per_cpu_pages *pcp)
+static void free_pcppages_bulk(struct zone *zone, struct list_head *head,
+ bool zone_retry)
+{
+ bool isolated_pageblocks;
+ struct page *page, *tmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&zone->lock, flags);
+ isolated_pageblocks = has_isolate_pageblock(zone);
+
+ /*
+ * Use safe version since after __free_one_page(),
+ * page->lru.next will not point to original list.
+ */
+ list_for_each_entry_safe(page, tmp, head, lru) {
+ int mt = get_pcppage_migratetype(page);
+
+ if (page_zone(page) != zone) {
+ /*
+ * free_unref_page_list() sorts pages by zone. If we end
+ * up with pages from different NUMA nodes belonging
+ * to the same ZONE index then we need to redo with the
+ * correct ZONE pointer. Skip the page for now, redo it
+ * on the next iteration.
+ */
+ WARN_ON_ONCE(zone_retry == false);
+ if (zone_retry)
+ continue;
+ }
+
+ /* MIGRATE_ISOLATE page should not go to pcplists */
+ VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
+ /* Pageblock could have been isolated meanwhile */
+ if (unlikely(isolated_pageblocks))
+ mt = get_pageblock_migratetype(page);
+
+ list_del(&page->lru);
+ __free_one_page(page, page_to_pfn(page), zone, 0, mt);
+ trace_mm_page_pcpu_drain(page, 0, mt);
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+static void isolate_pcp_pages(int count, struct per_cpu_pages *pcp,
+ struct list_head *dst)
+
{
int migratetype = 0;
int batch_free = 0;
int prefetch_nr = 0;
- bool isolated_pageblocks;
- struct page *page, *tmp;
- LIST_HEAD(head);
+ struct page *page;
while (count) {
struct list_head *list;
@@ -1145,7 +1200,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
if (bulkfree_pcp_prepare(page))
continue;
- list_add_tail(&page->lru, &head);
+ list_add_tail(&page->lru, dst);
/*
* We are going to put the page back to the global
@@ -1160,26 +1215,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
prefetch_buddy(page);
} while (--count && --batch_free && !list_empty(list));
}
-
- spin_lock(&zone->lock);
- isolated_pageblocks = has_isolate_pageblock(zone);
-
- /*
- * Use safe version since after __free_one_page(),
- * page->lru.next will not point to original list.
- */
- list_for_each_entry_safe(page, tmp, &head, lru) {
- int mt = get_pcppage_migratetype(page);
- /* MIGRATE_ISOLATE page should not go to pcplists */
- VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
- /* Pageblock could have been isolated meanwhile */
- if (unlikely(isolated_pageblocks))
- mt = get_pageblock_migratetype(page);
-
- __free_one_page(page, page_to_pfn(page), zone, 0, mt);
- trace_mm_page_pcpu_drain(page, 0, mt);
- }
- spin_unlock(&zone->lock);
}
static void free_one_page(struct zone *zone,
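
The refactoring above splits the old free_pcppages_bulk() into two stages. A minimal caller sketch, modelled on drain_zone_pages() later in this patch (the wrapper name itself is hypothetical): pages are first detached from the pcp lists under pa_lock, then handed to the buddy allocator under zone->lock with the local lock already dropped, so zone->lock no longer nests inside the IRQ-off/per-CPU section.

static void example_two_stage_drain(struct zone *zone,
				    struct per_cpu_pages *pcp, int to_drain)
{
	unsigned long flags;
	LIST_HEAD(dst);

	local_lock_irqsave(pa_lock, flags);
	if (to_drain > 0)
		isolate_pcp_pages(to_drain, pcp, &dst);	/* stage 1: detach */
	local_unlock_irqrestore(pa_lock, flags);

	if (to_drain > 0)
		free_pcppages_bulk(zone, &dst, false);	/* stage 2: free under zone->lock */
}
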
@@ -1274,10 +1309,10 @@ static void __free_pages_ok(struct page *page, unsigned int order)
return;
migratetype = get_pfnblock_migratetype(page, pfn);
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
__count_vm_events(PGFREE, 1 << order);
free_one_page(page_zone(page), page, pfn, order, migratetype);
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
}
static void __init __free_pages_boot_core(struct page *page, unsigned int order)
@@ -2536,13 +2571,18 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
{
unsigned long flags;
int to_drain, batch;
+ LIST_HEAD(dst);
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
batch = READ_ONCE(pcp->batch);
to_drain = min(pcp->count, batch);
if (to_drain > 0)
- free_pcppages_bulk(zone, to_drain, pcp);
- local_irq_restore(flags);
+ isolate_pcp_pages(to_drain, pcp, &dst);
+
+ local_unlock_irqrestore(pa_lock, flags);
+
+ if (to_drain > 0)
+ free_pcppages_bulk(zone, &dst, false);
}
#endif
@@ -2558,14 +2598,21 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
unsigned long flags;
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
+ LIST_HEAD(dst);
+ int count;
- local_irq_save(flags);
+ cpu_lock_irqsave(cpu, flags);
pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;
- if (pcp->count)
- free_pcppages_bulk(zone, pcp->count, pcp);
- local_irq_restore(flags);
+ count = pcp->count;
+ if (count)
+ isolate_pcp_pages(count, pcp, &dst);
+
+ cpu_unlock_irqrestore(cpu, flags);
+
+ if (count)
+ free_pcppages_bulk(zone, &dst, false);
}
/*
@@ -2600,6 +2647,7 @@ void drain_local_pages(struct zone *zone)
drain_pages(cpu);
}
+#ifndef CONFIG_PREEMPT_RT_BASE
static void drain_local_pages_wq(struct work_struct *work)
{
/*
@@ -2613,6 +2661,7 @@ static void drain_local_pages_wq(struct work_struct *work)
drain_local_pages(NULL);
preempt_enable();
}
+#endif
/*
* Spill all the per-cpu pages from all CPUs back into the buddy allocator.
@@ -2679,7 +2728,14 @@ void drain_all_pages(struct zone *zone)
else
cpumask_clear_cpu(cpu, &cpus_with_pcps);
}
-
+#ifdef CONFIG_PREEMPT_RT_BASE
+ for_each_cpu(cpu, &cpus_with_pcps) {
+ if (zone)
+ drain_pages_zone(cpu, zone);
+ else
+ drain_pages(cpu);
+ }
+#else
for_each_cpu(cpu, &cpus_with_pcps) {
struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
INIT_WORK(work, drain_local_pages_wq);
@@ -2687,6 +2743,7 @@ void drain_all_pages(struct zone *zone)
}
for_each_cpu(cpu, &cpus_with_pcps)
flush_work(per_cpu_ptr(&pcpu_drain, cpu));
+#endif
mutex_unlock(&pcpu_drain_mutex);
}
@@ -2758,7 +2815,8 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn)
return true;
}
-static void free_unref_page_commit(struct page *page, unsigned long pfn)
+static void free_unref_page_commit(struct page *page, unsigned long pfn,
+ struct list_head *dst)
{
struct zone *zone = page_zone(page);
struct per_cpu_pages *pcp;
@@ -2787,7 +2845,8 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
pcp->count++;
if (pcp->count >= pcp->high) {
unsigned long batch = READ_ONCE(pcp->batch);
- free_pcppages_bulk(zone, batch, pcp);
+
+ isolate_pcp_pages(batch, pcp, dst);
}
}
@@ -2798,13 +2857,17 @@ void free_unref_page(struct page *page)
{
unsigned long flags;
unsigned long pfn = page_to_pfn(page);
+ struct zone *zone = page_zone(page);
+ LIST_HEAD(dst);
if (!free_unref_page_prepare(page, pfn))
return;
- local_irq_save(flags);
- free_unref_page_commit(page, pfn);
- local_irq_restore(flags);
+ local_lock_irqsave(pa_lock, flags);
+ free_unref_page_commit(page, pfn, &dst);
+ local_unlock_irqrestore(pa_lock, flags);
+ if (!list_empty(&dst))
+ free_pcppages_bulk(zone, &dst, false);
}
/*
@@ -2815,6 +2878,11 @@ void free_unref_page_list(struct list_head *list)
struct page *page, *next;
unsigned long flags, pfn;
int batch_count = 0;
+ struct list_head dsts[__MAX_NR_ZONES];
+ int i;
+
+ for (i = 0; i < __MAX_NR_ZONES; i++)
+ INIT_LIST_HEAD(&dsts[i]);
/* Prepare pages for freeing */
list_for_each_entry_safe(page, next, list, lru) {
@@ -2824,25 +2892,42 @@ void free_unref_page_list(struct list_head *list)
set_page_private(page, pfn);
}
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
list_for_each_entry_safe(page, next, list, lru) {
unsigned long pfn = page_private(page);
+ enum zone_type type;
set_page_private(page, 0);
trace_mm_page_free_batched(page);
- free_unref_page_commit(page, pfn);
+ type = page_zonenum(page);
+ free_unref_page_commit(page, pfn, &dsts[type]);
/*
* Guard against excessive IRQ disabled times when we get
* a large list of pages to free.
*/
if (++batch_count == SWAP_CLUSTER_MAX) {
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
batch_count = 0;
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
}
}
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
+
+ for (i = 0; i < __MAX_NR_ZONES; ) {
+ struct page *page;
+ struct zone *zone;
+
+ if (list_empty(&dsts[i])) {
+ i++;
+ continue;
+ }
+
+ page = list_first_entry(&dsts[i], struct page, lru);
+ zone = page_zone(page);
+
+ free_pcppages_bulk(zone, &dsts[i], true);
+ }
}
/*
@@ -2976,7 +3061,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
struct page *page;
unsigned long flags;
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
page = __rmqueue_pcplist(zone, migratetype, pcp, list);
@@ -2984,7 +3069,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone);
}
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
return page;
}
@@ -3011,7 +3096,7 @@ struct page *rmqueue(struct zone *preferred_zone,
* allocate greater than order-1 page units with __GFP_NOFAIL.
*/
WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
- spin_lock_irqsave(&zone->lock, flags);
+ local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
do {
page = NULL;
@@ -3031,14 +3116,14 @@ struct page *rmqueue(struct zone *preferred_zone,
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone);
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
out:
VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
return page;
failed:
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
return NULL;
}
@@ -7087,8 +7172,9 @@ void __init free_area_init(unsigned long *zones_size)
static int page_alloc_cpu_dead(unsigned int cpu)
{
-
+ local_lock_irq_on(swapvec_lock, cpu);
lru_add_drain_cpu(cpu);
+ local_unlock_irq_on(swapvec_lock, cpu);
drain_pages(cpu);
/*
@@ -7999,7 +8085,7 @@ void zone_pcp_reset(struct zone *zone)
struct per_cpu_pageset *pset;
/* avoid races with drain_pages() */
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
if (zone->pageset != &boot_pageset) {
for_each_online_cpu(cpu) {
pset = per_cpu_ptr(zone->pageset, cpu);
@@ -8008,7 +8094,7 @@ void zone_pcp_reset(struct zone *zone)
free_percpu(zone->pageset);
zone->pageset = &boot_pageset;
}
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
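
For reference, a condensed sketch (hypothetical helper name, not part of the patch) of the per-zone flushing done by free_unref_page_list() above, which is why free_pcppages_bulk() gained the zone_retry argument: pages are staged per zone index, and because two NUMA nodes can each have a zone with the same index, the first pass may leave foreign pages on the list; the index only advances once its list is empty, so leftovers are retried with the zone recomputed from the new list head.

static void example_flush_per_zone_index(struct list_head dsts[__MAX_NR_ZONES])
{
	int i;

	for (i = 0; i < __MAX_NR_ZONES; ) {
		struct page *page;

		if (list_empty(&dsts[i])) {
			i++;
			continue;
		}
		/* zone_retry=true: pages belonging to another node's zone stay queued */
		page = list_first_entry(&dsts[i], struct page, lru);
		free_pcppages_bulk(page_zone(page), &dsts[i], true);
	}
}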