From 1398d3c431f6fd53914f1b71dd30732092e6baa7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Fri, 3 Jul 2009 08:29:37 -0500
Subject: [PATCH 111/191] mm: page_alloc: Use a local_lock instead of explicit
 local_irq_save().

The page allocator disables interrupts for a few reasons:

- To decouple the irqsave operation from spin_lock() so that it can be
  extended over the actual lock region and cover other areas, such as
  counter increments, where the preempt-safe counter variants can then
  be avoided.

- To protect access to the per-CPU pcp lists in struct zone.

Replace the explicit local_irq_save() with a local_lock. The counters
are expected to always be modified with preemption disabled and are
never accessed from interrupt context.

Contains fixes from:
	 Peter Zijlstra
	 Thomas Gleixner

Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
Signed-off-by: Sebastian Andrzej Siewior
---
 mm/page_alloc.c | 49 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 19 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index db713dd3e08e..72993fb19c99 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -62,6 +62,7 @@
 #include
 #include
 #include
+#include <linux/local_lock.h>
 #include
 #include
 #include
@@ -363,6 +364,13 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
+struct pa_lock {
+	local_lock_t	l;
+};
+static DEFINE_PER_CPU(struct pa_lock, pa_lock) = {
+	.l = INIT_LOCAL_LOCK(l),
+};
+
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 /*
  * During boot we initialize deferred pages on-demand, as needed, but once
@@ -1541,11 +1549,11 @@ static void __free_pages_ok(struct page *page, unsigned int order,
 		return;
 
 	migratetype = get_pfnblock_migratetype(page, pfn);
-	local_irq_save(flags);
+	local_lock_irqsave(&pa_lock.l, flags);
 	__count_vm_events(PGFREE, 1 << order);
 	free_one_page(page_zone(page), page, pfn, order, migratetype,
 		      fpi_flags);
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&pa_lock.l, flags);
 }
 
 void __free_pages_core(struct page *page, unsigned int order)
@@ -2962,12 +2970,12 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 	unsigned long flags;
 	int to_drain, batch;
 
-	local_irq_save(flags);
+	local_lock_irqsave(&pa_lock.l, flags);
 	batch = READ_ONCE(pcp->batch);
 	to_drain = min(pcp->count, batch);
 	if (to_drain > 0)
 		free_pcppages_bulk(zone, to_drain, pcp);
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&pa_lock.l, flags);
 }
 #endif
 
@@ -2984,13 +2992,13 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
 	struct per_cpu_pageset *pset;
 	struct per_cpu_pages *pcp;
 
-	local_irq_save(flags);
+	local_lock_irqsave(&pa_lock.l, flags);
 	pset = per_cpu_ptr(zone->pageset, cpu);
 
 	pcp = &pset->pcp;
 	if (pcp->count)
 		free_pcppages_bulk(zone, pcp->count, pcp);
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&pa_lock.l, flags);
 }
 
 /*
@@ -3253,9 +3261,9 @@ void free_unref_page(struct page *page)
 	if (!free_unref_page_prepare(page, pfn))
 		return;
 
-	local_irq_save(flags);
+	local_lock_irqsave(&pa_lock.l, flags);
 	free_unref_page_commit(page, pfn);
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&pa_lock.l, flags);
 }
 
 /*
@@ -3275,7 +3283,7 @@ void free_unref_page_list(struct list_head *list)
 		set_page_private(page, pfn);
 	}
 
-	local_irq_save(flags);
+	local_lock_irqsave(&pa_lock.l, flags);
 	list_for_each_entry_safe(page, next, list, lru) {
 		unsigned long pfn = page_private(page);
 
@@ -3288,12 +3296,12 @@ void free_unref_page_list(struct list_head *list)
 		 * a large list of pages to free.
 		 */
 		if (++batch_count == SWAP_CLUSTER_MAX) {
-			local_irq_restore(flags);
+			local_unlock_irqrestore(&pa_lock.l, flags);
 			batch_count = 0;
-			local_irq_save(flags);
+			local_lock_irqsave(&pa_lock.l, flags);
 		}
 	}
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&pa_lock.l, flags);
 }
 
 /*
@@ -3449,7 +3457,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 	struct page *page;
 	unsigned long flags;
 
-	local_irq_save(flags);
+	local_lock_irqsave(&pa_lock.l, flags);
 	pcp = &this_cpu_ptr(zone->pageset)->pcp;
 	list = &pcp->lists[migratetype];
 	page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list);
@@ -3457,7 +3465,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
 		zone_statistics(preferred_zone, zone);
 	}
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&pa_lock.l, flags);
 	return page;
 }
 
@@ -3491,7 +3499,9 @@ struct page *rmqueue(struct zone *preferred_zone,
 	 * allocate greater than order-1 page units with __GFP_NOFAIL.
 	 */
 	WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
-	spin_lock_irqsave(&zone->lock, flags);
+
+	local_lock_irqsave(&pa_lock.l, flags);
+	spin_lock(&zone->lock);
 
 	do {
 		page = NULL;
@@ -3512,12 +3522,13 @@ struct page *rmqueue(struct zone *preferred_zone,
 	spin_unlock(&zone->lock);
 	if (!page)
 		goto failed;
+
 	__mod_zone_freepage_state(zone, -(1 << order),
 				  get_pcppage_migratetype(page));
 
 	__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
 	zone_statistics(preferred_zone, zone);
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&pa_lock.l, flags);
 
 out:
 	/* Separate test+clear to avoid unnecessary atomics */
@@ -3530,7 +3541,7 @@ struct page *rmqueue(struct zone *preferred_zone,
 	return page;
 
 failed:
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&pa_lock.l, flags);
 	return NULL;
 }
 
@@ -8810,7 +8821,7 @@ void zone_pcp_reset(struct zone *zone)
 	struct per_cpu_pageset *pset;
 
 	/* avoid races with drain_pages() */
-	local_irq_save(flags);
+	local_lock_irqsave(&pa_lock.l, flags);
 	if (zone->pageset != &boot_pageset) {
 		for_each_online_cpu(cpu) {
 			pset = per_cpu_ptr(zone->pageset, cpu);
@@ -8819,7 +8830,7 @@ void zone_pcp_reset(struct zone *zone)
 		free_percpu(zone->pageset);
 		zone->pageset = &boot_pageset;
 	}
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&pa_lock.l, flags);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-- 
2.19.1
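
For readers unfamiliar with the local_lock API the patch relies on, below
is a minimal, self-contained sketch of the same pattern in module form.
The struct, variable and function names (my_pcpu_data, my_pcpu_inc,
my_local_lock_demo_init) are hypothetical and do not appear in the patch;
only the DEFINE_PER_CPU()/INIT_LOCAL_LOCK() setup and the
local_lock_irqsave()/local_unlock_irqrestore() pairing mirror what the
patch does with pa_lock in mm/page_alloc.c.

/*
 * Sketch only: a hypothetical per-CPU counter protected by a local_lock,
 * following the same pattern as the pa_lock conversion in the patch.
 */
#include <linux/local_lock.h>
#include <linux/module.h>
#include <linux/percpu.h>

struct my_pcpu_data {
	local_lock_t	lock;	/* protects 'count' on this CPU */
	unsigned long	count;
};

static DEFINE_PER_CPU(struct my_pcpu_data, my_pcpu_data) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

static void my_pcpu_inc(void)
{
	unsigned long flags;

	/*
	 * On !PREEMPT_RT this disables interrupts, exactly like the
	 * local_irq_save() it stands in for; on PREEMPT_RT it acquires a
	 * per-CPU lock and the section stays preemptible.
	 */
	local_lock_irqsave(&my_pcpu_data.lock, flags);
	this_cpu_inc(my_pcpu_data.count);
	local_unlock_irqrestore(&my_pcpu_data.lock, flags);
}

static int __init my_local_lock_demo_init(void)
{
	my_pcpu_inc();
	return 0;
}
module_init(my_local_lock_demo_init);

MODULE_LICENSE("GPL");

On non-RT configurations this compiles down to essentially the same code
as the local_irq_save() variant it replaces, which is why the conversion
does not change behaviour for existing kernels while giving PREEMPT_RT a
lock it can substitute.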