Diffstat (limited to 'features/rt/mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch')
-rw-r--r-- | features/rt/mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch | 416
1 file changed, 416 insertions, 0 deletions
diff --git a/features/rt/mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch b/features/rt/mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch
new file mode 100644
index 00000000..9f53a2dd
--- /dev/null
+++ b/features/rt/mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch
@@ -0,0 +1,416 @@
+From a554a721d714cba4bf3c8eb17e25913fa593a6bf Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 26 Feb 2021 15:14:15 +0100
+Subject: [PATCH 107/191] mm: slub: Move discard_slab() invocations out of
+ IRQ-off sections
+
+discard_slab() gives the memory back to the page-allocator. Some of its
+invocations occur from IRQ-off sections in which SLUB itself disabled
+interrupts. An example is the deactivate_slab() invocation from within
+___slab_alloc() or put_cpu_partial().
+
+Instead of giving the memory back directly, put the pages on a list and
+process the list once the caller has left the known IRQ-off region.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/slub.c | 114 +++++++++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 78 insertions(+), 36 deletions(-)
+
+diff --git a/mm/slub.c b/mm/slub.c
+index 1382845c3802..af9c0fbe2cf5 100644
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -1889,12 +1889,29 @@ static void free_slab(struct kmem_cache *s, struct page *page)
+ 		__free_slab(s, page);
+ }
+ 
++static void discard_slab_delayed(struct kmem_cache *s, struct page *page,
++				 struct list_head *delayed_free)
++{
++	dec_slabs_node(s, page_to_nid(page), page->objects);
++	list_add(&page->lru, delayed_free);
++}
++
+ static void discard_slab(struct kmem_cache *s, struct page *page)
+ {
+ 	dec_slabs_node(s, page_to_nid(page), page->objects);
+ 	free_slab(s, page);
+ }
+ 
++static void discard_delayed(struct list_head *l)
++{
++	while (!list_empty(l)) {
++		struct page *page = list_first_entry(l, struct page, lru);
++
++		list_del(&page->lru);
++		__free_slab(page->slab_cache, page);
++	}
++}
++
+ /*
+  * Management of partially allocated slabs.
+  */
+@@ -1968,15 +1985,16 @@ static inline void *acquire_slab(struct kmem_cache *s,
+ 	WARN_ON(!freelist);
+ 	return freelist;
+ }
+-
+-static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
++static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain,
++			    struct list_head *delayed_free);
+ static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
+ 
+ /*
+  * Try to allocate a partial slab from a specific node.
+  */
+ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
+-				struct kmem_cache_cpu *c, gfp_t flags)
++			      struct kmem_cache_cpu *c, gfp_t flags,
++			      struct list_head *delayed_free)
+ {
+ 	struct page *page, *page2;
+ 	void *object = NULL;
+@@ -2009,7 +2027,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
+ 			stat(s, ALLOC_FROM_PARTIAL);
+ 			object = t;
+ 		} else {
+-			put_cpu_partial(s, page, 0);
++			put_cpu_partial(s, page, 0, delayed_free);
+ 			stat(s, CPU_PARTIAL_NODE);
+ 		}
+ 		if (!kmem_cache_has_cpu_partial(s)
+@@ -2025,7 +2043,8 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
+  * Get a page from somewhere. Search in increasing NUMA distances.
+  */
+ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
+-				struct kmem_cache_cpu *c)
++			     struct kmem_cache_cpu *c,
++			     struct list_head *delayed_free)
+ {
+ #ifdef CONFIG_NUMA
+ 	struct zonelist *zonelist;
+@@ -2067,7 +2086,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
+ 
+ 			if (n && cpuset_zone_allowed(zone, flags) &&
+ 					n->nr_partial > s->min_partial) {
+-				object = get_partial_node(s, n, c, flags);
++				object = get_partial_node(s, n, c, flags, delayed_free);
+ 				if (object) {
+ 					/*
+ 					 * Don't check read_mems_allowed_retry()
+@@ -2089,7 +2108,8 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
+  * Get a partial page, lock it and return it.
+  */
+ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
+-		struct kmem_cache_cpu *c)
++			 struct kmem_cache_cpu *c,
++			 struct list_head *delayed_free)
+ {
+ 	void *object;
+ 	int searchnode = node;
+@@ -2097,11 +2117,12 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
+ 	if (node == NUMA_NO_NODE)
+ 		searchnode = numa_mem_id();
+ 
+-	object = get_partial_node(s, get_node(s, searchnode), c, flags);
++	object = get_partial_node(s, get_node(s, searchnode), c, flags,
++				  delayed_free);
+ 	if (object || node != NUMA_NO_NODE)
+ 		return object;
+ 
+-	return get_any_partial(s, flags, c);
++	return get_any_partial(s, flags, c, delayed_free);
+ }
+ 
+ #ifdef CONFIG_PREEMPTION
+@@ -2177,7 +2198,8 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
+  * Remove the cpu slab
+  */
+ static void deactivate_slab(struct kmem_cache *s, struct page *page,
+-				void *freelist, struct kmem_cache_cpu *c)
++			    void *freelist, struct kmem_cache_cpu *c,
++			    struct list_head *delayed_free)
+ {
+ 	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
+ 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+@@ -2303,7 +2325,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
+ 		stat(s, DEACTIVATE_FULL);
+ 	else if (m == M_FREE) {
+ 		stat(s, DEACTIVATE_EMPTY);
+-		discard_slab(s, page);
++		discard_slab_delayed(s, page, delayed_free);
+ 		stat(s, FREE_SLAB);
+ 	}
+ 
+@@ -2318,8 +2340,8 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
+  * for the cpu using c (or some other guarantee must be there
+  * to guarantee no concurrent accesses).
+  */
+-static void unfreeze_partials(struct kmem_cache *s,
+-		struct kmem_cache_cpu *c)
++static void unfreeze_partials(struct kmem_cache *s, struct kmem_cache_cpu *c,
++			      struct list_head *delayed_free)
+ {
+ #ifdef CONFIG_SLUB_CPU_PARTIAL
+ 	struct kmem_cache_node *n = NULL, *n2 = NULL;
+@@ -2373,7 +2395,7 @@ static void unfreeze_partials(struct kmem_cache *s,
+ 		discard_page = discard_page->next;
+ 
+ 		stat(s, DEACTIVATE_EMPTY);
+-		discard_slab(s, page);
++		discard_slab_delayed(s, page, delayed_free);
+ 		stat(s, FREE_SLAB);
+ 	}
+ #endif	/* CONFIG_SLUB_CPU_PARTIAL */
+@@ -2386,7 +2408,8 @@ static void unfreeze_partials(struct kmem_cache *s,
+  * If we did not find a slot then simply move all the partials to the
+  * per node partial list.
+  */
+-static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
++static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain,
++			    struct list_head *delayed_free)
+ {
+ #ifdef CONFIG_SLUB_CPU_PARTIAL
+ 	struct page *oldpage;
+@@ -2409,7 +2432,8 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
+ 				 * set to the per node partial list.
+ 				 */
+ 				local_irq_save(flags);
+-				unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
++				unfreeze_partials(s, this_cpu_ptr(s->cpu_slab),
++						  delayed_free);
+ 				local_irq_restore(flags);
+ 				oldpage = NULL;
+ 				pobjects = 0;
+@@ -2431,17 +2455,18 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
+ 		unsigned long flags;
+ 
+ 		local_irq_save(flags);
+-		unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
++		unfreeze_partials(s, this_cpu_ptr(s->cpu_slab), delayed_free);
+ 		local_irq_restore(flags);
+ 	}
+ 	preempt_enable();
+ #endif	/* CONFIG_SLUB_CPU_PARTIAL */
+ }
+ 
+-static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
++static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c,
++			      struct list_head *delayed_free)
+ {
+ 	stat(s, CPUSLAB_FLUSH);
+-	deactivate_slab(s, c->page, c->freelist, c);
++	deactivate_slab(s, c->page, c->freelist, c, delayed_free);
+ 
+ 	c->tid = next_tid(c->tid);
+ }
+@@ -2451,21 +2476,24 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
+  *
+  * Called from IPI handler with interrupts disabled.
+  */
+-static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
++static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu,
++				    struct list_head *delayed_free)
+ {
+ 	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+ 
+ 	if (c->page)
+-		flush_slab(s, c);
++		flush_slab(s, c, delayed_free);
+ 
+-	unfreeze_partials(s, c);
++	unfreeze_partials(s, c, delayed_free);
+ }
+ 
+ static void flush_cpu_slab(void *d)
+ {
+ 	struct kmem_cache *s = d;
++	LIST_HEAD(delayed_free);
+ 
+-	__flush_cpu_slab(s, smp_processor_id());
++	__flush_cpu_slab(s, smp_processor_id(), &delayed_free);
++	discard_delayed(&delayed_free);
+ }
+ 
+ static bool has_cpu_slab(int cpu, void *info)
+@@ -2489,13 +2517,15 @@ static int slub_cpu_dead(unsigned int cpu)
+ {
+ 	struct kmem_cache *s;
+ 	unsigned long flags;
++	LIST_HEAD(delayed_free);
+ 
+ 	mutex_lock(&slab_mutex);
+ 	list_for_each_entry(s, &slab_caches, list) {
+ 		local_irq_save(flags);
+-		__flush_cpu_slab(s, cpu);
++		__flush_cpu_slab(s, cpu, &delayed_free);
+ 		local_irq_restore(flags);
+ 	}
++	discard_delayed(&delayed_free);
+ 	mutex_unlock(&slab_mutex);
+ 	return 0;
+ }
+@@ -2579,7 +2609,8 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
+ }
+ 
+ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
+-			int node, struct kmem_cache_cpu **pc)
++				     int node, struct kmem_cache_cpu **pc,
++				     struct list_head *delayed_free)
+ {
+ 	void *freelist;
+ 	struct kmem_cache_cpu *c = *pc;
+@@ -2587,7 +2618,7 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
+ 
+ 	WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
+ 
+-	freelist = get_partial(s, flags, node, c);
++	freelist = get_partial(s, flags, node, c, delayed_free);
+ 
+ 	if (freelist)
+ 		return freelist;
+@@ -2596,7 +2627,7 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
+ 	if (page) {
+ 		c = raw_cpu_ptr(s->cpu_slab);
+ 		if (c->page)
+-			flush_slab(s, c);
++			flush_slab(s, c, delayed_free);
+ 
+ 		/*
+ 		 * No other reference to the page yet so we can
+@@ -2675,7 +2706,8 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
+  * already disabled (which is the case for bulk allocation).
+  */
+ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+-			  unsigned long addr, struct kmem_cache_cpu *c)
++			   unsigned long addr, struct kmem_cache_cpu *c,
++			   struct list_head *delayed_free)
+ {
+ 	void *freelist;
+ 	struct page *page;
+@@ -2705,7 +2737,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ 			goto redo;
+ 		} else {
+ 			stat(s, ALLOC_NODE_MISMATCH);
+-			deactivate_slab(s, page, c->freelist, c);
++			deactivate_slab(s, page, c->freelist, c, delayed_free);
+ 			goto new_slab;
+ 		}
+ 	}
+@@ -2716,7 +2748,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ 	 * information when the page leaves the per-cpu allocator
+ 	 */
+ 	if (unlikely(!pfmemalloc_match(page, gfpflags))) {
+-		deactivate_slab(s, page, c->freelist, c);
++		deactivate_slab(s, page, c->freelist, c, delayed_free);
+ 		goto new_slab;
+ 	}
+ 
+@@ -2755,7 +2787,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ 		goto redo;
+ 	}
+ 
+-	freelist = new_slab_objects(s, gfpflags, node, &c);
++	freelist = new_slab_objects(s, gfpflags, node, &c, delayed_free);
+ 
+ 	if (unlikely(!freelist)) {
+ 		slab_out_of_memory(s, gfpflags, node);
+@@ -2771,7 +2803,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ 	    !alloc_debug_processing(s, page, freelist, addr))
+ 		goto new_slab;	/* Slab failed checks. Next slab needed */
+ 
+-	deactivate_slab(s, page, get_freepointer(s, freelist), c);
++	deactivate_slab(s, page, get_freepointer(s, freelist), c, delayed_free);
+ 	return freelist;
+ }
+ 
+@@ -2784,6 +2816,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ {
+ 	void *p;
+ 	unsigned long flags;
++	LIST_HEAD(delayed_free);
+ 
+ 	local_irq_save(flags);
+ #ifdef CONFIG_PREEMPTION
+@@ -2795,8 +2828,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ 	c = this_cpu_ptr(s->cpu_slab);
+ #endif
+ 
+-	p = ___slab_alloc(s, gfpflags, node, addr, c);
++	p = ___slab_alloc(s, gfpflags, node, addr, c, &delayed_free);
+ 	local_irq_restore(flags);
++	discard_delayed(&delayed_free);
+ 	return p;
+ }
+ 
+@@ -3060,11 +3094,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
+ 		 */
+ 		stat(s, FREE_FROZEN);
+ 	} else if (new.frozen) {
++		LIST_HEAD(delayed_free);
+ 		/*
+ 		 * If we just froze the page then put it onto the
+ 		 * per cpu partial list.
+ 		 */
+-		put_cpu_partial(s, page, 1);
++		put_cpu_partial(s, page, 1, &delayed_free);
++		discard_delayed(&delayed_free);
+ 		stat(s, CPU_PARTIAL_FREE);
+ 	}
+ 
+@@ -3315,6 +3351,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+ 	struct kmem_cache_cpu *c;
+ 	int i;
+ 	struct obj_cgroup *objcg = NULL;
++	LIST_HEAD(delayed_free);
+ 
+ 	if (IS_ENABLED(CONFIG_PREEMPT_RT) && IS_ENABLED(CONFIG_DEBUG_ATOMIC_SLEEP))
+ 		WARN_ON_ONCE(!preemptible() &&
+@@ -3356,7 +3393,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+ 			 * of re-populating per CPU c->freelist
+ 			 */
+ 			p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
+-					    _RET_IP_, c);
++					    _RET_IP_, c, &delayed_free);
+ 			if (unlikely(!p[i]))
+ 				goto error;
+ 
+@@ -3372,6 +3409,8 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+ 	c->tid = next_tid(c->tid);
+ 	local_irq_enable();
+ 
++	discard_delayed(&delayed_free);
++
+ 	/* Clear memory outside IRQ disabled fastpath loop */
+ 	if (unlikely(slab_want_init_on_alloc(flags, s))) {
+ 		int j;
+@@ -3385,6 +3424,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+ 	return i;
+ error:
+ 	local_irq_enable();
++	discard_delayed(&delayed_free);
+ 	slab_post_alloc_hook(s, objcg, flags, i, p);
+ 	__kmem_cache_free_bulk(s, i, p);
+ 	return 0;
+@@ -4437,6 +4477,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
+ 	int node;
+ 	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+ 	struct kmem_cache_node *n;
++	LIST_HEAD(delayed_free);
+ 
+ 	memcpy(s, static_cache, kmem_cache->object_size);
+ 
+@@ -4445,7 +4486,8 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
+ 	 * up. Even if it weren't true, IRQs are not up so we couldn't fire
+ 	 * IPIs around.
+ 	 */
+-	__flush_cpu_slab(s, smp_processor_id());
++	__flush_cpu_slab(s, smp_processor_id(), &delayed_free);
++	discard_delayed(&delayed_free);
+ 	for_each_kmem_cache_node(s, node, n) {
+ 		struct page *p;
+ 
+-- 
+2.19.1
+
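The pattern the patch applies is worth seeing in isolation: while interrupts
are off, discard_slab_delayed() only moves the page onto a caller-supplied
list_head, and the caller runs discard_delayed() after local_irq_restore(),
so the page allocator is never entered from inside the IRQ-off region. Below
is a minimal userspace sketch of that deferred-free idea; it is illustrative
only, and every name in it (fake_page, discard_page_delayed, and so on) is
hypothetical rather than taken from mm/slub.c:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for struct page, chained through a next pointer
 * the way the kernel code chains pages through page->lru. */
struct fake_page {
	struct fake_page *next;
	int id;
};

/* Queue a page on the caller's delayed-free list instead of freeing it.
 * Safe inside the critical section: it only manipulates pointers. */
static void discard_page_delayed(struct fake_page **delayed_free,
				 struct fake_page *page)
{
	page->next = *delayed_free;
	*delayed_free = page;
}

/* Drain the list; in the kernel this is where __free_slab() hands the
 * memory back to the page allocator, with interrupts enabled again. */
static void discard_delayed(struct fake_page **delayed_free)
{
	while (*delayed_free) {
		struct fake_page *page = *delayed_free;

		*delayed_free = page->next;
		printf("releasing page %d outside the critical section\n",
		       page->id);
		free(page);
	}
}

int main(void)
{
	struct fake_page *delayed_free = NULL;	/* plays LIST_HEAD(delayed_free) */

	/* local_irq_save(flags): start of the (simulated) IRQ-off region. */
	for (int i = 0; i < 3; i++) {
		struct fake_page *page = malloc(sizeof(*page));

		if (!page)
			return 1;
		page->id = i;
		/* Inside the region: only queue, never call the allocator. */
		discard_page_delayed(&delayed_free, page);
	}
	/* local_irq_restore(flags): end of the IRQ-off region. */

	/* Mirrors __slab_alloc() and friends: drain after IRQs are back on. */
	discard_delayed(&delayed_free);
	return 0;
}

The split matters on PREEMPT_RT because giving memory back to the page
allocator may take locks that must not be acquired while interrupts are
hard-disabled; deferring the free until after local_irq_restore() keeps the
IRQ-off section free of allocator calls while leaving the accounting
(dec_slabs_node()) where it was.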