From 3bd87df897f0a51299da79bf24091cc937f1706e Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 26 Feb 2021 17:11:55 +0100
Subject: [PATCH 108/191] mm: slub: Move flush_cpu_slab() invocations
 __free_slab() invocations out of IRQ context

flush_all() flushes a specific SLAB cache on each CPU (where the cache
is present). The discard_delayed()/__free_slab() invocation happens
within an IPI handler and is problematic for PREEMPT_RT.

The flush operation is not a frequent operation or a hot path. The
per-CPU flush operation can be moved into a workqueue.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
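For reference (not part of the change itself), the flush path after this
patch fits together roughly as follows; this is just the new hunks from
the diff below assembled into plain code, with comments added:

struct slub_flush_work {
	struct work_struct work;
	struct kmem_cache *s;
	bool skip;
};

static DEFINE_MUTEX(flush_lock);
static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);

/* Work handler: runs on the target CPU from a workqueue, not from an IPI. */
static void flush_cpu_slab(struct work_struct *w)
{
	struct slub_flush_work *sfw = container_of(w, struct slub_flush_work, work);
	LIST_HEAD(delayed_free);

	/* __flush_cpu_slab() still expects interrupts to be off. */
	local_irq_disable();
	__flush_cpu_slab(sfw->s, smp_processor_id(), &delayed_free);
	local_irq_enable();

	/* Free the collected slabs with interrupts enabled. */
	discard_delayed(&delayed_free);
}

static void flush_all_locked(struct kmem_cache *s)
{
	struct slub_flush_work *sfw;
	unsigned int cpu;

	mutex_lock(&flush_lock);

	/* Queue one work item per CPU that actually has something cached. */
	for_each_online_cpu(cpu) {
		sfw = &per_cpu(slub_flush, cpu);
		if (!has_cpu_slab(cpu, s)) {
			sfw->skip = true;
			continue;
		}
		INIT_WORK(&sfw->work, flush_cpu_slab);
		sfw->skip = false;
		sfw->s = s;
		schedule_work_on(cpu, &sfw->work);
	}

	/* Wait for every queued flush to complete before returning. */
	for_each_online_cpu(cpu) {
		sfw = &per_cpu(slub_flush, cpu);
		if (!sfw->skip)
			flush_work(&sfw->work);
	}

	mutex_unlock(&flush_lock);
}

flush_all() becomes a thin wrapper that takes cpus_read_lock() around
flush_all_locked() so the set of online CPUs cannot change while the
work items are queued and waited for.
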
 mm/slub.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 52 insertions(+), 8 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index af9c0fbe2cf5..ec608c1d5fdb 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2487,26 +2487,70 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu,
unfreeze_partials(s, c, delayed_free);
}
-static void flush_cpu_slab(void *d)
+struct slub_flush_work {
+ struct work_struct work;
+ struct kmem_cache *s;
+ bool skip;
+};
+
+static void flush_cpu_slab(struct work_struct *w)
{
- struct kmem_cache *s = d;
+ struct slub_flush_work *sfw;
LIST_HEAD(delayed_free);
- __flush_cpu_slab(s, smp_processor_id(), &delayed_free);
+ sfw = container_of(w, struct slub_flush_work, work);
+
+ local_irq_disable();
+ __flush_cpu_slab(sfw->s, smp_processor_id(), &delayed_free);
+ local_irq_enable();
+
discard_delayed(&delayed_free);
}
-static bool has_cpu_slab(int cpu, void *info)
+static bool has_cpu_slab(int cpu, struct kmem_cache *s)
{
- struct kmem_cache *s = info;
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
return c->page || slub_percpu_partial(c);
}
+static DEFINE_MUTEX(flush_lock);
+static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);
+
+static void flush_all_locked(struct kmem_cache *s)
+{
+ struct slub_flush_work *sfw;
+ unsigned int cpu;
+
+ mutex_lock(&flush_lock);
+
+ for_each_online_cpu(cpu) {
+ sfw = &per_cpu(slub_flush, cpu);
+ if (!has_cpu_slab(cpu, s)) {
+ sfw->skip = true;
+ continue;
+ }
+ INIT_WORK(&sfw->work, flush_cpu_slab);
+ sfw->skip = false;
+ sfw->s = s;
+ schedule_work_on(cpu, &sfw->work);
+ }
+
+ for_each_online_cpu(cpu) {
+ sfw = &per_cpu(slub_flush, cpu);
+ if (sfw->skip)
+ continue;
+ flush_work(&sfw->work);
+ }
+
+ mutex_unlock(&flush_lock);
+}
+
static void flush_all(struct kmem_cache *s)
{
- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
+ cpus_read_lock();
+ flush_all_locked(s);
+ cpus_read_unlock();
}
/*
@@ -4009,7 +4053,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
int node;
struct kmem_cache_node *n;
- flush_all(s);
+ flush_all_locked(s);
/* Attempt to free all objects */
for_each_kmem_cache_node(s, node, n) {
free_partial(s, n);
@@ -4293,7 +4337,7 @@ int __kmem_cache_shrink(struct kmem_cache *s)
unsigned long flags;
int ret = 0;
- flush_all(s);
+ flush_all_locked(s);
for_each_kmem_cache_node(s, node, n) {
INIT_LIST_HEAD(&discard);
for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
--
2.19.1