Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch')
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch | 122
1 file changed, 122 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch
new file mode 100644
index 00000000..4abffa11
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch
@@ -0,0 +1,122 @@
+From 5a28e367f6fd4c8e8c81ae99cf912d89930dd768 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sat, 9 Sep 2017 21:27:32 -0700
+Subject: [PATCH 019/102] kaiser: vmstat show NR_KAISERTABLE as nr_overhead
+
+The kaiser update made an interesting choice, never to free any shadow
+page tables. Contention on global spinlock was worrying, particularly
+with it held across page table scans when freeing. Something had to be
+done: I was going to add refcounting; but simply never to free them is
+an appealing choice, minimizing contention without complicating the code
+(the more a page table is found already, the less the spinlock is used).
+
+But leaking pages in this way is also a worry: can we get away with it?
+At the very least, we need a count to show how bad it actually gets:
+in principle, one might end up wasting about 1/256 of memory that way
+(1/512 for when direct-mapped pages have to be user-mapped, plus 1/512
+for when they are user-mapped from the vmalloc area on another occasion
+(but we don't have vmalloc'ed stacks, so only large ldts are vmalloc'ed)).
+
+Add per-cpu stat NR_KAISERTABLE: including 256 at startup for the
+shared pgd entries, and 1 for each intermediate page table added
+thereafter for user-mapping - but leave out the 1 per mm, for its
+shadow pgd, because that distracts from the monotonic increase.
+Shown in /proc/vmstat as nr_overhead (0 if kaiser not enabled).
+
+In practice, it doesn't look so bad so far: more like 1/12000 after
+nine hours of gtests below; and movable pageblock segregation should
+tend to cluster the kaiser tables into a subset of the address space
+(if not, they will be bad for compaction too). But production may
+tell a different story: keep an eye on this number, and bring back
+lighter freeing if it gets out of control (maybe a shrinker).
+
+["nr_overhead" should of course say "nr_kaisertable", if it needs
+to stay; but for the moment we are being coy, preferring that when
+Joe Blow notices a new line in his /proc/vmstat, he does not get
+too curious about what this "kaiser" stuff might be.]
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c | 16 +++++++++++-----
+ include/linux/mmzone.h | 3 ++-
+ mm/vmstat.c | 1 +
+ 3 files changed, 14 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
+index 7a7e850..bd22ef5 100644
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -121,9 +121,11 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic)
+ if (!new_pmd_page)
+ return NULL;
+ spin_lock(&shadow_table_allocation_lock);
+- if (pud_none(*pud))
++ if (pud_none(*pud)) {
+ set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+- else
++ __inc_zone_page_state(virt_to_page((void *)
++ new_pmd_page), NR_KAISERTABLE);
++ } else
+ free_page(new_pmd_page);
+ spin_unlock(&shadow_table_allocation_lock);
+ }
+@@ -139,9 +141,11 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic)
+ if (!new_pte_page)
+ return NULL;
+ spin_lock(&shadow_table_allocation_lock);
+- if (pmd_none(*pmd))
++ if (pmd_none(*pmd)) {
+ set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+- else
++ __inc_zone_page_state(virt_to_page((void *)
++ new_pte_page), NR_KAISERTABLE);
++ } else
+ free_page(new_pte_page);
+ spin_unlock(&shadow_table_allocation_lock);
+ }
+@@ -205,11 +209,13 @@ static void __init kaiser_init_all_pgds(void)
+ pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
+ for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
+ pgd_t new_pgd;
+- pud_t *pud = pud_alloc_one(&init_mm, PAGE_OFFSET + i * PGDIR_SIZE);
++ pud_t *pud = pud_alloc_one(&init_mm,
++ PAGE_OFFSET + i * PGDIR_SIZE);
+ if (!pud) {
+ WARN_ON(1);
+ break;
+ }
++ inc_zone_page_state(virt_to_page(pud), NR_KAISERTABLE);
+ new_pgd = __pgd(_KERNPG_TABLE |__pa(pud));
+ /*
+ * Make sure not to stomp on some other pgd entry.
+diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
+index 7e273e2..0547d4f 100644
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -124,8 +124,9 @@ enum zone_stat_item {
+ NR_SLAB_UNRECLAIMABLE,
+ NR_PAGETABLE, /* used for pagetables */
+ NR_KERNEL_STACK_KB, /* measured in KiB */
+- /* Second 128 byte cacheline */
++ NR_KAISERTABLE,
+ NR_BOUNCE,
++ /* Second 128 byte cacheline */
+ #if IS_ENABLED(CONFIG_ZSMALLOC)
+ NR_ZSPAGES, /* allocated in zsmalloc */
+ #endif
+diff --git a/mm/vmstat.c b/mm/vmstat.c
+index 604f26a..6a088df 100644
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -932,6 +932,7 @@ const char * const vmstat_text[] = {
+ "nr_slab_unreclaimable",
+ "nr_page_table_pages",
+ "nr_kernel_stack",
++ "nr_overhead",
+ "nr_bounce",
+ #if IS_ENABLED(CONFIG_ZSMALLOC)
+ "nr_zspages",
+--
+2.7.4
+
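
As a rough illustration (not part of the patch above, and not anything shipped in the kernel tree): the commit message asks that the new nr_overhead counter be watched in production. The sketch below is a minimal user-space C program that reads the nr_overhead field this patch adds to /proc/vmstat, compares it against MemTotal from /proc/meminfo, and prints the fraction of memory held by KAISER shadow page tables. The program and the helper name read_value are hypothetical; only the nr_overhead field name and its page-count semantics come from the patch itself.

/*
 * Sketch: report KAISER shadow page table overhead as a share of RAM.
 * Assumes a kernel with the patch above applied; prints 0 pages if
 * kaiser is not enabled (the counter then never increments).
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Return the numeric value following "key" in a /proc file, or -1. */
static long read_value(const char *path, const char *key)
{
	char line[256];
	long val = -1;
	FILE *f = fopen(path, "r");

	if (!f)
		return -1;
	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, key, strlen(key))) {
			val = atol(line + strlen(key));
			break;
		}
	}
	fclose(f);
	return val;
}

int main(void)
{
	/* nr_overhead is reported in pages; MemTotal in kB. */
	long overhead_pages = read_value("/proc/vmstat", "nr_overhead ");
	long memtotal_kb = read_value("/proc/meminfo", "MemTotal:");
	long page_kb = sysconf(_SC_PAGESIZE) / 1024;

	if (overhead_pages < 0 || memtotal_kb <= 0) {
		fprintf(stderr, "nr_overhead or MemTotal not found\n");
		return 1;
	}
	printf("nr_overhead: %ld pages (%ld kB), %.6f%% of MemTotal\n",
	       overhead_pages, overhead_pages * page_kb,
	       100.0 * overhead_pages * page_kb / memtotal_kb);
	return 0;
}

With the worst case sketched in the commit message (about 1/256 of memory), the printed percentage would approach 0.39%; the ~1/12000 observed after nine hours of testing corresponds to roughly 0.008%.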