Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch | 122
1 files changed, 122 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch
new file mode 100644
index 00000000..4abffa11
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch
@@ -0,0 +1,122 @@
+From 5a28e367f6fd4c8e8c81ae99cf912d89930dd768 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sat, 9 Sep 2017 21:27:32 -0700
+Subject: [PATCH 019/102] kaiser: vmstat show NR_KAISERTABLE as nr_overhead
+
+The kaiser update made an interesting choice, never to free any shadow
+page tables. Contention on global spinlock was worrying, particularly
+with it held across page table scans when freeing. Something had to be
+done: I was going to add refcounting; but simply never to free them is
+an appealing choice, minimizing contention without complicating the code
+(the more a page table is found already, the less the spinlock is used).
+
+But leaking pages in this way is also a worry: can we get away with it?
+At the very least, we need a count to show how bad it actually gets:
+in principle, one might end up wasting about 1/256 of memory that way
+(1/512 for when direct-mapped pages have to be user-mapped, plus 1/512
+for when they are user-mapped from the vmalloc area on another occasion
+(but we don't have vmalloc'ed stacks, so only large ldts are vmalloc'ed).
+
+Add per-cpu stat NR_KAISERTABLE: including 256 at startup for the
+shared pgd entries, and 1 for each intermediate page table added
+thereafter for user-mapping - but leave out the 1 per mm, for its
+shadow pgd, because that distracts from the monotonic increase.
+Shown in /proc/vmstat as nr_overhead (0 if kaiser not enabled).
+
+In practice, it doesn't look so bad so far: more like 1/12000 after
+nine hours of gtests below; and movable pageblock segregation should
+tend to cluster the kaiser tables into a subset of the address space
+(if not, they will be bad for compaction too). But production may
+tell a different story: keep an eye on this number, and bring back
+lighter freeing if it gets out of control (maybe a shrinker).
+
+["nr_overhead" should of course say "nr_kaisertable", if it needs
+to stay; but for the moment we are being coy, preferring that when
+Joe Blow notices a new line in his /proc/vmstat, he does not get
+too curious about what this "kaiser" stuff might be.]
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c   | 16 +++++++++++-----
+ include/linux/mmzone.h |  3 ++-
+ mm/vmstat.c            |  1 +
+ 3 files changed, 14 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
+index 7a7e850..bd22ef5 100644
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -121,9 +121,11 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic)
+ 		if (!new_pmd_page)
+ 			return NULL;
+ 		spin_lock(&shadow_table_allocation_lock);
+-		if (pud_none(*pud))
++		if (pud_none(*pud)) {
+ 			set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+-		else
++			__inc_zone_page_state(virt_to_page((void *)
++						new_pmd_page), NR_KAISERTABLE);
++		} else
+ 			free_page(new_pmd_page);
+ 		spin_unlock(&shadow_table_allocation_lock);
+ 	}
+@@ -139,9 +141,11 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic)
+ 		if (!new_pte_page)
+ 			return NULL;
+ 		spin_lock(&shadow_table_allocation_lock);
+-		if (pmd_none(*pmd))
++		if (pmd_none(*pmd)) {
+ 			set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+-		else
++			__inc_zone_page_state(virt_to_page((void *)
++						new_pte_page), NR_KAISERTABLE);
++		} else
+ 			free_page(new_pte_page);
+ 		spin_unlock(&shadow_table_allocation_lock);
+ 	}
+@@ -205,11 +209,13 @@ static void __init kaiser_init_all_pgds(void)
+ 	pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
+ 	for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
+ 		pgd_t new_pgd;
+-		pud_t *pud = pud_alloc_one(&init_mm, PAGE_OFFSET + i * PGDIR_SIZE);
++		pud_t *pud = pud_alloc_one(&init_mm,
++					   PAGE_OFFSET + i * PGDIR_SIZE);
+ 		if (!pud) {
+ 			WARN_ON(1);
+ 			break;
+ 		}
++		inc_zone_page_state(virt_to_page(pud), NR_KAISERTABLE);
+ 		new_pgd = __pgd(_KERNPG_TABLE |__pa(pud));
+ 		/*
+ 		 * Make sure not to stomp on some other pgd entry.
+diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
+index 7e273e2..0547d4f 100644
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -124,8 +124,9 @@ enum zone_stat_item {
+ 	NR_SLAB_UNRECLAIMABLE,
+ 	NR_PAGETABLE,		/* used for pagetables */
+ 	NR_KERNEL_STACK_KB,	/* measured in KiB */
+-	/* Second 128 byte cacheline */
++	NR_KAISERTABLE,
+ 	NR_BOUNCE,
++	/* Second 128 byte cacheline */
+ #if IS_ENABLED(CONFIG_ZSMALLOC)
+ 	NR_ZSPAGES,		/* allocated in zsmalloc */
+ #endif
+diff --git a/mm/vmstat.c b/mm/vmstat.c
+index 604f26a..6a088df 100644
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -932,6 +932,7 @@ const char * const vmstat_text[] = {
+ 	"nr_slab_unreclaimable",
+ 	"nr_page_table_pages",
+ 	"nr_kernel_stack",
++	"nr_overhead",
+ 	"nr_bounce",
+#if IS_ENABLED(CONFIG_ZSMALLOC)
+ 	"nr_zspages",
+-- 
+2.7.4
+
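As a side note for readers of this recipe patch: the string "nr_overhead" added to vmstat_text[] above is the exact field name that appears in /proc/vmstat, reporting one count per 4K shadow page table page (0 when kaiser is not enabled; absent on kernels without this series). A minimal userspace sketch, purely illustrative and not part of the patch, that reads the counter back:

/*
 * Illustrative only: read the nr_overhead counter that this patch
 * exposes through /proc/vmstat ("name value" per line).
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f) {
		perror("/proc/vmstat");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		/* Each counted entry is one leaked 4K KAISER page table. */
		if (strncmp(line, "nr_overhead ", 12) == 0)
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}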