Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch | 122
1 files changed, 122 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch
new file mode 100644
index 00000000..4abffa11
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.9.21/0019-kaiser-vmstat-show-NR_KAISERTABLE-as-nr_overhead.patch
@@ -0,0 +1,122 @@
+From 5a28e367f6fd4c8e8c81ae99cf912d89930dd768 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Sat, 9 Sep 2017 21:27:32 -0700
+Subject: [PATCH 019/102] kaiser: vmstat show NR_KAISERTABLE as nr_overhead
+
+The kaiser update made an interesting choice, never to free any shadow
+page tables. Contention on global spinlock was worrying, particularly
+with it held across page table scans when freeing. Something had to be
+done: I was going to add refcounting; but simply never to free them is
+an appealing choice, minimizing contention without complicating the code
+(the more a page table is found already, the less the spinlock is used).
+
+But leaking pages in this way is also a worry: can we get away with it?
+At the very least, we need a count to show how bad it actually gets:
+in principle, one might end up wasting about 1/256 of memory that way
+(1/512 for when direct-mapped pages have to be user-mapped, plus 1/512
+for when they are user-mapped from the vmalloc area on another occasion
+(but we don't have vmalloc'ed stacks, so only large ldts are vmalloc'ed).
+
+Add per-cpu stat NR_KAISERTABLE: including 256 at startup for the
+shared pgd entries, and 1 for each intermediate page table added
+thereafter for user-mapping - but leave out the 1 per mm, for its
+shadow pgd, because that distracts from the monotonic increase.
+Shown in /proc/vmstat as nr_overhead (0 if kaiser not enabled).
+
+In practice, it doesn't look so bad so far: more like 1/12000 after
+nine hours of gtests below; and movable pageblock segregation should
+tend to cluster the kaiser tables into a subset of the address space
+(if not, they will be bad for compaction too). But production may
+tell a different story: keep an eye on this number, and bring back
+lighter freeing if it gets out of control (maybe a shrinker).
+
+["nr_overhead" should of course say "nr_kaisertable", if it needs
+to stay; but for the moment we are being coy, preferring that when
+Joe Blow notices a new line in his /proc/vmstat, he does not get
+too curious about what this "kaiser" stuff might be.]
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/kaiser.c   | 16 +++++++++++-----
+ include/linux/mmzone.h |  3 ++-
+ mm/vmstat.c            |  1 +
+ 3 files changed, 14 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
+index 7a7e850..bd22ef5 100644
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -121,9 +121,11 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic)
+ 		if (!new_pmd_page)
+ 			return NULL;
+ 		spin_lock(&shadow_table_allocation_lock);
+-		if (pud_none(*pud))
++		if (pud_none(*pud)) {
+ 			set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+-		else
++			__inc_zone_page_state(virt_to_page((void *)
++						new_pmd_page), NR_KAISERTABLE);
++		} else
+ 			free_page(new_pmd_page);
+ 		spin_unlock(&shadow_table_allocation_lock);
+ 	}
+@@ -139,9 +141,11 @@ static pte_t *kaiser_pagetable_walk(unsigned long address, bool is_atomic)
+ 		if (!new_pte_page)
+ 			return NULL;
+ 		spin_lock(&shadow_table_allocation_lock);
+-		if (pmd_none(*pmd))
++		if (pmd_none(*pmd)) {
+ 			set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+-		else
++			__inc_zone_page_state(virt_to_page((void *)
++						new_pte_page), NR_KAISERTABLE);
++		} else
+ 			free_page(new_pte_page);
+ 		spin_unlock(&shadow_table_allocation_lock);
+ 	}
+@@ -205,11 +209,13 @@ static void __init kaiser_init_all_pgds(void)
+ 	pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
+ 	for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
+ 		pgd_t new_pgd;
+-		pud_t *pud = pud_alloc_one(&init_mm, PAGE_OFFSET + i * PGDIR_SIZE);
++		pud_t *pud = pud_alloc_one(&init_mm,
++					   PAGE_OFFSET + i * PGDIR_SIZE);
+ 		if (!pud) {
+ 			WARN_ON(1);
+ 			break;
+ 		}
++		inc_zone_page_state(virt_to_page(pud), NR_KAISERTABLE);
+ 		new_pgd = __pgd(_KERNPG_TABLE |__pa(pud));
+ 		/*
+ 		 * Make sure not to stomp on some other pgd entry.
+diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
+index 7e273e2..0547d4f 100644
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -124,8 +124,9 @@ enum zone_stat_item {
+ 	NR_SLAB_UNRECLAIMABLE,
+ 	NR_PAGETABLE,		/* used for pagetables */
+ 	NR_KERNEL_STACK_KB,	/* measured in KiB */
+-	/* Second 128 byte cacheline */
++	NR_KAISERTABLE,
+ 	NR_BOUNCE,
++	/* Second 128 byte cacheline */
+ #if IS_ENABLED(CONFIG_ZSMALLOC)
+ 	NR_ZSPAGES,		/* allocated in zsmalloc */
+ #endif
+diff --git a/mm/vmstat.c b/mm/vmstat.c
+index 604f26a..6a088df 100644
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -932,6 +932,7 @@ const char * const vmstat_text[] = {
+ 	"nr_slab_unreclaimable",
+ 	"nr_page_table_pages",
+ 	"nr_kernel_stack",
++	"nr_overhead",
+ 	"nr_bounce",
+#if IS_ENABLED(CONFIG_ZSMALLOC)
+ 	"nr_zspages",
+-- 
+2.7.4
+
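As a side note for readers of this recipe patch: the string "nr_overhead" added to vmstat_text[] above is the exact field name that appears in /proc/vmstat, reporting one count per 4K shadow page table page (0 when kaiser is not enabled; absent on kernels without this series). A minimal userspace sketch, purely illustrative and not part of the patch, that reads the counter back:

/*
 * Illustrative only: read the nr_overhead counter that this patch
 * exposes through /proc/vmstat ("name value" per line).
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f) {
		perror("/proc/vmstat");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		/* Each counted entry is one leaked 4K KAISER page table. */
		if (strncmp(line, "nr_overhead ", 12) == 0)
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}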