aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/book3s32/hash_low.S2
-rw-r--r--arch/powerpc/mm/book3s32/mmu.c17
-rw-r--r--arch/powerpc/mm/book3s64/hash_hugetlbpage.c1
-rw-r--r--arch/powerpc/mm/book3s64/hash_pgtable.c1
-rw-r--r--arch/powerpc/mm/book3s64/hash_tlb.c1
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c25
-rw-r--r--arch/powerpc/mm/book3s64/pgtable.c8
-rw-r--r--arch/powerpc/mm/book3s64/pkeys.c300
-rw-r--r--arch/powerpc/mm/book3s64/radix_hugetlbpage.c1
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c219
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c82
-rw-r--r--arch/powerpc/mm/copro_fault.c7
-rw-r--r--arch/powerpc/mm/drmem.c87
-rw-r--r--arch/powerpc/mm/fault.c117
-rw-r--r--arch/powerpc/mm/hugetlbpage.c18
-rw-r--r--arch/powerpc/mm/init_32.c3
-rw-r--r--arch/powerpc/mm/init_64.c10
-rw-r--r--arch/powerpc/mm/kasan/8xx.c1
-rw-r--r--arch/powerpc/mm/kasan/book3s_32.c1
-rw-r--r--arch/powerpc/mm/kasan/kasan_init_32.c35
-rw-r--r--arch/powerpc/mm/mem.c8
-rw-r--r--arch/powerpc/mm/nohash/40x.c1
-rw-r--r--arch/powerpc/mm/nohash/8xx.c1
-rw-r--r--arch/powerpc/mm/nohash/fsl_booke.c1
-rw-r--r--arch/powerpc/mm/nohash/kaslr_booke.c1
-rw-r--r--arch/powerpc/mm/nohash/tlb.c1
-rw-r--r--arch/powerpc/mm/nohash/tlb_low_64e.S47
-rw-r--r--arch/powerpc/mm/numa.c507
-rw-r--r--arch/powerpc/mm/pgtable-frag.c3
-rw-r--r--arch/powerpc/mm/pgtable.c1
-rw-r--r--arch/powerpc/mm/pgtable_64.c1
-rw-r--r--arch/powerpc/mm/ptdump/hashpagetable.c4
-rw-r--r--arch/powerpc/mm/ptdump/ptdump.c56
33 files changed, 580 insertions, 988 deletions
diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S
index 923ad8f374eb..1690d369688b 100644
--- a/arch/powerpc/mm/book3s32/hash_low.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -62,7 +62,7 @@ _GLOBAL(hash_page)
isync
#endif
/* Get PTE (linux-style) and check access */
- lis r0,KERNELBASE@h /* check if kernel address */
+ lis r0, TASK_SIZE@h /* check if kernel address */
cmplw 0,r4,r0
ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */
mfspr r5, SPRN_SPRG_PGDIR /* phys page-table root */
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 03b6ba54460e..c0162911f6cb 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -187,6 +187,17 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
return __mmu_mapin_ram(border, top);
}
+static bool is_module_segment(unsigned long addr)
+{
+ if (!IS_ENABLED(CONFIG_MODULES))
+ return false;
+ if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M))
+ return false;
+ if (addr >= ALIGN(VMALLOC_END, SZ_256M))
+ return false;
+ return true;
+}
+
void mmu_mark_initmem_nx(void)
{
int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
@@ -223,9 +234,9 @@ void mmu_mark_initmem_nx(void)
for (i = TASK_SIZE >> 28; i < 16; i++) {
/* Do not set NX on VM space for modules */
- if (IS_ENABLED(CONFIG_MODULES) &&
- (VMALLOC_START & 0xf0000000) == i << 28)
- break;
+ if (is_module_segment(i << 28))
+ continue;
+
mtsrin(mfsrin(i << 28) | 0x10000000, i << 28);
}
}
diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
index 25acb9c5ee1b..964467b3a776 100644
--- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
@@ -10,7 +10,6 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
-#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index 2a99167afbaf..fd9c7f91b092 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -9,7 +9,6 @@
#include <linux/mm_types.h>
#include <linux/mm.h>
-#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/mmu.h>
#include <asm/tlb.h>
diff --git a/arch/powerpc/mm/book3s64/hash_tlb.c b/arch/powerpc/mm/book3s64/hash_tlb.c
index 0fbf3dc9f2c2..eb0bccaf221e 100644
--- a/arch/powerpc/mm/book3s64/hash_tlb.c
+++ b/arch/powerpc/mm/book3s64/hash_tlb.c
@@ -21,7 +21,6 @@
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/bug.h>
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 9b9f92ad0e7a..1478fceeb683 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -232,8 +232,6 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
rflags |= HPTE_R_I;
else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)
rflags |= (HPTE_R_I | HPTE_R_G);
- else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
- rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M);
else
/*
* Add memory coherence if cache inhibited is not set
@@ -596,7 +594,7 @@ static void __init htab_scan_page_sizes(void)
}
#ifdef CONFIG_HUGETLB_PAGE
- if (!hugetlb_disabled) {
+ if (!hugetlb_disabled && !early_radix_enabled() ) {
/* Reserve 16G huge page memory sections for huge pages */
of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
}
@@ -663,11 +661,10 @@ static void __init htab_init_page_sizes(void)
* Pick a size for the linear mapping. Currently, we only
* support 16M, 1M and 4K which is the default
*/
- if (IS_ENABLED(STRICT_KERNEL_RWX) &&
+ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) &&
(unsigned long)_stext % 0x1000000) {
if (mmu_psize_defs[MMU_PAGE_16M].shift)
- pr_warn("Kernel not 16M aligned, "
- "disabling 16M linear map alignment");
+ pr_warn("Kernel not 16M aligned, disabling 16M linear map alignment\n");
aligned = false;
}
@@ -788,7 +785,7 @@ static unsigned long __init htab_get_table_size(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int resize_hpt_for_hotplug(unsigned long new_mem_size)
+static int resize_hpt_for_hotplug(unsigned long new_mem_size)
{
unsigned target_hpt_shift;
@@ -822,6 +819,8 @@ int hash__create_section_mapping(unsigned long start, unsigned long end,
return -1;
}
+ resize_hpt_for_hotplug(memblock_phys_mem_size());
+
rc = htab_bolt_mapping(start, end, __pa(start),
pgprot_val(prot), mmu_linear_psize,
mmu_kernel_ssize);
@@ -839,6 +838,10 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end)
int rc = htab_remove_mapping(start, end, mmu_linear_psize,
mmu_kernel_ssize);
WARN_ON(rc < 0);
+
+ if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
+ pr_warn("Hash collision while resizing HPT\n");
+
return rc;
}
#endif /* CONFIG_MEMORY_HOTPLUG */
@@ -1111,6 +1114,10 @@ void hash__early_init_mmu_secondary(void)
if (cpu_has_feature(CPU_FTR_ARCH_206)
&& cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
+
+#ifdef CONFIG_PPC_MEM_KEYS
+ mtspr(SPRN_UAMOR, default_uamor);
+#endif
}
#endif /* CONFIG_SMP */
@@ -1731,10 +1738,6 @@ unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
return gslot;
}
-/*
- * WARNING: This is called from hash_low_64.S, if you change this prototype,
- * do not forget to update the assembly call site !
- */
void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
unsigned long flags)
{
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index c58ad1049909..e18ae50a275c 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -15,6 +15,7 @@
#include <asm/powernv.h>
#include <asm/firmware.h>
#include <asm/ultravisor.h>
+#include <asm/kexec.h>
#include <mm/mmu_decl.h>
#include <trace/events/thp.h>
@@ -165,6 +166,8 @@ void mmu_cleanup_all(void)
radix__mmu_cleanup_all();
else if (mmu_hash_ops.hpte_clear_all)
mmu_hash_ops.hpte_clear_all();
+
+ reset_sprs();
}
#ifdef CONFIG_MEMORY_HOTPLUG
@@ -339,6 +342,9 @@ void pmd_fragment_free(unsigned long *pmd)
{
struct page *page = virt_to_page(pmd);
+ if (PageReserved(page))
+ return free_reserved_page(page);
+
BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
if (atomic_dec_and_test(&page->pt_frag_refcount)) {
pgtable_pmd_page_dtor(page);
@@ -356,7 +362,7 @@ static inline void pgtable_free(void *table, int index)
pmd_fragment_free(table);
break;
case PUD_INDEX:
- kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), table);
+ __pud_free(table);
break;
#if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE)
/* 16M hugepd directory at pud level */
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index d174106bab67..69a6b87f2bb4 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -10,58 +10,103 @@
#include <asm/mmu.h>
#include <asm/setup.h>
#include <linux/pkeys.h>
-#include <linux/of_device.h>
+#include <linux/of_fdt.h>
-DEFINE_STATIC_KEY_TRUE(pkey_disabled);
-int pkeys_total; /* Total pkeys as per device tree */
-u32 initial_allocation_mask; /* Bits set for the initially allocated keys */
-u32 reserved_allocation_mask; /* Bits set for reserved keys */
-static bool pkey_execute_disable_supported;
-static bool pkeys_devtree_defined; /* property exported by device tree */
-static u64 pkey_amr_mask; /* Bits in AMR not to be touched */
-static u64 pkey_iamr_mask; /* Bits in AMR not to be touched */
-static u64 pkey_uamor_mask; /* Bits in UMOR not to be touched */
+int num_pkey; /* Max number of pkeys supported */
+/*
+ * Keys marked in the reservation list cannot be allocated by userspace
+ */
+u32 reserved_allocation_mask __ro_after_init;
+
+/* Bits set for the initially allocated keys */
+static u32 initial_allocation_mask __ro_after_init;
+
+/*
+ * Even if we allocate keys with sys_pkey_alloc(), we need to make sure
+ * other thread still find the access denied using the same keys.
+ */
+static u64 default_amr = ~0x0UL;
+static u64 default_iamr = 0x5555555555555555UL;
+u64 default_uamor __ro_after_init;
+/*
+ * Key used to implement PROT_EXEC mmap. Denies READ/WRITE
+ * We pick key 2 because 0 is special key and 1 is reserved as per ISA.
+ */
static int execute_only_key = 2;
+static bool pkey_execute_disable_supported;
+
#define AMR_BITS_PER_PKEY 2
#define AMR_RD_BIT 0x1UL
#define AMR_WR_BIT 0x2UL
#define IAMR_EX_BIT 0x1UL
-#define PKEY_REG_BITS (sizeof(u64)*8)
+#define PKEY_REG_BITS (sizeof(u64) * 8)
#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY))
-static void scan_pkey_feature(void)
+static int __init dt_scan_storage_keys(unsigned long node,
+ const char *uname, int depth,
+ void *data)
{
- u32 vals[2];
- struct device_node *cpu;
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const __be32 *prop;
+ int *pkeys_total = (int *) data;
- cpu = of_find_node_by_type(NULL, "cpu");
- if (!cpu)
- return;
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
- if (of_property_read_u32_array(cpu,
- "ibm,processor-storage-keys", vals, 2))
- return;
+ prop = of_get_flat_dt_prop(node, "ibm,processor-storage-keys", NULL);
+ if (!prop)
+ return 0;
+ *pkeys_total = be32_to_cpu(prop[0]);
+ return 1;
+}
+
+static int scan_pkey_feature(void)
+{
+ int ret;
+ int pkeys_total = 0;
/*
- * Since any pkey can be used for data or execute, we will just treat
- * all keys as equal and track them as one entity.
+ * Pkey is not supported with Radix translation.
*/
- pkeys_total = vals[0];
- pkeys_devtree_defined = true;
-}
+ if (early_radix_enabled())
+ return 0;
-static inline bool pkey_mmu_enabled(void)
-{
- if (firmware_has_feature(FW_FEATURE_LPAR))
- return pkeys_total;
- else
- return cpu_has_feature(CPU_FTR_PKEY);
+ /*
+ * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1
+ */
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_206))
+ return 0;
+
+ ret = of_scan_flat_dt(dt_scan_storage_keys, &pkeys_total);
+ if (ret == 0) {
+ /*
+ * Let's assume 32 pkeys on P8/P9 bare metal, if its not defined by device
+ * tree. We make this exception since some version of skiboot forgot to
+ * expose this property on power8/9.
+ */
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+ unsigned long pvr = mfspr(SPRN_PVR);
+
+ if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E ||
+ PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == PVR_POWER9)
+ pkeys_total = 32;
+ }
+ }
+
+ /*
+ * Adjust the upper limit, based on the number of bits supported by
+ * arch-neutral code.
+ */
+ pkeys_total = min_t(int, pkeys_total,
+ ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + 1));
+ return pkeys_total;
}
-static int pkey_initialize(void)
+void __init pkey_early_init_devtree(void)
{
- int os_reserved, i;
+ int pkeys_total, i;
/*
* We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral
@@ -80,31 +125,14 @@ static int pkey_initialize(void)
!= (sizeof(u64) * BITS_PER_BYTE));
/* scan the device tree for pkey feature */
- scan_pkey_feature();
-
- /*
- * Let's assume 32 pkeys on P8 bare metal, if its not defined by device
- * tree. We make this exception since skiboot forgot to expose this
- * property on power8.
- */
- if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR) &&
- cpu_has_feature(CPU_FTRS_POWER8))
- pkeys_total = 32;
-
- /*
- * Adjust the upper limit, based on the number of bits supported by
- * arch-neutral code.
- */
- pkeys_total = min_t(int, pkeys_total,
- ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)+1));
+ pkeys_total = scan_pkey_feature();
+ if (!pkeys_total)
+ goto out;
- if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total)
- static_branch_enable(&pkey_disabled);
- else
- static_branch_disable(&pkey_disabled);
+ /* Allow all keys to be modified by default */
+ default_uamor = ~0x0UL;
- if (static_branch_likely(&pkey_disabled))
- return 0;
+ cur_cpu_spec->mmu_features |= MMU_FTR_PKEY;
/*
* The device tree cannot be relied to indicate support for
@@ -118,53 +146,86 @@ static int pkey_initialize(void)
#ifdef CONFIG_PPC_4K_PAGES
/*
* The OS can manage only 8 pkeys due to its inability to represent them
- * in the Linux 4K PTE.
+ * in the Linux 4K PTE. Mark all other keys reserved.
*/
- os_reserved = pkeys_total - 8;
+ num_pkey = min(8, pkeys_total);
#else
- os_reserved = 0;
+ num_pkey = pkeys_total;
#endif
- /* Bits are in LE format. */
- reserved_allocation_mask = (0x1 << 1) | (0x1 << execute_only_key);
-
- /* register mask is in BE format */
- pkey_amr_mask = ~0x0ul;
- pkey_amr_mask &= ~(0x3ul << pkeyshift(0));
-
- pkey_iamr_mask = ~0x0ul;
- pkey_iamr_mask &= ~(0x3ul << pkeyshift(0));
- pkey_iamr_mask &= ~(0x3ul << pkeyshift(execute_only_key));
- pkey_uamor_mask = ~0x0ul;
- pkey_uamor_mask &= ~(0x3ul << pkeyshift(0));
- pkey_uamor_mask &= ~(0x3ul << pkeyshift(execute_only_key));
-
- /* mark the rest of the keys as reserved and hence unavailable */
- for (i = (pkeys_total - os_reserved); i < pkeys_total; i++) {
- reserved_allocation_mask |= (0x1 << i);
- pkey_uamor_mask &= ~(0x3ul << pkeyshift(i));
- }
- initial_allocation_mask = reserved_allocation_mask | (0x1 << 0);
-
- if (unlikely((pkeys_total - os_reserved) <= execute_only_key)) {
+ if (unlikely(num_pkey <= execute_only_key) || !pkey_execute_disable_supported) {
/*
* Insufficient number of keys to support
* execute only key. Mark it unavailable.
- * Any AMR, UAMOR, IAMR bit set for
- * this key is irrelevant since this key
- * can never be allocated.
*/
execute_only_key = -1;
+ } else {
+ /*
+ * Mark the execute_only_pkey as not available for
+ * user allocation via pkey_alloc.
+ */
+ reserved_allocation_mask |= (0x1 << execute_only_key);
+
+ /*
+ * Deny READ/WRITE for execute_only_key.
+ * Allow execute in IAMR.
+ */
+ default_amr |= (0x3ul << pkeyshift(execute_only_key));
+ default_iamr &= ~(0x1ul << pkeyshift(execute_only_key));
+
+ /*
+ * Clear the uamor bits for this key.
+ */
+ default_uamor &= ~(0x3ul << pkeyshift(execute_only_key));
}
- return 0;
-}
+ /*
+ * Allow access for only key 0. And prevent any other modification.
+ */
+ default_amr &= ~(0x3ul << pkeyshift(0));
+ default_iamr &= ~(0x1ul << pkeyshift(0));
+ default_uamor &= ~(0x3ul << pkeyshift(0));
+ /*
+ * key 0 is special in that we want to consider it an allocated
+ * key which is preallocated. We don't allow changing AMR bits
+ * w.r.t key 0. But one can pkey_free(key0)
+ */
+ initial_allocation_mask |= (0x1 << 0);
-arch_initcall(pkey_initialize);
+ /*
+ * key 1 is recommended not to be used. PowerISA(3.0) page 1015,
+ * programming note.
+ */
+ reserved_allocation_mask |= (0x1 << 1);
+ default_uamor &= ~(0x3ul << pkeyshift(1));
+
+ /*
+ * Prevent the usage of OS reserved keys. Update UAMOR
+ * for those keys. Also mark the rest of the bits in the
+ * 32 bit mask as reserved.
+ */
+ for (i = num_pkey; i < 32 ; i++) {
+ reserved_allocation_mask |= (0x1 << i);
+ default_uamor &= ~(0x3ul << pkeyshift(i));
+ }
+ /*
+ * Prevent the allocation of reserved keys too.
+ */
+ initial_allocation_mask |= reserved_allocation_mask;
+
+ pr_info("Enabling pkeys with max key count %d\n", num_pkey);
+out:
+ /*
+ * Setup uamor on boot cpu
+ */
+ mtspr(SPRN_UAMOR, default_uamor);
+
+ return;
+}
void pkey_mm_init(struct mm_struct *mm)
{
- if (static_branch_likely(&pkey_disabled))
+ if (!mmu_has_feature(MMU_FTR_PKEY))
return;
mm_pkey_allocation_map(mm) = initial_allocation_mask;
mm->context.execute_only_pkey = execute_only_key;
@@ -196,30 +257,6 @@ static inline void write_iamr(u64 value)
mtspr(SPRN_IAMR, value);
}
-static inline u64 read_uamor(void)
-{
- return mfspr(SPRN_UAMOR);
-}
-
-static inline void write_uamor(u64 value)
-{
- mtspr(SPRN_UAMOR, value);
-}
-
-static bool is_pkey_enabled(int pkey)
-{
- u64 uamor = read_uamor();
- u64 pkey_bits = 0x3ul << pkeyshift(pkey);
- u64 uamor_pkey_bits = (uamor & pkey_bits);
-
- /*
- * Both the bits in UAMOR corresponding to the key should be set or
- * reset.
- */
- WARN_ON(uamor_pkey_bits && (uamor_pkey_bits != pkey_bits));
- return !!(uamor_pkey_bits);
-}
-
static inline void init_amr(int pkey, u8 init_bits)
{
u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey));
@@ -245,8 +282,18 @@ int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
{
u64 new_amr_bits = 0x0ul;
u64 new_iamr_bits = 0x0ul;
+ u64 pkey_bits, uamor_pkey_bits;
+
+ /*
+ * Check whether the key is disabled by UAMOR.
+ */
+ pkey_bits = 0x3ul << pkeyshift(pkey);
+ uamor_pkey_bits = (default_uamor & pkey_bits);
- if (!is_pkey_enabled(pkey))
+ /*
+ * Both the bits in UAMOR corresponding to the key should be set
+ */
+ if (uamor_pkey_bits != pkey_bits)
return -EINVAL;
if (init_val & PKEY_DISABLE_EXECUTE) {
@@ -268,7 +315,7 @@ int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
void thread_pkey_regs_save(struct thread_struct *thread)
{
- if (static_branch_likely(&pkey_disabled))
+ if (!mmu_has_feature(MMU_FTR_PKEY))
return;
/*
@@ -276,38 +323,33 @@ void thread_pkey_regs_save(struct thread_struct *thread)
*/
thread->amr = read_amr();
thread->iamr = read_iamr();
- thread->uamor = read_uamor();
}
void thread_pkey_regs_restore(struct thread_struct *new_thread,
struct thread_struct *old_thread)
{
- if (static_branch_likely(&pkey_disabled))
+ if (!mmu_has_feature(MMU_FTR_PKEY))
return;
if (old_thread->amr != new_thread->amr)
write_amr(new_thread->amr);
if (old_thread->iamr != new_thread->iamr)
write_iamr(new_thread->iamr);
- if (old_thread->uamor != new_thread->uamor)
- write_uamor(new_thread->uamor);
}
void thread_pkey_regs_init(struct thread_struct *thread)
{
- if (static_branch_likely(&pkey_disabled))
+ if (!mmu_has_feature(MMU_FTR_PKEY))
return;
- thread->amr = pkey_amr_mask;
- thread->iamr = pkey_iamr_mask;
- thread->uamor = pkey_uamor_mask;
+ thread->amr = default_amr;
+ thread->iamr = default_iamr;
- write_uamor(pkey_uamor_mask);
- write_amr(pkey_amr_mask);
- write_iamr(pkey_iamr_mask);
+ write_amr(default_amr);
+ write_iamr(default_iamr);
}
-int __execute_only_pkey(struct mm_struct *mm)
+int execute_only_pkey(struct mm_struct *mm)
{
return mm->context.execute_only_pkey;
}
@@ -366,7 +408,7 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute)
bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
{
- if (static_branch_likely(&pkey_disabled))
+ if (!mmu_has_feature(MMU_FTR_PKEY))
return true;
return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute);
@@ -383,7 +425,7 @@ bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
bool execute, bool foreign)
{
- if (static_branch_likely(&pkey_disabled))
+ if (!mmu_has_feature(MMU_FTR_PKEY))
return true;
/*
* Do not enforce our key-permissions on a foreign vma.
@@ -396,7 +438,7 @@ bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm)
{
- if (static_branch_likely(&pkey_disabled))
+ if (!mmu_has_feature(MMU_FTR_PKEY))
return;
/* Duplicate the oldmm pkey state in mm: */
diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
index c812b401b66c..cb91071eef52 100644
--- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
@@ -2,7 +2,6 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/security.h>
-#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/machdep.h>
#include <asm/mman.h>
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index bb00e0cba119..28c784976bed 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -15,7 +15,7 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/string_helpers.h>
-#include <linux/stop_machine.h>
+#include <linux/memory.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
@@ -34,6 +34,7 @@
unsigned int mmu_pid_bits;
unsigned int mmu_base_pid;
+unsigned int radix_mem_block_size __ro_after_init;
static __ref void *early_alloc_pgtable(unsigned long size, int nid,
unsigned long region_start, unsigned long region_end)
@@ -56,6 +57,13 @@ static __ref void *early_alloc_pgtable(unsigned long size, int nid,
return ptr;
}
+/*
+ * When allocating pud or pmd pointers, we allocate a complete page
+ * of PAGE_SIZE rather than PUD_TABLE_SIZE or PMD_TABLE_SIZE. This
+ * is to ensure that the page obtained from the memblock allocator
+ * can be completely used as page table page and can be freed
+ * correctly when the page table entries are removed.
+ */
static int early_map_kernel_page(unsigned long ea, unsigned long pa,
pgprot_t flags,
unsigned int map_page_size,
@@ -72,8 +80,8 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa,
pgdp = pgd_offset_k(ea);
p4dp = p4d_offset(pgdp, ea);
if (p4d_none(*p4dp)) {
- pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid,
- region_start, region_end);
+ pudp = early_alloc_pgtable(PAGE_SIZE, nid,
+ region_start, region_end);
p4d_populate(&init_mm, p4dp, pudp);
}
pudp = pud_offset(p4dp, ea);
@@ -82,8 +90,8 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa,
goto set_the_pte;
}
if (pud_none(*pudp)) {
- pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid,
- region_start, region_end);
+ pmdp = early_alloc_pgtable(PAGE_SIZE, nid, region_start,
+ region_end);
pud_populate(&init_mm, pudp, pmdp);
}
pmdp = pmd_offset(pudp, ea);
@@ -259,6 +267,7 @@ static unsigned long next_boundary(unsigned long addr, unsigned long end)
static int __meminit create_physical_mapping(unsigned long start,
unsigned long end,
+ unsigned long max_mapping_size,
int nid, pgprot_t _prot)
{
unsigned long vaddr, addr, mapping_size = 0;
@@ -272,6 +281,8 @@ static int __meminit create_physical_mapping(unsigned long start,
int rc;
gap = next_boundary(addr, end) - addr;
+ if (gap > max_mapping_size)
+ gap = max_mapping_size;
previous_size = mapping_size;
prev_exec = exec;
@@ -322,8 +333,9 @@ static void __init radix_init_pgtable(void)
/* We don't support slb for radix */
mmu_slb_size = 0;
+
/*
- * Create the linear mapping, using standard page size for now
+ * Create the linear mapping
*/
for_each_memblock(memory, reg) {
/*
@@ -339,6 +351,7 @@ static void __init radix_init_pgtable(void)
WARN_ON(create_physical_mapping(reg->base,
reg->base + reg->size,
+ radix_mem_block_size,
-1, PAGE_KERNEL));
}
@@ -479,6 +492,57 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
return 1;
}
+#ifdef CONFIG_MEMORY_HOTPLUG
+static int __init probe_memory_block_size(unsigned long node, const char *uname, int
+ depth, void *data)
+{
+ unsigned long *mem_block_size = (unsigned long *)data;
+ const __be64 *prop;
+ int len;
+
+ if (depth != 1)
+ return 0;
+
+ if (strcmp(uname, "ibm,dynamic-reconfiguration-memory"))
+ return 0;
+
+ prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
+ if (!prop || len < sizeof(__be64))
+ /*
+ * Nothing in the device tree
+ */
+ *mem_block_size = MIN_MEMORY_BLOCK_SIZE;
+ else
+ *mem_block_size = be64_to_cpup(prop);
+ return 1;
+}
+
+static unsigned long radix_memory_block_size(void)
+{
+ unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE;
+
+ /*
+ * OPAL firmware feature is set by now. Hence we are ok
+ * to test OPAL feature.
+ */
+ if (firmware_has_feature(FW_FEATURE_OPAL))
+ mem_block_size = 1UL * 1024 * 1024 * 1024;
+ else
+ of_scan_flat_dt(probe_memory_block_size, &mem_block_size);
+
+ return mem_block_size;
+}
+
+#else /* CONFIG_MEMORY_HOTPLUG */
+
+static unsigned long radix_memory_block_size(void)
+{
+ return 1UL * 1024 * 1024 * 1024;
+}
+
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+
void __init radix__early_init_devtree(void)
{
int rc;
@@ -487,17 +551,27 @@ void __init radix__early_init_devtree(void)
* Try to find the available page sizes in the device-tree
*/
rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL);
- if (rc != 0) /* Found */
- goto found;
+ if (!rc) {
+ /*
+ * No page size details found in device tree.
+ * Let's assume we have page 4k and 64k support
+ */
+ mmu_psize_defs[MMU_PAGE_4K].shift = 12;
+ mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
+
+ mmu_psize_defs[MMU_PAGE_64K].shift = 16;
+ mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
+ }
+
/*
- * let's assume we have page 4k and 64k support
+ * Max mapping size used when mapping pages. We don't use
+ * ppc_md.memory_block_size() here because this get called
+ * early and we don't have machine probe called yet. Also
+ * the pseries implementation only check for ibm,lmb-size.
+ * All hypervisor supporting radix do expose that device
+ * tree node.
*/
- mmu_psize_defs[MMU_PAGE_4K].shift = 12;
- mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
-
- mmu_psize_defs[MMU_PAGE_64K].shift = 16;
- mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
-found:
+ radix_mem_block_size = radix_memory_block_size();
return;
}
@@ -519,8 +593,10 @@ void setup_kuep(bool disabled)
if (disabled || !early_radix_enabled())
return;
- if (smp_processor_id() == boot_cpuid)
+ if (smp_processor_id() == boot_cpuid) {
pr_info("Activating Kernel Userspace Execution Prevention\n");
+ cur_cpu_spec->mmu_features |= MMU_FTR_KUEP;
+ }
/*
* Radix always uses key0 of the IAMR to determine if an access is
@@ -544,6 +620,10 @@ void setup_kuap(bool disabled)
/* Make sure userspace can't change the AMR */
mtspr(SPRN_UAMOR, 0);
+
+ /*
+ * Set the default kernel AMR values on all cpus.
+ */
mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
isync();
}
@@ -700,30 +780,19 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
pud_clear(pud);
}
-struct change_mapping_params {
- pte_t *pte;
- unsigned long start;
- unsigned long end;
- unsigned long aligned_start;
- unsigned long aligned_end;
-};
-
-static int __meminit stop_machine_change_mapping(void *data)
+static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
{
- struct change_mapping_params *params =
- (struct change_mapping_params *)data;
+ pud_t *pud;
+ int i;
- if (!data)
- return -1;
+ for (i = 0; i < PTRS_PER_PUD; i++) {
+ pud = pud_start + i;
+ if (!pud_none(*pud))
+ return;
+ }
- spin_unlock(&init_mm.page_table_lock);
- pte_clear(&init_mm, params->aligned_start, params->pte);
- create_physical_mapping(__pa(params->aligned_start),
- __pa(params->start), -1, PAGE_KERNEL);
- create_physical_mapping(__pa(params->end), __pa(params->aligned_end),
- -1, PAGE_KERNEL);
- spin_lock(&init_mm.page_table_lock);
- return 0;
+ pud_free(&init_mm, pud_start);
+ p4d_clear(p4d);
}
static void remove_pte_table(pte_t *pte_start, unsigned long addr,
@@ -754,53 +823,7 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
}
}
-/*
- * clear the pte and potentially split the mapping helper
- */
-static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end,
- unsigned long size, pte_t *pte)
-{
- unsigned long mask = ~(size - 1);
- unsigned long aligned_start = addr & mask;
- unsigned long aligned_end = addr + size;
- struct change_mapping_params params;
- bool split_region = false;
-
- if ((end - addr) < size) {
- /*
- * We're going to clear the PTE, but not flushed
- * the mapping, time to remap and flush. The
- * effects if visible outside the processor or
- * if we are running in code close to the
- * mapping we cleared, we are in trouble.
- */
- if (overlaps_kernel_text(aligned_start, addr) ||
- overlaps_kernel_text(end, aligned_end)) {
- /*
- * Hack, just return, don't pte_clear
- */
- WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
- "text, not splitting\n", addr, end);
- return;
- }
- split_region = true;
- }
-
- if (split_region) {
- params.pte = pte;
- params.start = addr;
- params.end = end;
- params.aligned_start = addr & ~(size - 1);
- params.aligned_end = min_t(unsigned long, aligned_end,
- (unsigned long)__va(memblock_end_of_DRAM()));
- stop_machine(stop_machine_change_mapping, &params, NULL);
- return;
- }
-
- pte_clear(&init_mm, addr, pte);
-}
-
-static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
+static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
unsigned long end)
{
unsigned long next;
@@ -815,7 +838,12 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
continue;
if (pmd_is_leaf(*pmd)) {
- split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
+ if (!IS_ALIGNED(addr, PMD_SIZE) ||
+ !IS_ALIGNED(next, PMD_SIZE)) {
+ WARN_ONCE(1, "%s: unaligned range\n", __func__);
+ continue;
+ }
+ pte_clear(&init_mm, addr, (pte_t *)pmd);
continue;
}
@@ -825,7 +853,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
}
}
-static void remove_pud_table(pud_t *pud_start, unsigned long addr,
+static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
unsigned long end)
{
unsigned long next;
@@ -840,7 +868,12 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
continue;
if (pud_is_leaf(*pud)) {
- split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
+ if (!IS_ALIGNED(addr, PUD_SIZE) ||
+ !IS_ALIGNED(next, PUD_SIZE)) {
+ WARN_ONCE(1, "%s: unaligned range\n", __func__);
+ continue;
+ }
+ pte_clear(&init_mm, addr, (pte_t *)pud);
continue;
}
@@ -868,12 +901,19 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
continue;
if (p4d_is_leaf(*p4d)) {
- split_kernel_mapping(addr, end, P4D_SIZE, (pte_t *)p4d);
+ if (!IS_ALIGNED(addr, P4D_SIZE) ||
+ !IS_ALIGNED(next, P4D_SIZE)) {
+ WARN_ONCE(1, "%s: unaligned range\n", __func__);
+ continue;
+ }
+
+ pte_clear(&init_mm, addr, (pte_t *)pgd);
continue;
}
pud_base = (pud_t *)p4d_page_vaddr(*p4d);
remove_pud_table(pud_base, addr, next);
+ free_pud_table(pud_base, p4d);
}
spin_unlock(&init_mm.page_table_lock);
@@ -889,7 +929,8 @@ int __meminit radix__create_section_mapping(unsigned long start,
return -1;
}
- return create_physical_mapping(__pa(start), __pa(end), nid, prot);
+ return create_physical_mapping(__pa(start), __pa(end),
+ radix_mem_block_size, nid, prot);
}
int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index b5cc9b23cf02..0d233763441f 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -16,6 +16,7 @@
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>
+#include <asm/plpar_wrappers.h>
#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
@@ -694,7 +695,14 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
goto local;
}
- if (cputlb_use_tlbie()) {
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU;
+
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
+ H_RPTI_PAGE_ALL, 0, -1UL);
+ } else if (cputlb_use_tlbie()) {
if (mm_needs_flush_escalation(mm))
_tlbie_pid(pid, RIC_FLUSH_ALL);
else
@@ -727,7 +735,16 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
goto local;
}
}
- if (cputlb_use_tlbie())
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU;
+ unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+ H_RPTI_TYPE_PRT;
+
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, type,
+ H_RPTI_PAGE_ALL, 0, -1UL);
+ } else if (cputlb_use_tlbie())
_tlbie_pid(pid, RIC_FLUSH_ALL);
else
_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
@@ -760,7 +777,19 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
exit_flush_lazy_tlbs(mm);
goto local;
}
- if (cputlb_use_tlbie())
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt, pg_sizes, size;
+
+ tgt = H_RPTI_TARGET_CMMU;
+ pg_sizes = psize_to_rpti_pgsize(psize);
+ size = 1UL << mmu_psize_to_shift(psize);
+
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
+ pg_sizes, vmaddr,
+ vmaddr + size);
+ } else if (cputlb_use_tlbie())
_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
else
_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
@@ -810,7 +839,14 @@ static inline void _tlbiel_kernel_broadcast(void)
*/
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- if (cputlb_use_tlbie())
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
+ unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+ H_RPTI_TYPE_PRT;
+
+ pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
+ start, end);
+ } else if (cputlb_use_tlbie())
_tlbie_pid(0, RIC_FLUSH_ALL);
else
_tlbiel_kernel_broadcast();
@@ -864,7 +900,17 @@ is_local:
nr_pages > tlb_local_single_page_flush_ceiling);
}
- if (full) {
+ if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU;
+ unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
+
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
+ start, end);
+ } else if (full) {
if (local) {
_tlbiel_pid(pid, RIC_FLUSH_TLB);
} else {
@@ -1046,7 +1092,17 @@ is_local:
nr_pages > tlb_local_single_page_flush_ceiling);
}
- if (full) {
+ if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU;
+ unsigned long type = H_RPTI_TYPE_TLB;
+ unsigned long pg_sizes = psize_to_rpti_pgsize(psize);
+
+ if (also_pwc)
+ type |= H_RPTI_TYPE_PWC;
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
+ } else if (full) {
if (local) {
_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
} else {
@@ -1111,7 +1167,19 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
exit_flush_lazy_tlbs(mm);
goto local;
}
- if (cputlb_use_tlbie())
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt, type, pg_sizes;
+
+ tgt = H_RPTI_TARGET_CMMU;
+ type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+ H_RPTI_TYPE_PRT;
+ pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
+
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
+ addr, end);
+ } else if (cputlb_use_tlbie())
_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
else
_tlbiel_va_range_multicast(mm,
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index b83abbead4a2..8acd00178956 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -64,7 +64,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
}
ret = 0;
- *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0);
+ *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0, NULL);
if (unlikely(*flt & VM_FAULT_ERROR)) {
if (*flt & VM_FAULT_OOM) {
ret = -ENOMEM;
@@ -76,11 +76,6 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
BUG();
}
- if (*flt & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
-
out_unlock:
mmap_read_unlock(mm);
return ret;
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 59327cefbc6a..b2eeea39684c 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -14,6 +14,8 @@
#include <asm/prom.h>
#include <asm/drmem.h>
+static int n_root_addr_cells, n_root_size_cells;
+
static struct drmem_lmb_info __drmem_info;
struct drmem_lmb_info *drmem_info = &__drmem_info;
@@ -189,12 +191,13 @@ int drmem_update_dt(void)
return rc;
}
-static void __init read_drconf_v1_cell(struct drmem_lmb *lmb,
+static void read_drconf_v1_cell(struct drmem_lmb *lmb,
const __be32 **prop)
{
const __be32 *p = *prop;
- lmb->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p);
+ lmb->base_addr = of_read_number(p, n_root_addr_cells);
+ p += n_root_addr_cells;
lmb->drc_index = of_read_number(p++, 1);
p++; /* skip reserved field */
@@ -205,29 +208,33 @@ static void __init read_drconf_v1_cell(struct drmem_lmb *lmb,
*prop = p;
}
-static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
- void (*func)(struct drmem_lmb *, const __be32 **))
+static int
+__walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, void *data,
+ int (*func)(struct drmem_lmb *, const __be32 **, void *))
{
struct drmem_lmb lmb;
u32 i, n_lmbs;
+ int ret = 0;
n_lmbs = of_read_number(prop++, 1);
- if (n_lmbs == 0)
- return;
-
for (i = 0; i < n_lmbs; i++) {
read_drconf_v1_cell(&lmb, &prop);
- func(&lmb, &usm);
+ ret = func(&lmb, &usm, data);
+ if (ret)
+ break;
}
+
+ return ret;
}
-static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
+static void read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
const __be32 **prop)
{
const __be32 *p = *prop;
dr_cell->seq_lmbs = of_read_number(p++, 1);
- dr_cell->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p);
+ dr_cell->base_addr = of_read_number(p, n_root_addr_cells);
+ p += n_root_addr_cells;
dr_cell->drc_index = of_read_number(p++, 1);
dr_cell->aa_index = of_read_number(p++, 1);
dr_cell->flags = of_read_number(p++, 1);
@@ -235,17 +242,16 @@ static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
*prop = p;
}
-static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
- void (*func)(struct drmem_lmb *, const __be32 **))
+static int
+__walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, void *data,
+ int (*func)(struct drmem_lmb *, const __be32 **, void *))
{
struct of_drconf_cell_v2 dr_cell;
struct drmem_lmb lmb;
u32 i, j, lmb_sets;
+ int ret = 0;
lmb_sets = of_read_number(prop++, 1);
- if (lmb_sets == 0)
- return;
-
for (i = 0; i < lmb_sets; i++) {
read_drconf_v2_cell(&dr_cell, &prop);
@@ -259,21 +265,29 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
lmb.aa_index = dr_cell.aa_index;
lmb.flags = dr_cell.flags;
- func(&lmb, &usm);
+ ret = func(&lmb, &usm, data);
+ if (ret)
+ break;
}
}
+
+ return ret;
}
#ifdef CONFIG_PPC_PSERIES
-void __init walk_drmem_lmbs_early(unsigned long node,
- void (*func)(struct drmem_lmb *, const __be32 **))
+int __init walk_drmem_lmbs_early(unsigned long node, void *data,
+ int (*func)(struct drmem_lmb *, const __be32 **, void *))
{
const __be32 *prop, *usm;
- int len;
+ int len, ret = -ENODEV;
prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
if (!prop || len < dt_root_size_cells * sizeof(__be32))
- return;
+ return ret;
+
+ /* Get the address & size cells */
+ n_root_addr_cells = dt_root_addr_cells;
+ n_root_size_cells = dt_root_size_cells;
drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop);
@@ -281,20 +295,21 @@ void __init walk_drmem_lmbs_early(unsigned long node,
prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &len);
if (prop) {
- __walk_drmem_v1_lmbs(prop, usm, func);
+ ret = __walk_drmem_v1_lmbs(prop, usm, data, func);
} else {
prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory-v2",
&len);
if (prop)
- __walk_drmem_v2_lmbs(prop, usm, func);
+ ret = __walk_drmem_v2_lmbs(prop, usm, data, func);
}
memblock_dump_all();
+ return ret;
}
#endif
-static int __init init_drmem_lmb_size(struct device_node *dn)
+static int init_drmem_lmb_size(struct device_node *dn)
{
const __be32 *prop;
int len;
@@ -303,12 +318,12 @@ static int __init init_drmem_lmb_size(struct device_node *dn)
return 0;
prop = of_get_property(dn, "ibm,lmb-size", &len);
- if (!prop || len < dt_root_size_cells * sizeof(__be32)) {
+ if (!prop || len < n_root_size_cells * sizeof(__be32)) {
pr_info("Could not determine LMB size\n");
return -1;
}
- drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop);
+ drmem_info->lmb_size = of_read_number(prop, n_root_size_cells);
return 0;
}
@@ -329,24 +344,36 @@ static const __be32 *of_get_usable_memory(struct device_node *dn)
return prop;
}
-void __init walk_drmem_lmbs(struct device_node *dn,
- void (*func)(struct drmem_lmb *, const __be32 **))
+int walk_drmem_lmbs(struct device_node *dn, void *data,
+ int (*func)(struct drmem_lmb *, const __be32 **, void *))
{
const __be32 *prop, *usm;
+ int ret = -ENODEV;
+
+ if (!of_root)
+ return ret;
+
+ /* Get the address & size cells */
+ of_node_get(of_root);
+ n_root_addr_cells = of_n_addr_cells(of_root);
+ n_root_size_cells = of_n_size_cells(of_root);
+ of_node_put(of_root);
if (init_drmem_lmb_size(dn))
- return;
+ return ret;
usm = of_get_usable_memory(dn);
prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
if (prop) {
- __walk_drmem_v1_lmbs(prop, usm, func);
+ ret = __walk_drmem_v1_lmbs(prop, usm, data, func);
} else {
prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL);
if (prop)
- __walk_drmem_v2_lmbs(prop, usm, func);
+ ret = __walk_drmem_v2_lmbs(prop, usm, data, func);
}
+
+ return ret;
}
static void __init init_drmem_v1_lmbs(const __be32 *prop)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 641fc5f3d7dd..0add963a849b 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -42,39 +42,7 @@
#include <asm/kup.h>
#include <asm/inst.h>
-/*
- * Check whether the instruction inst is a store using
- * an update addressing form which will update r1.
- */
-static bool store_updates_sp(struct ppc_inst inst)
-{
- /* check for 1 in the rA field */
- if (((ppc_inst_val(inst) >> 16) & 0x1f) != 1)
- return false;
- /* check major opcode */
- switch (ppc_inst_primary_opcode(inst)) {
- case OP_STWU:
- case OP_STBU:
- case OP_STHU:
- case OP_STFSU:
- case OP_STFDU:
- return true;
- case OP_STD: /* std or stdu */
- return (ppc_inst_val(inst) & 3) == 1;
- case OP_31:
- /* check minor opcode */
- switch ((ppc_inst_val(inst) >> 1) & 0x3ff) {
- case OP_31_XOP_STDUX:
- case OP_31_XOP_STWUX:
- case OP_31_XOP_STBUX:
- case OP_31_XOP_STHUX:
- case OP_31_XOP_STFSUX:
- case OP_31_XOP_STFDUX:
- return true;
- }
- }
- return false;
-}
+
/*
* do_page_fault error handling helpers
*/
@@ -267,54 +235,6 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
return false;
}
-static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
- struct vm_area_struct *vma, unsigned int flags,
- bool *must_retry)
-{
- /*
- * N.B. The POWER/Open ABI allows programs to access up to
- * 288 bytes below the stack pointer.
- * The kernel signal delivery code writes up to about 1.5kB
- * below the stack pointer (r1) before decrementing it.
- * The exec code can write slightly over 640kB to the stack
- * before setting the user r1. Thus we allow the stack to
- * expand to 1MB without further checks.
- */
- if (address + 0x100000 < vma->vm_end) {
- struct ppc_inst __user *nip = (struct ppc_inst __user *)regs->nip;
- /* get user regs even if this fault is in kernel mode */
- struct pt_regs *uregs = current->thread.regs;
- if (uregs == NULL)
- return true;
-
- /*
- * A user-mode access to an address a long way below
- * the stack pointer is only valid if the instruction
- * is one which would update the stack pointer to the
- * address accessed if the instruction completed,
- * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
- * (or the byte, halfword, float or double forms).
- *
- * If we don't check this then any write to the area
- * between the last mapped region and the stack will
- * expand the stack rather than segfaulting.
- */
- if (address + 2048 >= uregs->gpr[1])
- return false;
-
- if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
- access_ok(nip, sizeof(*nip))) {
- struct ppc_inst inst;
-
- if (!probe_user_read_inst(&inst, nip))
- return !store_updates_sp(inst);
- *must_retry = true;
- }
- return true;
- }
- return false;
-}
-
#ifdef CONFIG_PPC_MEM_KEYS
static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey,
struct vm_area_struct *vma)
@@ -480,7 +400,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
int is_user = user_mode(regs);
int is_write = page_fault_is_write(error_code);
vm_fault_t fault, major = 0;
- bool must_retry = false;
bool kprobe_fault = kprobe_page_fault(regs, 11);
if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
@@ -569,30 +488,15 @@ retry:
vma = find_vma(mm, address);
if (unlikely(!vma))
return bad_area(regs, address);
- if (likely(vma->vm_start <= address))
- goto good_area;
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
- return bad_area(regs, address);
- /* The stack is being expanded, check if it's valid */
- if (unlikely(bad_stack_expansion(regs, address, vma, flags,
- &must_retry))) {
- if (!must_retry)
+ if (unlikely(vma->vm_start > address)) {
+ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
return bad_area(regs, address);
- mmap_read_unlock(mm);
- if (fault_in_pages_readable((const char __user *)regs->nip,
- sizeof(unsigned int)))
- return bad_area_nosemaphore(regs, address);
- goto retry;
+ if (unlikely(expand_stack(vma, address)))
+ return bad_area(regs, address);
}
- /* Try to expand it */
- if (unlikely(expand_stack(vma, address)))
- return bad_area(regs, address);
-
-good_area:
-
#ifdef CONFIG_PPC_MEM_KEYS
if (unlikely(access_pkey_error(is_write, is_exec,
(error_code & DSISR_KEYFAULT), vma)))
@@ -607,7 +511,7 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
major |= fault & VM_FAULT_MAJOR;
@@ -633,14 +537,9 @@ good_area:
/*
* Major/minor page fault accounting.
*/
- if (major) {
- current->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+ if (major)
cmo_account_page_fault();
- } else {
- current->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
- }
+
return 0;
}
NOKPROBE_SYMBOL(__do_page_fault);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index e9bfbccd975d..26292544630f 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -684,3 +684,21 @@ void flush_dcache_icache_hugepage(struct page *page)
}
}
}
+
+void __init gigantic_hugetlb_cma_reserve(void)
+{
+ unsigned long order = 0;
+
+ if (radix_enabled())
+ order = PUD_SHIFT - PAGE_SHIFT;
+ else if (!firmware_has_feature(FW_FEATURE_LPAR) && mmu_psize_defs[MMU_PAGE_16G].shift)
+ /*
+ * For pseries we do use ibm,expected#pages for reserving 16G pages.
+ */
+ order = mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT;
+
+ if (order) {
+ VM_WARN_ON(order < MAX_ORDER);
+ hugetlb_cma_reserve(order);
+ }
+}
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 5a5469eb3174..02c7db4087cb 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -29,7 +29,6 @@
#include <linux/slab.h>
#include <linux/hugetlb.h>
-#include <asm/pgalloc.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/mmu.h>
@@ -171,6 +170,8 @@ void __init MMU_init(void)
btext_unmap();
#endif
+ kasan_mmu_init();
+
setup_kup();
/* Shortly after that, the entire linear mapping will be available */
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index bc73abf0bc25..02e127fa5777 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -225,12 +225,12 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
* fall back to system memory if the altmap allocation fail.
*/
if (altmap && !altmap_cross_boundary(altmap, start, page_size)) {
- p = altmap_alloc_block_buf(page_size, altmap);
+ p = vmemmap_alloc_block_buf(page_size, node, altmap);
if (!p)
pr_debug("altmap block allocation failed, falling back to system memory");
}
if (!p)
- p = vmemmap_alloc_block_buf(page_size, node);
+ p = vmemmap_alloc_block_buf(page_size, node, NULL);
if (!p)
return -ENOMEM;
@@ -406,13 +406,15 @@ static void __init early_check_vec5(void)
}
if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] &
OV5_FEAT(OV5_RADIX_GTSE))) {
- pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n");
- }
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
+ } else
+ cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
/* Do radix anyway - the hypervisor said we had to */
cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
} else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) {
/* Hypervisor only supports hash - disable radix */
cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
}
}
diff --git a/arch/powerpc/mm/kasan/8xx.c b/arch/powerpc/mm/kasan/8xx.c
index 569d98a41881..2784224054f8 100644
--- a/arch/powerpc/mm/kasan/8xx.c
+++ b/arch/powerpc/mm/kasan/8xx.c
@@ -5,7 +5,6 @@
#include <linux/kasan.h>
#include <linux/memblock.h>
#include <linux/hugetlb.h>
-#include <asm/pgalloc.h>
static int __init
kasan_init_shadow_8M(unsigned long k_start, unsigned long k_end, void *block)
diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c
index a32b4640b9de..202bd260a009 100644
--- a/arch/powerpc/mm/kasan/book3s_32.c
+++ b/arch/powerpc/mm/kasan/book3s_32.c
@@ -4,7 +4,6 @@
#include <linux/kasan.h>
#include <linux/memblock.h>
-#include <asm/pgalloc.h>
#include <mm/mmu_decl.h>
int __init kasan_init_region(void *start, size_t size)
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index 0760e1e754e4..fb294046e00e 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -115,16 +115,35 @@ static void __init kasan_unmap_early_shadow_vmalloc(void)
unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END);
kasan_update_early_region(k_start, k_end, __pte(0));
+
+#ifdef MODULES_VADDR
+ k_start = (unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR);
+ k_end = (unsigned long)kasan_mem_to_shadow((void *)MODULES_END);
+ kasan_update_early_region(k_start, k_end, __pte(0));
+#endif
}
-static void __init kasan_mmu_init(void)
+void __init kasan_mmu_init(void)
{
int ret;
+
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ||
+ IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+ ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ if (ret)
+ panic("kasan: kasan_init_shadow_page_tables() failed");
+ }
+}
+
+void __init kasan_init(void)
+{
struct memblock_region *reg;
for_each_memblock(memory, reg) {
phys_addr_t base = reg->base;
phys_addr_t top = min(base + reg->size, total_lowmem);
+ int ret;
if (base >= top)
continue;
@@ -134,20 +153,6 @@ static void __init kasan_mmu_init(void)
panic("kasan: kasan_init_region() failed");
}
- if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ||
- IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
- ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
-
- if (ret)
- panic("kasan: kasan_init_shadow_page_tables() failed");
- }
-
-}
-
-void __init kasan_init(void)
-{
- kasan_mmu_init();
-
kasan_remap_early_shadow_ro();
clear_page(kasan_early_shadow_page);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index c2c11eb8dcfc..42e25874f5a8 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -34,7 +34,6 @@
#include <linux/dma-direct.h>
#include <linux/kprobes.h>
-#include <asm/pgalloc.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
@@ -127,8 +126,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
unsigned long nr_pages = size >> PAGE_SHIFT;
int rc;
- resize_hpt_for_hotplug(memblock_phys_mem_size());
-
start = (unsigned long)__va(start);
rc = create_section_mapping(start, start + size, nid,
params->pgprot);
@@ -161,9 +158,6 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
* hit that section of memory
*/
vm_unmap_aliases();
-
- if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
- pr_warn("Hash collision while resizing HPT\n");
}
#endif
@@ -184,8 +178,6 @@ void __init mem_topology_setup(void)
void __init initmem_init(void)
{
- /* XXX need to clip this if using highmem? */
- sparse_memory_present_with_active_regions(0);
sparse_init();
}
diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c
index 13e74bc39ba5..95751c322f6c 100644
--- a/arch/powerpc/mm/nohash/40x.c
+++ b/arch/powerpc/mm/nohash/40x.c
@@ -32,7 +32,6 @@
#include <linux/highmem.h>
#include <linux/memblock.h>
-#include <asm/pgalloc.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 92e8929cbe3e..d2b37146ae6c 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -13,7 +13,6 @@
#include <asm/fixmap.h>
#include <asm/code-patching.h>
#include <asm/inst.h>
-#include <asm/pgalloc.h>
#include <mm/mmu_decl.h>
diff --git a/arch/powerpc/mm/nohash/fsl_booke.c b/arch/powerpc/mm/nohash/fsl_booke.c
index c06dfbb771f4..0c294827d6e5 100644
--- a/arch/powerpc/mm/nohash/fsl_booke.c
+++ b/arch/powerpc/mm/nohash/fsl_booke.c
@@ -37,7 +37,6 @@
#include <linux/highmem.h>
#include <linux/memblock.h>
-#include <asm/pgalloc.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
index bce0e5349978..4c74e8a5482b 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -15,7 +15,6 @@
#include <linux/libfdt.h>
#include <linux/crash_core.h>
#include <asm/cacheflush.h>
-#include <asm/pgalloc.h>
#include <asm/prom.h>
#include <asm/kdump.h>
#include <mm/mmu_decl.h>
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index 696f568253a0..14514585db98 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -34,6 +34,7 @@
#include <linux/of_fdt.h>
#include <linux/hugetlb.h>
+#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/code-patching.h>
diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S
index d5e2704d0096..bf24451f3e71 100644
--- a/arch/powerpc/mm/nohash/tlb_low_64e.S
+++ b/arch/powerpc/mm/nohash/tlb_low_64e.S
@@ -71,7 +71,6 @@ START_BTB_FLUSH_SECTION
END_BTB_FLUSH_SECTION
std r7,EX_TLB_R7(r12)
#endif
- TLB_MISS_PROLOG_STATS
.endm
.macro tlb_epilog_bolted
@@ -85,7 +84,6 @@ END_BTB_FLUSH_SECTION
mtcr r14
ld r14,EX_TLB_R14(r12)
ld r15,EX_TLB_R15(r12)
- TLB_MISS_RESTORE_STATS
ld r16,EX_TLB_R16(r12)
mfspr r12,SPRN_SPRG_GEN_SCRATCH
.endm
@@ -128,7 +126,6 @@ END_BTB_FLUSH_SECTION
ori r10,r10,_PAGE_PRESENT
oris r11,r10,_PAGE_ACCESSED@h
- TLB_MISS_STATS_SAVE_INFO_BOLTED
bne tlb_miss_kernel_bolted
tlb_miss_common_bolted:
@@ -209,7 +206,6 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
tlbwe
tlb_miss_done_bolted:
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
tlb_epilog_bolted
rfi
@@ -229,11 +225,9 @@ tlb_miss_fault_bolted:
andi. r10,r11,_PAGE_EXEC|_PAGE_BAP_SX
bne itlb_miss_fault_bolted
dtlb_miss_fault_bolted:
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
tlb_epilog_bolted
b exc_data_storage_book3e
itlb_miss_fault_bolted:
- TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
tlb_epilog_bolted
b exc_instruction_storage_book3e
@@ -243,7 +237,6 @@ itlb_miss_fault_bolted:
rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
srdi r15,r16,60 /* get region */
- TLB_MISS_STATS_SAVE_INFO_BOLTED
bne- itlb_miss_fault_bolted
li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */
@@ -276,7 +269,6 @@ itlb_miss_fault_bolted:
srdi. r15,r16,60 /* get region */
ori r16,r16,1
- TLB_MISS_STATS_SAVE_INFO_BOLTED
bne tlb_miss_kernel_e6500 /* user/kernel test */
b tlb_miss_common_e6500
@@ -288,7 +280,6 @@ itlb_miss_fault_bolted:
srdi. r15,r16,60 /* get region */
rldicr r16,r16,0,62
- TLB_MISS_STATS_SAVE_INFO_BOLTED
bne tlb_miss_kernel_e6500 /* user vs kernel check */
/*
@@ -460,7 +451,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT)
.endm
tlb_unlock_e6500
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
tlb_epilog_bolted
rfi
@@ -519,11 +509,9 @@ tlb_miss_fault_e6500:
andi. r16,r16,1
bne itlb_miss_fault_e6500
dtlb_miss_fault_e6500:
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
tlb_epilog_bolted
b exc_data_storage_book3e
itlb_miss_fault_e6500:
- TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
tlb_epilog_bolted
b exc_instruction_storage_book3e
#endif /* CONFIG_PPC_FSL_BOOK3E */
@@ -548,7 +536,6 @@ itlb_miss_fault_e6500:
mfspr r16,SPRN_DEAR /* get faulting address */
srdi r15,r16,60 /* get region */
cmpldi cr0,r15,0xc /* linear mapping ? */
- TLB_MISS_STATS_SAVE_INFO
beq tlb_load_linear /* yes -> go to linear map load */
/* The page tables are mapped virtually linear. At this point, though,
@@ -600,7 +587,6 @@ itlb_miss_fault_e6500:
/* We got a crappy address, just fault with whatever DEAR and ESR
* are here
*/
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
TLB_MISS_EPILOG_ERROR
b exc_data_storage_book3e
@@ -624,7 +610,6 @@ itlb_miss_fault_e6500:
*/
srdi r15,r16,60 /* get region */
cmpldi cr0,r15,0xc /* linear mapping ? */
- TLB_MISS_STATS_SAVE_INFO
beq tlb_load_linear /* yes -> go to linear map load */
/* We do the user/kernel test for the PID here along with the RW test
@@ -646,7 +631,6 @@ itlb_miss_fault_e6500:
beq+ normal_tlb_miss
/* We got a crappy address, just fault */
- TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
TLB_MISS_EPILOG_ERROR
b exc_instruction_storage_book3e
@@ -745,7 +729,6 @@ normal_tlb_miss_done:
* level 0 and just going back to userland. They are only needed
* if you are going to take an access fault
*/
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
TLB_MISS_EPILOG_SUCCESS
rfi
@@ -757,11 +740,9 @@ normal_tlb_miss_access_fault:
ld r15,EX_TLB_ESR(r12)
mtspr SPRN_DEAR,r14
mtspr SPRN_ESR,r15
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
TLB_MISS_EPILOG_ERROR
b exc_data_storage_book3e
-1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
- TLB_MISS_EPILOG_ERROR
+1: TLB_MISS_EPILOG_ERROR
b exc_instruction_storage_book3e
@@ -899,7 +880,6 @@ virt_page_table_tlb_miss_done:
1:
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
/* Return to caller, normal case */
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK);
TLB_MISS_EPILOG_SUCCESS
rfi
@@ -935,18 +915,15 @@ virt_page_table_tlb_miss_fault:
beq 1f
mtspr SPRN_DEAR,r15
mtspr SPRN_ESR,r16
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT);
TLB_MISS_EPILOG_ERROR
b exc_data_storage_book3e
-1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT);
- TLB_MISS_EPILOG_ERROR
+1: TLB_MISS_EPILOG_ERROR
b exc_instruction_storage_book3e
virt_page_table_tlb_miss_whacko_fault:
/* The linear fault will restart everything so ESR and DEAR will
* not have been clobbered, let's just fault with what we have
*/
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_FAULT);
TLB_MISS_EPILOG_ERROR
b exc_data_storage_book3e
@@ -971,7 +948,6 @@ virt_page_table_tlb_miss_whacko_fault:
mfspr r16,SPRN_DEAR /* get faulting address */
srdi r11,r16,60 /* get region */
cmpldi cr0,r11,0xc /* linear mapping ? */
- TLB_MISS_STATS_SAVE_INFO
beq tlb_load_linear /* yes -> go to linear map load */
/* We do the user/kernel test for the PID here along with the RW test
@@ -991,7 +967,6 @@ virt_page_table_tlb_miss_whacko_fault:
/* We got a crappy address, just fault with whatever DEAR and ESR
* are here
*/
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
TLB_MISS_EPILOG_ERROR
b exc_data_storage_book3e
@@ -1015,7 +990,6 @@ virt_page_table_tlb_miss_whacko_fault:
*/
srdi r11,r16,60 /* get region */
cmpldi cr0,r11,0xc /* linear mapping ? */
- TLB_MISS_STATS_SAVE_INFO
beq tlb_load_linear /* yes -> go to linear map load */
/* We do the user/kernel test for the PID here along with the RW test
@@ -1033,7 +1007,6 @@ virt_page_table_tlb_miss_whacko_fault:
beq+ htw_tlb_miss
/* We got a crappy address, just fault */
- TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
TLB_MISS_EPILOG_ERROR
b exc_instruction_storage_book3e
@@ -1130,7 +1103,6 @@ htw_tlb_miss_done:
* level 0 and just going back to userland. They are only needed
* if you are going to take an access fault
*/
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK)
TLB_MISS_EPILOG_SUCCESS
rfi
@@ -1142,11 +1114,9 @@ htw_tlb_miss_fault:
beq 1f
mtspr SPRN_DEAR,r16
mtspr SPRN_ESR,r14
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT)
TLB_MISS_EPILOG_ERROR
b exc_data_storage_book3e
-1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT)
- TLB_MISS_EPILOG_ERROR
+1: TLB_MISS_EPILOG_ERROR
b exc_instruction_storage_book3e
/*
@@ -1221,7 +1191,6 @@ tlb_load_linear_done:
* We do that because we can't resume a fault within a TLB
* miss handler, due to MAS and TLB reservation being clobbered.
*/
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_LINEAR)
TLB_MISS_EPILOG_ERROR
rfi
@@ -1233,13 +1202,3 @@ tlb_load_linear_fault:
b exc_data_storage_book3e
1: TLB_MISS_EPILOG_ERROR_SPECIAL
b exc_instruction_storage_book3e
-
-
-#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
-.tlb_stat_inc:
-1: ldarx r8,0,r9
- addi r8,r8,1
- stdcx. r8,0,r9
- bne- 1b
- blr
-#endif
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 9fcf2d195830..1f61fa2148b5 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -221,7 +221,8 @@ static void initialize_distance_lookup_table(int nid,
}
}
-/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
+/*
+ * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
* info is found.
*/
static int associativity_to_nid(const __be32 *associativity)
@@ -235,7 +236,7 @@ static int associativity_to_nid(const __be32 *associativity)
nid = of_read_number(&associativity[min_common_depth], 1);
/* POWER4 LPAR uses 0xffff as invalid node */
- if (nid == 0xffff || nid >= MAX_NUMNODES)
+ if (nid == 0xffff || nid >= nr_node_ids)
nid = NUMA_NO_NODE;
if (nid > 0 &&
@@ -448,7 +449,7 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
nid = of_read_number(&aa.arrays[index], 1);
- if (nid == 0xffff || nid >= MAX_NUMNODES)
+ if (nid == 0xffff || nid >= nr_node_ids)
nid = default_nid;
if (nid > 0) {
@@ -644,8 +645,9 @@ static inline int __init read_usm_ranges(const __be32 **usm)
* Extract NUMA information from the ibm,dynamic-reconfiguration-memory
* node. This assumes n_mem_{addr,size}_cells have been set.
*/
-static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
- const __be32 **usm)
+static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
+ const __be32 **usm,
+ void *data)
{
unsigned int ranges, is_kexec_kdump = 0;
unsigned long base, size, sz;
@@ -657,7 +659,7 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
*/
if ((lmb->flags & DRCONF_MEM_RESERVED)
|| !(lmb->flags & DRCONF_MEM_ASSIGNED))
- return;
+ return 0;
if (*usm)
is_kexec_kdump = 1;
@@ -669,7 +671,7 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
if (is_kexec_kdump) {
ranges = read_usm_ranges(usm);
if (!ranges) /* there are no (base, size) duple */
- return;
+ return 0;
}
do {
@@ -686,6 +688,8 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
if (sz)
memblock_set_node(base, sz, &memblock.memory, nid);
} while (--ranges);
+
+ return 0;
}
static int __init parse_numa_properties(void)
@@ -787,7 +791,7 @@ new_range:
*/
memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (memory) {
- walk_drmem_lmbs(memory, numa_setup_drmem_lmb);
+ walk_drmem_lmbs(memory, NULL, numa_setup_drmem_lmb);
of_node_put(memory);
}
@@ -949,7 +953,6 @@ void __init initmem_init(void)
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
setup_node_data(nid, start_pfn, end_pfn);
- sparse_memory_present_with_active_regions(nid);
}
sparse_init();
@@ -984,28 +987,6 @@ static int __init early_numa(char *p)
}
early_param("numa", early_numa);
-/*
- * The platform can inform us through one of several mechanisms
- * (post-migration device tree updates, PRRN or VPHN) that the NUMA
- * assignment of a resource has changed. This controls whether we act
- * on that. Disabled by default.
- */
-static bool topology_updates_enabled;
-
-static int __init early_topology_updates(char *p)
-{
- if (!p)
- return 0;
-
- if (!strcmp(p, "on")) {
- pr_warn("Caution: enabling topology updates\n");
- topology_updates_enabled = true;
- }
-
- return 0;
-}
-early_param("topology_updates", early_topology_updates);
-
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Find the node associated with a hot added memory section for
@@ -1144,98 +1125,9 @@ u64 memory_hotplug_max(void)
/* Virtual Processor Home Node (VPHN) support */
#ifdef CONFIG_PPC_SPLPAR
-struct topology_update_data {
- struct topology_update_data *next;
- unsigned int cpu;
- int old_nid;
- int new_nid;
-};
-
-#define TOPOLOGY_DEF_TIMER_SECS 60
-
-static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
-static cpumask_t cpu_associativity_changes_mask;
-static int vphn_enabled;
-static int prrn_enabled;
-static void reset_topology_timer(void);
-static int topology_timer_secs = 1;
static int topology_inited;
/*
- * Change polling interval for associativity changes.
- */
-int timed_topology_update(int nsecs)
-{
- if (vphn_enabled) {
- if (nsecs > 0)
- topology_timer_secs = nsecs;
- else
- topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
-
- reset_topology_timer();
- }
-
- return 0;
-}
-
-/*
- * Store the current values of the associativity change counters in the
- * hypervisor.
- */
-static void setup_cpu_associativity_change_counters(void)
-{
- int cpu;
-
- /* The VPHN feature supports a maximum of 8 reference points */
- BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);
-
- for_each_possible_cpu(cpu) {
- int i;
- u8 *counts = vphn_cpu_change_counts[cpu];
- volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
-
- for (i = 0; i < distance_ref_points_depth; i++)
- counts[i] = hypervisor_counts[i];
- }
-}
-
-/*
- * The hypervisor maintains a set of 8 associativity change counters in
- * the VPA of each cpu that correspond to the associativity levels in the
- * ibm,associativity-reference-points property. When an associativity
- * level changes, the corresponding counter is incremented.
- *
- * Set a bit in cpu_associativity_changes_mask for each cpu whose home
- * node associativity levels have changed.
- *
- * Returns the number of cpus with unhandled associativity changes.
- */
-static int update_cpu_associativity_changes_mask(void)
-{
- int cpu;
- cpumask_t *changes = &cpu_associativity_changes_mask;
-
- for_each_possible_cpu(cpu) {
- int i, changed = 0;
- u8 *counts = vphn_cpu_change_counts[cpu];
- volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
-
- for (i = 0; i < distance_ref_points_depth; i++) {
- if (hypervisor_counts[i] != counts[i]) {
- counts[i] = hypervisor_counts[i];
- changed = 1;
- }
- }
- if (changed) {
- cpumask_or(changes, changes, cpu_sibling_mask(cpu));
- cpu = cpu_last_thread_sibling(cpu);
- }
- }
-
- return cpumask_weight(changes);
-}
-
-/*
* Retrieve the new associativity information for a virtual processor's
* home node.
*/
@@ -1250,7 +1142,6 @@ static long vphn_get_associativity(unsigned long cpu,
switch (rc) {
case H_SUCCESS:
dbg("VPHN hcall succeeded. Reset polling...\n");
- timed_topology_update(0);
goto out;
case H_FUNCTION:
@@ -1269,8 +1160,6 @@ static long vphn_get_associativity(unsigned long cpu,
, rc);
break;
}
-
- stop_topology_update();
out:
return rc;
}
@@ -1314,380 +1203,8 @@ int find_and_online_cpu_nid(int cpu)
return new_nid;
}
-/*
- * Update the CPU maps and sysfs entries for a single CPU when its NUMA
- * characteristics change. This function doesn't perform any locking and is
- * only safe to call from stop_machine().
- */
-static int update_cpu_topology(void *data)
-{
- struct topology_update_data *update;
- unsigned long cpu;
-
- if (!data)
- return -EINVAL;
-
- cpu = smp_processor_id();
-
- for (update = data; update; update = update->next) {
- int new_nid = update->new_nid;
- if (cpu != update->cpu)
- continue;
-
- unmap_cpu_from_node(cpu);
- map_cpu_to_node(cpu, new_nid);
- set_cpu_numa_node(cpu, new_nid);
- set_cpu_numa_mem(cpu, local_memory_node(new_nid));
- vdso_getcpu_init();
- }
-
- return 0;
-}
-
-static int update_lookup_table(void *data)
-{
- struct topology_update_data *update;
-
- if (!data)
- return -EINVAL;
-
- /*
- * Upon topology update, the numa-cpu lookup table needs to be updated
- * for all threads in the core, including offline CPUs, to ensure that
- * future hotplug operations respect the cpu-to-node associativity
- * properly.
- */
- for (update = data; update; update = update->next) {
- int nid, base, j;
-
- nid = update->new_nid;
- base = cpu_first_thread_sibling(update->cpu);
-
- for (j = 0; j < threads_per_core; j++) {
- update_numa_cpu_lookup_table(base + j, nid);
- }
- }
-
- return 0;
-}
-
-/*
- * Update the node maps and sysfs entries for each cpu whose home node
- * has changed. Returns 1 when the topology has changed, and 0 otherwise.
- *
- * cpus_locked says whether we already hold cpu_hotplug_lock.
- */
-int numa_update_cpu_topology(bool cpus_locked)
-{
- unsigned int cpu, sibling, changed = 0;
- struct topology_update_data *updates, *ud;
- cpumask_t updated_cpus;
- struct device *dev;
- int weight, new_nid, i = 0;
-
- if (!prrn_enabled && !vphn_enabled && topology_inited)
- return 0;
-
- weight = cpumask_weight(&cpu_associativity_changes_mask);
- if (!weight)
- return 0;
-
- updates = kcalloc(weight, sizeof(*updates), GFP_KERNEL);
- if (!updates)
- return 0;
-
- cpumask_clear(&updated_cpus);
-
- for_each_cpu(cpu, &cpu_associativity_changes_mask) {
- /*
- * If siblings aren't flagged for changes, updates list
- * will be too short. Skip on this update and set for next
- * update.
- */
- if (!cpumask_subset(cpu_sibling_mask(cpu),
- &cpu_associativity_changes_mask)) {
- pr_info("Sibling bits not set for associativity "
- "change, cpu%d\n", cpu);
- cpumask_or(&cpu_associativity_changes_mask,
- &cpu_associativity_changes_mask,
- cpu_sibling_mask(cpu));
- cpu = cpu_last_thread_sibling(cpu);
- continue;
- }
-
- new_nid = find_and_online_cpu_nid(cpu);
-
- if (new_nid == numa_cpu_lookup_table[cpu]) {
- cpumask_andnot(&cpu_associativity_changes_mask,
- &cpu_associativity_changes_mask,
- cpu_sibling_mask(cpu));
- dbg("Assoc chg gives same node %d for cpu%d\n",
- new_nid, cpu);
- cpu = cpu_last_thread_sibling(cpu);
- continue;
- }
-
- for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
- ud = &updates[i++];
- ud->next = &updates[i];
- ud->cpu = sibling;
- ud->new_nid = new_nid;
- ud->old_nid = numa_cpu_lookup_table[sibling];
- cpumask_set_cpu(sibling, &updated_cpus);
- }
- cpu = cpu_last_thread_sibling(cpu);
- }
-
- /*
- * Prevent processing of 'updates' from overflowing array
- * where last entry filled in a 'next' pointer.
- */
- if (i)
- updates[i-1].next = NULL;
-
- pr_debug("Topology update for the following CPUs:\n");
- if (cpumask_weight(&updated_cpus)) {
- for (ud = &updates[0]; ud; ud = ud->next) {
- pr_debug("cpu %d moving from node %d "
- "to %d\n", ud->cpu,
- ud->old_nid, ud->new_nid);
- }
- }
-
- /*
- * In cases where we have nothing to update (because the updates list
- * is too short or because the new topology is same as the old one),
- * skip invoking update_cpu_topology() via stop-machine(). This is
- * necessary (and not just a fast-path optimization) since stop-machine
- * can end up electing a random CPU to run update_cpu_topology(), and
- * thus trick us into setting up incorrect cpu-node mappings (since
- * 'updates' is kzalloc()'ed).
- *
- * And for the similar reason, we will skip all the following updating.
- */
- if (!cpumask_weight(&updated_cpus))
- goto out;
-
- if (cpus_locked)
- stop_machine_cpuslocked(update_cpu_topology, &updates[0],
- &updated_cpus);
- else
- stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
-
- /*
- * Update the numa-cpu lookup table with the new mappings, even for
- * offline CPUs. It is best to perform this update from the stop-
- * machine context.
- */
- if (cpus_locked)
- stop_machine_cpuslocked(update_lookup_table, &updates[0],
- cpumask_of(raw_smp_processor_id()));
- else
- stop_machine(update_lookup_table, &updates[0],
- cpumask_of(raw_smp_processor_id()));
-
- for (ud = &updates[0]; ud; ud = ud->next) {
- unregister_cpu_under_node(ud->cpu, ud->old_nid);
- register_cpu_under_node(ud->cpu, ud->new_nid);
-
- dev = get_cpu_device(ud->cpu);
- if (dev)
- kobject_uevent(&dev->kobj, KOBJ_CHANGE);
- cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
- changed = 1;
- }
-
-out:
- kfree(updates);
- return changed;
-}
-
-int arch_update_cpu_topology(void)
-{
- return numa_update_cpu_topology(true);
-}
-
-static void topology_work_fn(struct work_struct *work)
-{
- rebuild_sched_domains();
-}
-static DECLARE_WORK(topology_work, topology_work_fn);
-
-static void topology_schedule_update(void)
-{
- schedule_work(&topology_work);
-}
-
-static void topology_timer_fn(struct timer_list *unused)
-{
- if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
- topology_schedule_update();
- else if (vphn_enabled) {
- if (update_cpu_associativity_changes_mask() > 0)
- topology_schedule_update();
- reset_topology_timer();
- }
-}
-static struct timer_list topology_timer;
-
-static void reset_topology_timer(void)
-{
- if (vphn_enabled)
- mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
-}
-
-#ifdef CONFIG_SMP
-
-static int dt_update_callback(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct of_reconfig_data *update = data;
- int rc = NOTIFY_DONE;
-
- switch (action) {
- case OF_RECONFIG_UPDATE_PROPERTY:
- if (of_node_is_type(update->dn, "cpu") &&
- !of_prop_cmp(update->prop->name, "ibm,associativity")) {
- u32 core_id;
- of_property_read_u32(update->dn, "reg", &core_id);
- rc = dlpar_cpu_readd(core_id);
- rc = NOTIFY_OK;
- }
- break;
- }
-
- return rc;
-}
-
-static struct notifier_block dt_update_nb = {
- .notifier_call = dt_update_callback,
-};
-
-#endif
-
-/*
- * Start polling for associativity changes.
- */
-int start_topology_update(void)
-{
- int rc = 0;
-
- if (!topology_updates_enabled)
- return 0;
-
- if (firmware_has_feature(FW_FEATURE_PRRN)) {
- if (!prrn_enabled) {
- prrn_enabled = 1;
-#ifdef CONFIG_SMP
- rc = of_reconfig_notifier_register(&dt_update_nb);
-#endif
- }
- }
- if (firmware_has_feature(FW_FEATURE_VPHN) &&
- lppaca_shared_proc(get_lppaca())) {
- if (!vphn_enabled) {
- vphn_enabled = 1;
- setup_cpu_associativity_change_counters();
- timer_setup(&topology_timer, topology_timer_fn,
- TIMER_DEFERRABLE);
- reset_topology_timer();
- }
- }
-
- pr_info("Starting topology update%s%s\n",
- (prrn_enabled ? " prrn_enabled" : ""),
- (vphn_enabled ? " vphn_enabled" : ""));
-
- return rc;
-}
-
-/*
- * Disable polling for VPHN associativity changes.
- */
-int stop_topology_update(void)
-{
- int rc = 0;
-
- if (!topology_updates_enabled)
- return 0;
-
- if (prrn_enabled) {
- prrn_enabled = 0;
-#ifdef CONFIG_SMP
- rc = of_reconfig_notifier_unregister(&dt_update_nb);
-#endif
- }
- if (vphn_enabled) {
- vphn_enabled = 0;
- rc = del_timer_sync(&topology_timer);
- }
-
- pr_info("Stopping topology update\n");
-
- return rc;
-}
-
-int prrn_is_enabled(void)
-{
- return prrn_enabled;
-}
-
-static int topology_read(struct seq_file *file, void *v)
-{
- if (vphn_enabled || prrn_enabled)
- seq_puts(file, "on\n");
- else
- seq_puts(file, "off\n");
-
- return 0;
-}
-
-static int topology_open(struct inode *inode, struct file *file)
-{
- return single_open(file, topology_read, NULL);
-}
-
-static ssize_t topology_write(struct file *file, const char __user *buf,
- size_t count, loff_t *off)
-{
- char kbuf[4]; /* "on" or "off" plus null. */
- int read_len;
-
- read_len = count < 3 ? count : 3;
- if (copy_from_user(kbuf, buf, read_len))
- return -EINVAL;
-
- kbuf[read_len] = '\0';
-
- if (!strncmp(kbuf, "on", 2)) {
- topology_updates_enabled = true;
- start_topology_update();
- } else if (!strncmp(kbuf, "off", 3)) {
- stop_topology_update();
- topology_updates_enabled = false;
- } else
- return -EINVAL;
-
- return count;
-}
-
-static const struct proc_ops topology_proc_ops = {
- .proc_read = seq_read,
- .proc_write = topology_write,
- .proc_open = topology_open,
- .proc_release = single_release,
-};
-
static int topology_update_init(void)
{
- start_topology_update();
-
- if (vphn_enabled)
- topology_schedule_update();
-
- if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_proc_ops))
- return -ENOMEM;
-
topology_inited = 1;
return 0;
}
diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
index ee4bd6d38602..97ae4935da79 100644
--- a/arch/powerpc/mm/pgtable-frag.c
+++ b/arch/powerpc/mm/pgtable-frag.c
@@ -110,6 +110,9 @@ void pte_fragment_free(unsigned long *table, int kernel)
{
struct page *page = virt_to_page(table);
+ if (PageReserved(page))
+ return free_reserved_page(page);
+
BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
if (atomic_dec_and_test(&page->pt_frag_refcount)) {
if (!kernel)
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 1136257c3a99..9c0547d77af3 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -23,7 +23,6 @@
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/hugetlb.h>
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/hugetlb.h>
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index bb43a8c04bee..cc6e2f94517f 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -31,7 +31,6 @@
#include <linux/slab.h>
#include <linux/hugetlb.h>
-#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/prom.h>
#include <asm/mmu_context.h>
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index a2c33efc7ce8..ad6df9a2e7c8 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -17,10 +17,10 @@
#include <linux/seq_file.h>
#include <linux/const.h>
#include <asm/page.h>
-#include <asm/pgalloc.h>
#include <asm/plpar_wrappers.h>
#include <linux/memblock.h>
#include <asm/firmware.h>
+#include <asm/pgalloc.h>
struct pg_state {
struct seq_file *seq;
@@ -258,7 +258,7 @@ static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *
for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) {
lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes);
- if (lpar_rc != H_SUCCESS)
+ if (lpar_rc)
continue;
for (j = 0; j < 4; j++) {
if (HPTE_V_COMPARE(ptes[j].v, want_v) &&
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index de6e05ef871c..aca354fb670b 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -21,7 +21,6 @@
#include <asm/fixmap.h>
#include <linux/const.h>
#include <asm/page.h>
-#include <asm/pgalloc.h>
#include <asm/hugetlb.h>
#include <mm/mmu_decl.h>
@@ -74,6 +73,10 @@ struct addr_marker {
static struct addr_marker address_markers[] = {
{ 0, "Start of kernel VM" },
+#ifdef MODULES_VADDR
+ { 0, "modules start" },
+ { 0, "modules end" },
+#endif
{ 0, "vmalloc() Area" },
{ 0, "vmalloc() End" },
#ifdef CONFIG_PPC64
@@ -195,6 +198,24 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
}
+static void note_page_update_state(struct pg_state *st, unsigned long addr,
+ unsigned int level, u64 val, unsigned long page_size)
+{
+ u64 flag = val & pg_level[level].mask;
+ u64 pa = val & PTE_RPN_MASK;
+
+ st->level = level;
+ st->current_flags = flag;
+ st->start_address = addr;
+ st->start_pa = pa;
+ st->page_size = page_size;
+
+ while (addr >= st->marker[1].start_address) {
+ st->marker++;
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ }
+}
+
static void note_page(struct pg_state *st, unsigned long addr,
unsigned int level, u64 val, unsigned long page_size)
{
@@ -203,13 +224,8 @@ static void note_page(struct pg_state *st, unsigned long addr,
/* At first no level is set */
if (!st->level) {
- st->level = level;
- st->current_flags = flag;
- st->start_address = addr;
- st->start_pa = pa;
- st->last_pa = pa;
- st->page_size = page_size;
pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ note_page_update_state(st, addr, level, val, page_size);
/*
* Dump the section of virtual memory when:
* - the PTE flags from one entry to the next differs.
@@ -241,19 +257,9 @@ static void note_page(struct pg_state *st, unsigned long addr,
* Address indicates we have passed the end of the
* current section of virtual memory
*/
- while (addr >= st->marker[1].start_address) {
- st->marker++;
- pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
- }
- st->start_address = addr;
- st->start_pa = pa;
- st->last_pa = pa;
- st->page_size = page_size;
- st->current_flags = flag;
- st->level = level;
- } else {
- st->last_pa = pa;
+ note_page_update_state(st, addr, level, val, page_size);
}
+ st->last_pa = pa;
}
static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
@@ -348,7 +354,15 @@ static void populate_markers(void)
{
int i = 0;
+#ifdef CONFIG_PPC64
address_markers[i++].start_address = PAGE_OFFSET;
+#else
+ address_markers[i++].start_address = TASK_SIZE;
+#endif
+#ifdef MODULES_VADDR
+ address_markers[i++].start_address = MODULES_VADDR;
+ address_markers[i++].start_address = MODULES_END;
+#endif
address_markers[i++].start_address = VMALLOC_START;
address_markers[i++].start_address = VMALLOC_END;
#ifdef CONFIG_PPC64
@@ -385,7 +399,7 @@ static int ptdump_show(struct seq_file *m, void *v)
struct pg_state st = {
.seq = m,
.marker = address_markers,
- .start_address = PAGE_OFFSET,
+ .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
};
#ifdef CONFIG_PPC64
@@ -429,7 +443,7 @@ void ptdump_check_wx(void)
.seq = NULL,
.marker = address_markers,
.check_wx = true,
- .start_address = PAGE_OFFSET,
+ .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
};
#ifdef CONFIG_PPC64