aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm/book3s64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm/book3s64')
-rw-r--r--arch/powerpc/mm/book3s64/hash_hugetlbpage.c1
-rw-r--r--arch/powerpc/mm/book3s64/hash_pgtable.c1
-rw-r--r--arch/powerpc/mm/book3s64/hash_tlb.c1
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c25
-rw-r--r--arch/powerpc/mm/book3s64/pgtable.c8
-rw-r--r--arch/powerpc/mm/book3s64/pkeys.c300
-rw-r--r--arch/powerpc/mm/book3s64/radix_hugetlbpage.c1
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c219
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c82
9 files changed, 397 insertions, 241 deletions
diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
index 25acb9c5ee1b..964467b3a776 100644
--- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
@@ -10,7 +10,6 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
-#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index 2a99167afbaf..fd9c7f91b092 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -9,7 +9,6 @@
#include <linux/mm_types.h>
#include <linux/mm.h>
-#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/mmu.h>
#include <asm/tlb.h>
diff --git a/arch/powerpc/mm/book3s64/hash_tlb.c b/arch/powerpc/mm/book3s64/hash_tlb.c
index 0fbf3dc9f2c2..eb0bccaf221e 100644
--- a/arch/powerpc/mm/book3s64/hash_tlb.c
+++ b/arch/powerpc/mm/book3s64/hash_tlb.c
@@ -21,7 +21,6 @@
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/bug.h>
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 9b9f92ad0e7a..1478fceeb683 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -232,8 +232,6 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
rflags |= HPTE_R_I;
else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)
rflags |= (HPTE_R_I | HPTE_R_G);
- else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
- rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M);
else
/*
* Add memory coherence if cache inhibited is not set
@@ -596,7 +594,7 @@ static void __init htab_scan_page_sizes(void)
}
#ifdef CONFIG_HUGETLB_PAGE
- if (!hugetlb_disabled) {
+ if (!hugetlb_disabled && !early_radix_enabled() ) {
/* Reserve 16G huge page memory sections for huge pages */
of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
}
@@ -663,11 +661,10 @@ static void __init htab_init_page_sizes(void)
* Pick a size for the linear mapping. Currently, we only
* support 16M, 1M and 4K which is the default
*/
- if (IS_ENABLED(STRICT_KERNEL_RWX) &&
+ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) &&
(unsigned long)_stext % 0x1000000) {
if (mmu_psize_defs[MMU_PAGE_16M].shift)
- pr_warn("Kernel not 16M aligned, "
- "disabling 16M linear map alignment");
+ pr_warn("Kernel not 16M aligned, disabling 16M linear map alignment\n");
aligned = false;
}
@@ -788,7 +785,7 @@ static unsigned long __init htab_get_table_size(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int resize_hpt_for_hotplug(unsigned long new_mem_size)
+static int resize_hpt_for_hotplug(unsigned long new_mem_size)
{
unsigned target_hpt_shift;
@@ -822,6 +819,8 @@ int hash__create_section_mapping(unsigned long start, unsigned long end,
return -1;
}
+ resize_hpt_for_hotplug(memblock_phys_mem_size());
+
rc = htab_bolt_mapping(start, end, __pa(start),
pgprot_val(prot), mmu_linear_psize,
mmu_kernel_ssize);
@@ -839,6 +838,10 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end)
int rc = htab_remove_mapping(start, end, mmu_linear_psize,
mmu_kernel_ssize);
WARN_ON(rc < 0);
+
+ if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
+ pr_warn("Hash collision while resizing HPT\n");
+
return rc;
}
#endif /* CONFIG_MEMORY_HOTPLUG */
@@ -1111,6 +1114,10 @@ void hash__early_init_mmu_secondary(void)
if (cpu_has_feature(CPU_FTR_ARCH_206)
&& cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
+
+#ifdef CONFIG_PPC_MEM_KEYS
+ mtspr(SPRN_UAMOR, default_uamor);
+#endif
}
#endif /* CONFIG_SMP */
@@ -1731,10 +1738,6 @@ unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
return gslot;
}
-/*
- * WARNING: This is called from hash_low_64.S, if you change this prototype,
- * do not forget to update the assembly call site !
- */
void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
unsigned long flags)
{
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index c58ad1049909..e18ae50a275c 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -15,6 +15,7 @@
#include <asm/powernv.h>
#include <asm/firmware.h>
#include <asm/ultravisor.h>
+#include <asm/kexec.h>
#include <mm/mmu_decl.h>
#include <trace/events/thp.h>
@@ -165,6 +166,8 @@ void mmu_cleanup_all(void)
radix__mmu_cleanup_all();
else if (mmu_hash_ops.hpte_clear_all)
mmu_hash_ops.hpte_clear_all();
+
+ reset_sprs();
}
#ifdef CONFIG_MEMORY_HOTPLUG
@@ -339,6 +342,9 @@ void pmd_fragment_free(unsigned long *pmd)
{
struct page *page = virt_to_page(pmd);
+ if (PageReserved(page))
+ return free_reserved_page(page);
+
BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
if (atomic_dec_and_test(&page->pt_frag_refcount)) {
pgtable_pmd_page_dtor(page);
@@ -356,7 +362,7 @@ static inline void pgtable_free(void *table, int index)
pmd_fragment_free(table);
break;
case PUD_INDEX:
- kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), table);
+ __pud_free(table);
break;
#if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE)
/* 16M hugepd directory at pud level */
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index d174106bab67..69a6b87f2bb4 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -10,58 +10,103 @@
#include <asm/mmu.h>
#include <asm/setup.h>
#include <linux/pkeys.h>
-#include <linux/of_device.h>
+#include <linux/of_fdt.h>
-DEFINE_STATIC_KEY_TRUE(pkey_disabled);
-int pkeys_total; /* Total pkeys as per device tree */
-u32 initial_allocation_mask; /* Bits set for the initially allocated keys */
-u32 reserved_allocation_mask; /* Bits set for reserved keys */
-static bool pkey_execute_disable_supported;
-static bool pkeys_devtree_defined; /* property exported by device tree */
-static u64 pkey_amr_mask; /* Bits in AMR not to be touched */
-static u64 pkey_iamr_mask; /* Bits in AMR not to be touched */
-static u64 pkey_uamor_mask; /* Bits in UMOR not to be touched */
+int num_pkey; /* Max number of pkeys supported */
+/*
+ * Keys marked in the reservation list cannot be allocated by userspace
+ */
+u32 reserved_allocation_mask __ro_after_init;
+
+/* Bits set for the initially allocated keys */
+static u32 initial_allocation_mask __ro_after_init;
+
+/*
+ * Even if we allocate keys with sys_pkey_alloc(), we need to make sure
+ * other thread still find the access denied using the same keys.
+ */
+static u64 default_amr = ~0x0UL;
+static u64 default_iamr = 0x5555555555555555UL;
+u64 default_uamor __ro_after_init;
+/*
+ * Key used to implement PROT_EXEC mmap. Denies READ/WRITE
+ * We pick key 2 because 0 is special key and 1 is reserved as per ISA.
+ */
static int execute_only_key = 2;
+static bool pkey_execute_disable_supported;
+
#define AMR_BITS_PER_PKEY 2
#define AMR_RD_BIT 0x1UL
#define AMR_WR_BIT 0x2UL
#define IAMR_EX_BIT 0x1UL
-#define PKEY_REG_BITS (sizeof(u64)*8)
+#define PKEY_REG_BITS (sizeof(u64) * 8)
#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY))
-static void scan_pkey_feature(void)
+static int __init dt_scan_storage_keys(unsigned long node,
+ const char *uname, int depth,
+ void *data)
{
- u32 vals[2];
- struct device_node *cpu;
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const __be32 *prop;
+ int *pkeys_total = (int *) data;
- cpu = of_find_node_by_type(NULL, "cpu");
- if (!cpu)
- return;
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
- if (of_property_read_u32_array(cpu,
- "ibm,processor-storage-keys", vals, 2))
- return;
+ prop = of_get_flat_dt_prop(node, "ibm,processor-storage-keys", NULL);
+ if (!prop)
+ return 0;
+ *pkeys_total = be32_to_cpu(prop[0]);
+ return 1;
+}
+
+static int scan_pkey_feature(void)
+{
+ int ret;
+ int pkeys_total = 0;
/*
- * Since any pkey can be used for data or execute, we will just treat
- * all keys as equal and track them as one entity.
+ * Pkey is not supported with Radix translation.
*/
- pkeys_total = vals[0];
- pkeys_devtree_defined = true;
-}
+ if (early_radix_enabled())
+ return 0;
-static inline bool pkey_mmu_enabled(void)
-{
- if (firmware_has_feature(FW_FEATURE_LPAR))
- return pkeys_total;
- else
- return cpu_has_feature(CPU_FTR_PKEY);
+ /*
+ * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1
+ */
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_206))
+ return 0;
+
+ ret = of_scan_flat_dt(dt_scan_storage_keys, &pkeys_total);
+ if (ret == 0) {
+ /*
+ * Let's assume 32 pkeys on P8/P9 bare metal, if its not defined by device
+ * tree. We make this exception since some version of skiboot forgot to
+ * expose this property on power8/9.
+ */
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+ unsigned long pvr = mfspr(SPRN_PVR);
+
+ if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E ||
+ PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == PVR_POWER9)
+ pkeys_total = 32;
+ }
+ }
+
+ /*
+ * Adjust the upper limit, based on the number of bits supported by
+ * arch-neutral code.
+ */
+ pkeys_total = min_t(int, pkeys_total,
+ ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + 1));
+ return pkeys_total;
}
-static int pkey_initialize(void)
+void __init pkey_early_init_devtree(void)
{
- int os_reserved, i;
+ int pkeys_total, i;
/*
* We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral
@@ -80,31 +125,14 @@ static int pkey_initialize(void)
!= (sizeof(u64) * BITS_PER_BYTE));
/* scan the device tree for pkey feature */
- scan_pkey_feature();
-
- /*
- * Let's assume 32 pkeys on P8 bare metal, if its not defined by device
- * tree. We make this exception since skiboot forgot to expose this
- * property on power8.
- */
- if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR) &&
- cpu_has_feature(CPU_FTRS_POWER8))
- pkeys_total = 32;
-
- /*
- * Adjust the upper limit, based on the number of bits supported by
- * arch-neutral code.
- */
- pkeys_total = min_t(int, pkeys_total,
- ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)+1));
+ pkeys_total = scan_pkey_feature();
+ if (!pkeys_total)
+ goto out;
- if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total)
- static_branch_enable(&pkey_disabled);
- else
- static_branch_disable(&pkey_disabled);
+ /* Allow all keys to be modified by default */
+ default_uamor = ~0x0UL;
- if (static_branch_likely(&pkey_disabled))
- return 0;
+ cur_cpu_spec->mmu_features |= MMU_FTR_PKEY;
/*
* The device tree cannot be relied to indicate support for
@@ -118,53 +146,86 @@ static int pkey_initialize(void)
#ifdef CONFIG_PPC_4K_PAGES
/*
* The OS can manage only 8 pkeys due to its inability to represent them
- * in the Linux 4K PTE.
+ * in the Linux 4K PTE. Mark all other keys reserved.
*/
- os_reserved = pkeys_total - 8;
+ num_pkey = min(8, pkeys_total);
#else
- os_reserved = 0;
+ num_pkey = pkeys_total;
#endif
- /* Bits are in LE format. */
- reserved_allocation_mask = (0x1 << 1) | (0x1 << execute_only_key);
-
- /* register mask is in BE format */
- pkey_amr_mask = ~0x0ul;
- pkey_amr_mask &= ~(0x3ul << pkeyshift(0));
-
- pkey_iamr_mask = ~0x0ul;
- pkey_iamr_mask &= ~(0x3ul << pkeyshift(0));
- pkey_iamr_mask &= ~(0x3ul << pkeyshift(execute_only_key));
- pkey_uamor_mask = ~0x0ul;
- pkey_uamor_mask &= ~(0x3ul << pkeyshift(0));
- pkey_uamor_mask &= ~(0x3ul << pkeyshift(execute_only_key));
-
- /* mark the rest of the keys as reserved and hence unavailable */
- for (i = (pkeys_total - os_reserved); i < pkeys_total; i++) {
- reserved_allocation_mask |= (0x1 << i);
- pkey_uamor_mask &= ~(0x3ul << pkeyshift(i));
- }
- initial_allocation_mask = reserved_allocation_mask | (0x1 << 0);
-
- if (unlikely((pkeys_total - os_reserved) <= execute_only_key)) {
+ if (unlikely(num_pkey <= execute_only_key) || !pkey_execute_disable_supported) {
/*
* Insufficient number of keys to support
* execute only key. Mark it unavailable.
- * Any AMR, UAMOR, IAMR bit set for
- * this key is irrelevant since this key
- * can never be allocated.
*/
execute_only_key = -1;
+ } else {
+ /*
+ * Mark the execute_only_pkey as not available for
+ * user allocation via pkey_alloc.
+ */
+ reserved_allocation_mask |= (0x1 << execute_only_key);
+
+ /*
+ * Deny READ/WRITE for execute_only_key.
+ * Allow execute in IAMR.
+ */
+ default_amr |= (0x3ul << pkeyshift(execute_only_key));
+ default_iamr &= ~(0x1ul << pkeyshift(execute_only_key));
+
+ /*
+ * Clear the uamor bits for this key.
+ */
+ default_uamor &= ~(0x3ul << pkeyshift(execute_only_key));
}
- return 0;
-}
+ /*
+ * Allow access for only key 0. And prevent any other modification.
+ */
+ default_amr &= ~(0x3ul << pkeyshift(0));
+ default_iamr &= ~(0x1ul << pkeyshift(0));
+ default_uamor &= ~(0x3ul << pkeyshift(0));
+ /*
+ * key 0 is special in that we want to consider it an allocated
+ * key which is preallocated. We don't allow changing AMR bits
+ * w.r.t key 0. But one can pkey_free(key0)
+ */
+ initial_allocation_mask |= (0x1 << 0);
-arch_initcall(pkey_initialize);
+ /*
+ * key 1 is recommended not to be used. PowerISA(3.0) page 1015,
+ * programming note.
+ */
+ reserved_allocation_mask |= (0x1 << 1);
+ default_uamor &= ~(0x3ul << pkeyshift(1));
+
+ /*
+ * Prevent the usage of OS reserved keys. Update UAMOR
+ * for those keys. Also mark the rest of the bits in the
+ * 32 bit mask as reserved.
+ */
+ for (i = num_pkey; i < 32 ; i++) {
+ reserved_allocation_mask |= (0x1 << i);
+ default_uamor &= ~(0x3ul << pkeyshift(i));
+ }
+ /*
+ * Prevent the allocation of reserved keys too.
+ */
+ initial_allocation_mask |= reserved_allocation_mask;
+
+ pr_info("Enabling pkeys with max key count %d\n", num_pkey);
+out:
+ /*
+ * Setup uamor on boot cpu
+ */
+ mtspr(SPRN_UAMOR, default_uamor);
+
+ return;
+}
void pkey_mm_init(struct mm_struct *mm)
{
- if (static_branch_likely(&pkey_disabled))
+ if (!mmu_has_feature(MMU_FTR_PKEY))
return;
mm_pkey_allocation_map(mm) = initial_allocation_mask;
mm->context.execute_only_pkey = execute_only_key;
@@ -196,30 +257,6 @@ static inline void write_iamr(u64 value)
mtspr(SPRN_IAMR, value);
}
-static inline u64 read_uamor(void)
-{
- return mfspr(SPRN_UAMOR);
-}
-
-static inline void write_uamor(u64 value)
-{
- mtspr(SPRN_UAMOR, value);
-}
-
-static bool is_pkey_enabled(int pkey)
-{
- u64 uamor = read_uamor();
- u64 pkey_bits = 0x3ul << pkeyshift(pkey);
- u64 uamor_pkey_bits = (uamor & pkey_bits);
-
- /*
- * Both the bits in UAMOR corresponding to the key should be set or
- * reset.
- */
- WARN_ON(uamor_pkey_bits && (uamor_pkey_bits != pkey_bits));
- return !!(uamor_pkey_bits);
-}
-
static inline void init_amr(int pkey, u8 init_bits)
{
u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey));
@@ -245,8 +282,18 @@ int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
{
u64 new_amr_bits = 0x0ul;
u64 new_iamr_bits = 0x0ul;
+ u64 pkey_bits, uamor_pkey_bits;
+
+ /*
+ * Check whether the key is disabled by UAMOR.
+ */
+ pkey_bits = 0x3ul << pkeyshift(pkey);
+ uamor_pkey_bits = (default_uamor & pkey_bits);
- if (!is_pkey_enabled(pkey))
+ /*
+ * Both the bits in UAMOR corresponding to the key should be set
+ */
+ if (uamor_pkey_bits != pkey_bits)
return -EINVAL;
if (init_val & PKEY_DISABLE_EXECUTE) {
@@ -268,7 +315,7 @@ int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
void thread_pkey_regs_save(struct thread_struct *thread)
{
- if (static_branch_likely(&pkey_disabled))
+ if (!mmu_has_feature(MMU_FTR_PKEY))
return;
/*
@@ -276,38 +323,33 @@ void thread_pkey_regs_save(struct thread_struct *thread)
*/
thread->amr = read_amr();
thread->iamr = read_iamr();
- thread->uamor = read_uamor();
}
void thread_pkey_regs_restore(struct thread_struct *new_thread,
struct thread_struct *old_thread)
{
- if (static_branch_likely(&pkey_disabled))
+ if (!mmu_has_feature(MMU_FTR_PKEY))
return;
if (old_thread->amr != new_thread->amr)
write_amr(new_thread->amr);
if (old_thread->iamr != new_thread->iamr)
write_iamr(new_thread->iamr);
- if (old_thread->uamor != new_thread->uamor)
- write_uamor(new_thread->uamor);
}
void thread_pkey_regs_init(struct thread_struct *thread)
{
- if (static_branch_likely(&pkey_disabled))
+ if (!mmu_has_feature(MMU_FTR_PKEY))
return;
- thread->amr = pkey_amr_mask;
- thread->iamr = pkey_iamr_mask;
- thread->uamor = pkey_uamor_mask;
+ thread->amr = default_amr;
+ thread->iamr = default_iamr;
- write_uamor(pkey_uamor_mask);
- write_amr(pkey_amr_mask);
- write_iamr(pkey_iamr_mask);
+ write_amr(default_amr);
+ write_iamr(default_iamr);
}
-int __execute_only_pkey(struct mm_struct *mm)
+int execute_only_pkey(struct mm_struct *mm)
{
return mm->context.execute_only_pkey;
}
@@ -366,7 +408,7 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute)
bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
{
- if (static_branch_likely(&pkey_disabled))
+ if (!mmu_has_feature(MMU_FTR_PKEY))
return true;
return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute);
@@ -383,7 +425,7 @@ bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
bool execute, bool foreign)
{
- if (static_branch_likely(&pkey_disabled))
+ if (!mmu_has_feature(MMU_FTR_PKEY))
return true;
/*
* Do not enforce our key-permissions on a foreign vma.
@@ -396,7 +438,7 @@ bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm)
{
- if (static_branch_likely(&pkey_disabled))
+ if (!mmu_has_feature(MMU_FTR_PKEY))
return;
/* Duplicate the oldmm pkey state in mm: */
diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
index c812b401b66c..cb91071eef52 100644
--- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
@@ -2,7 +2,6 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/security.h>
-#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/machdep.h>
#include <asm/mman.h>
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index bb00e0cba119..28c784976bed 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -15,7 +15,7 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/string_helpers.h>
-#include <linux/stop_machine.h>
+#include <linux/memory.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
@@ -34,6 +34,7 @@
unsigned int mmu_pid_bits;
unsigned int mmu_base_pid;
+unsigned int radix_mem_block_size __ro_after_init;
static __ref void *early_alloc_pgtable(unsigned long size, int nid,
unsigned long region_start, unsigned long region_end)
@@ -56,6 +57,13 @@ static __ref void *early_alloc_pgtable(unsigned long size, int nid,
return ptr;
}
+/*
+ * When allocating pud or pmd pointers, we allocate a complete page
+ * of PAGE_SIZE rather than PUD_TABLE_SIZE or PMD_TABLE_SIZE. This
+ * is to ensure that the page obtained from the memblock allocator
+ * can be completely used as page table page and can be freed
+ * correctly when the page table entries are removed.
+ */
static int early_map_kernel_page(unsigned long ea, unsigned long pa,
pgprot_t flags,
unsigned int map_page_size,
@@ -72,8 +80,8 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa,
pgdp = pgd_offset_k(ea);
p4dp = p4d_offset(pgdp, ea);
if (p4d_none(*p4dp)) {
- pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid,
- region_start, region_end);
+ pudp = early_alloc_pgtable(PAGE_SIZE, nid,
+ region_start, region_end);
p4d_populate(&init_mm, p4dp, pudp);
}
pudp = pud_offset(p4dp, ea);
@@ -82,8 +90,8 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa,
goto set_the_pte;
}
if (pud_none(*pudp)) {
- pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid,
- region_start, region_end);
+ pmdp = early_alloc_pgtable(PAGE_SIZE, nid, region_start,
+ region_end);
pud_populate(&init_mm, pudp, pmdp);
}
pmdp = pmd_offset(pudp, ea);
@@ -259,6 +267,7 @@ static unsigned long next_boundary(unsigned long addr, unsigned long end)
static int __meminit create_physical_mapping(unsigned long start,
unsigned long end,
+ unsigned long max_mapping_size,
int nid, pgprot_t _prot)
{
unsigned long vaddr, addr, mapping_size = 0;
@@ -272,6 +281,8 @@ static int __meminit create_physical_mapping(unsigned long start,
int rc;
gap = next_boundary(addr, end) - addr;
+ if (gap > max_mapping_size)
+ gap = max_mapping_size;
previous_size = mapping_size;
prev_exec = exec;
@@ -322,8 +333,9 @@ static void __init radix_init_pgtable(void)
/* We don't support slb for radix */
mmu_slb_size = 0;
+
/*
- * Create the linear mapping, using standard page size for now
+ * Create the linear mapping
*/
for_each_memblock(memory, reg) {
/*
@@ -339,6 +351,7 @@ static void __init radix_init_pgtable(void)
WARN_ON(create_physical_mapping(reg->base,
reg->base + reg->size,
+ radix_mem_block_size,
-1, PAGE_KERNEL));
}
@@ -479,6 +492,57 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
return 1;
}
+#ifdef CONFIG_MEMORY_HOTPLUG
+static int __init probe_memory_block_size(unsigned long node, const char *uname, int
+ depth, void *data)
+{
+ unsigned long *mem_block_size = (unsigned long *)data;
+ const __be64 *prop;
+ int len;
+
+ if (depth != 1)
+ return 0;
+
+ if (strcmp(uname, "ibm,dynamic-reconfiguration-memory"))
+ return 0;
+
+ prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
+ if (!prop || len < sizeof(__be64))
+ /*
+ * Nothing in the device tree
+ */
+ *mem_block_size = MIN_MEMORY_BLOCK_SIZE;
+ else
+ *mem_block_size = be64_to_cpup(prop);
+ return 1;
+}
+
+static unsigned long radix_memory_block_size(void)
+{
+ unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE;
+
+ /*
+ * OPAL firmware feature is set by now. Hence we are ok
+ * to test OPAL feature.
+ */
+ if (firmware_has_feature(FW_FEATURE_OPAL))
+ mem_block_size = 1UL * 1024 * 1024 * 1024;
+ else
+ of_scan_flat_dt(probe_memory_block_size, &mem_block_size);
+
+ return mem_block_size;
+}
+
+#else /* CONFIG_MEMORY_HOTPLUG */
+
+static unsigned long radix_memory_block_size(void)
+{
+ return 1UL * 1024 * 1024 * 1024;
+}
+
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+
void __init radix__early_init_devtree(void)
{
int rc;
@@ -487,17 +551,27 @@ void __init radix__early_init_devtree(void)
* Try to find the available page sizes in the device-tree
*/
rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL);
- if (rc != 0) /* Found */
- goto found;
+ if (!rc) {
+ /*
+ * No page size details found in device tree.
+ * Let's assume we have page 4k and 64k support
+ */
+ mmu_psize_defs[MMU_PAGE_4K].shift = 12;
+ mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
+
+ mmu_psize_defs[MMU_PAGE_64K].shift = 16;
+ mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
+ }
+
/*
- * let's assume we have page 4k and 64k support
+ * Max mapping size used when mapping pages. We don't use
+ * ppc_md.memory_block_size() here because this get called
+ * early and we don't have machine probe called yet. Also
+ * the pseries implementation only check for ibm,lmb-size.
+ * All hypervisor supporting radix do expose that device
+ * tree node.
*/
- mmu_psize_defs[MMU_PAGE_4K].shift = 12;
- mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
-
- mmu_psize_defs[MMU_PAGE_64K].shift = 16;
- mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
-found:
+ radix_mem_block_size = radix_memory_block_size();
return;
}
@@ -519,8 +593,10 @@ void setup_kuep(bool disabled)
if (disabled || !early_radix_enabled())
return;
- if (smp_processor_id() == boot_cpuid)
+ if (smp_processor_id() == boot_cpuid) {
pr_info("Activating Kernel Userspace Execution Prevention\n");
+ cur_cpu_spec->mmu_features |= MMU_FTR_KUEP;
+ }
/*
* Radix always uses key0 of the IAMR to determine if an access is
@@ -544,6 +620,10 @@ void setup_kuap(bool disabled)
/* Make sure userspace can't change the AMR */
mtspr(SPRN_UAMOR, 0);
+
+ /*
+ * Set the default kernel AMR values on all cpus.
+ */
mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
isync();
}
@@ -700,30 +780,19 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
pud_clear(pud);
}
-struct change_mapping_params {
- pte_t *pte;
- unsigned long start;
- unsigned long end;
- unsigned long aligned_start;
- unsigned long aligned_end;
-};
-
-static int __meminit stop_machine_change_mapping(void *data)
+static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
{
- struct change_mapping_params *params =
- (struct change_mapping_params *)data;
+ pud_t *pud;
+ int i;
- if (!data)
- return -1;
+ for (i = 0; i < PTRS_PER_PUD; i++) {
+ pud = pud_start + i;
+ if (!pud_none(*pud))
+ return;
+ }
- spin_unlock(&init_mm.page_table_lock);
- pte_clear(&init_mm, params->aligned_start, params->pte);
- create_physical_mapping(__pa(params->aligned_start),
- __pa(params->start), -1, PAGE_KERNEL);
- create_physical_mapping(__pa(params->end), __pa(params->aligned_end),
- -1, PAGE_KERNEL);
- spin_lock(&init_mm.page_table_lock);
- return 0;
+ pud_free(&init_mm, pud_start);
+ p4d_clear(p4d);
}
static void remove_pte_table(pte_t *pte_start, unsigned long addr,
@@ -754,53 +823,7 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
}
}
-/*
- * clear the pte and potentially split the mapping helper
- */
-static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end,
- unsigned long size, pte_t *pte)
-{
- unsigned long mask = ~(size - 1);
- unsigned long aligned_start = addr & mask;
- unsigned long aligned_end = addr + size;
- struct change_mapping_params params;
- bool split_region = false;
-
- if ((end - addr) < size) {
- /*
- * We're going to clear the PTE, but not flushed
- * the mapping, time to remap and flush. The
- * effects if visible outside the processor or
- * if we are running in code close to the
- * mapping we cleared, we are in trouble.
- */
- if (overlaps_kernel_text(aligned_start, addr) ||
- overlaps_kernel_text(end, aligned_end)) {
- /*
- * Hack, just return, don't pte_clear
- */
- WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
- "text, not splitting\n", addr, end);
- return;
- }
- split_region = true;
- }
-
- if (split_region) {
- params.pte = pte;
- params.start = addr;
- params.end = end;
- params.aligned_start = addr & ~(size - 1);
- params.aligned_end = min_t(unsigned long, aligned_end,
- (unsigned long)__va(memblock_end_of_DRAM()));
- stop_machine(stop_machine_change_mapping, &params, NULL);
- return;
- }
-
- pte_clear(&init_mm, addr, pte);
-}
-
-static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
+static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
unsigned long end)
{
unsigned long next;
@@ -815,7 +838,12 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
continue;
if (pmd_is_leaf(*pmd)) {
- split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
+ if (!IS_ALIGNED(addr, PMD_SIZE) ||
+ !IS_ALIGNED(next, PMD_SIZE)) {
+ WARN_ONCE(1, "%s: unaligned range\n", __func__);
+ continue;
+ }
+ pte_clear(&init_mm, addr, (pte_t *)pmd);
continue;
}
@@ -825,7 +853,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
}
}
-static void remove_pud_table(pud_t *pud_start, unsigned long addr,
+static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
unsigned long end)
{
unsigned long next;
@@ -840,7 +868,12 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
continue;
if (pud_is_leaf(*pud)) {
- split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
+ if (!IS_ALIGNED(addr, PUD_SIZE) ||
+ !IS_ALIGNED(next, PUD_SIZE)) {
+ WARN_ONCE(1, "%s: unaligned range\n", __func__);
+ continue;
+ }
+ pte_clear(&init_mm, addr, (pte_t *)pud);
continue;
}
@@ -868,12 +901,19 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
continue;
if (p4d_is_leaf(*p4d)) {
- split_kernel_mapping(addr, end, P4D_SIZE, (pte_t *)p4d);
+ if (!IS_ALIGNED(addr, P4D_SIZE) ||
+ !IS_ALIGNED(next, P4D_SIZE)) {
+ WARN_ONCE(1, "%s: unaligned range\n", __func__);
+ continue;
+ }
+
+ pte_clear(&init_mm, addr, (pte_t *)pgd);
continue;
}
pud_base = (pud_t *)p4d_page_vaddr(*p4d);
remove_pud_table(pud_base, addr, next);
+ free_pud_table(pud_base, p4d);
}
spin_unlock(&init_mm.page_table_lock);
@@ -889,7 +929,8 @@ int __meminit radix__create_section_mapping(unsigned long start,
return -1;
}
- return create_physical_mapping(__pa(start), __pa(end), nid, prot);
+ return create_physical_mapping(__pa(start), __pa(end),
+ radix_mem_block_size, nid, prot);
}
int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index b5cc9b23cf02..0d233763441f 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -16,6 +16,7 @@
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>
+#include <asm/plpar_wrappers.h>
#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
@@ -694,7 +695,14 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
goto local;
}
- if (cputlb_use_tlbie()) {
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU;
+
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
+ H_RPTI_PAGE_ALL, 0, -1UL);
+ } else if (cputlb_use_tlbie()) {
if (mm_needs_flush_escalation(mm))
_tlbie_pid(pid, RIC_FLUSH_ALL);
else
@@ -727,7 +735,16 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
goto local;
}
}
- if (cputlb_use_tlbie())
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU;
+ unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+ H_RPTI_TYPE_PRT;
+
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, type,
+ H_RPTI_PAGE_ALL, 0, -1UL);
+ } else if (cputlb_use_tlbie())
_tlbie_pid(pid, RIC_FLUSH_ALL);
else
_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
@@ -760,7 +777,19 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
exit_flush_lazy_tlbs(mm);
goto local;
}
- if (cputlb_use_tlbie())
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt, pg_sizes, size;
+
+ tgt = H_RPTI_TARGET_CMMU;
+ pg_sizes = psize_to_rpti_pgsize(psize);
+ size = 1UL << mmu_psize_to_shift(psize);
+
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
+ pg_sizes, vmaddr,
+ vmaddr + size);
+ } else if (cputlb_use_tlbie())
_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
else
_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
@@ -810,7 +839,14 @@ static inline void _tlbiel_kernel_broadcast(void)
*/
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- if (cputlb_use_tlbie())
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
+ unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+ H_RPTI_TYPE_PRT;
+
+ pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
+ start, end);
+ } else if (cputlb_use_tlbie())
_tlbie_pid(0, RIC_FLUSH_ALL);
else
_tlbiel_kernel_broadcast();
@@ -864,7 +900,17 @@ is_local:
nr_pages > tlb_local_single_page_flush_ceiling);
}
- if (full) {
+ if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU;
+ unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
+
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
+ start, end);
+ } else if (full) {
if (local) {
_tlbiel_pid(pid, RIC_FLUSH_TLB);
} else {
@@ -1046,7 +1092,17 @@ is_local:
nr_pages > tlb_local_single_page_flush_ceiling);
}
- if (full) {
+ if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU;
+ unsigned long type = H_RPTI_TYPE_TLB;
+ unsigned long pg_sizes = psize_to_rpti_pgsize(psize);
+
+ if (also_pwc)
+ type |= H_RPTI_TYPE_PWC;
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
+ } else if (full) {
if (local) {
_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
} else {
@@ -1111,7 +1167,19 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
exit_flush_lazy_tlbs(mm);
goto local;
}
- if (cputlb_use_tlbie())
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt, type, pg_sizes;
+
+ tgt = H_RPTI_TARGET_CMMU;
+ type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+ H_RPTI_TYPE_PRT;
+ pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
+
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
+ addr, end);
+ } else if (cputlb_use_tlbie())
_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
else
_tlbiel_va_range_multicast(mm,