aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S2
-rw-r--r--arch/powerpc/kernel/dma-iommu.c3
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c8
-rw-r--r--arch/powerpc/kernel/eeh.c13
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S96
-rw-r--r--arch/powerpc/kernel/head_8xx.S10
-rw-r--r--arch/powerpc/kernel/iommu.c4
-rw-r--r--arch/powerpc/kernel/kprobes.c3
-rw-r--r--arch/powerpc/kernel/machine_kexec.c8
-rw-r--r--arch/powerpc/kernel/module_64.c2
-rw-r--r--arch/powerpc/kernel/prom.c21
-rw-r--r--arch/powerpc/kernel/rtas.c153
-rw-r--r--arch/powerpc/kernel/setup_64.c122
-rw-r--r--arch/powerpc/kernel/smp.c6
-rw-r--r--arch/powerpc/kernel/sysfs.c42
-rw-r--r--arch/powerpc/kernel/tau_6xx.c82
-rw-r--r--arch/powerpc/kernel/traps.c11
-rw-r--r--arch/powerpc/kernel/vdso.c2
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S14
19 files changed, 446 insertions, 156 deletions
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 9daede99c131..f52e2c55a12b 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -189,7 +189,7 @@ __init_LPCR_ISA300:
__init_FSCR:
mfspr r3,SPRN_FSCR
- ori r3,r3,FSCR_TAR|FSCR_DSCR|FSCR_EBB
+ ori r3,r3,FSCR_TAR|FSCR_EBB
mtspr SPRN_FSCR,r3
blr
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index 66f33e7f8d40..a39a1a0d844e 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -100,7 +100,8 @@ static u64 dma_iommu_get_required_mask(struct device *dev)
if (!tbl)
return 0;
- mask = 1ULL < (fls_long(tbl->it_offset + tbl->it_size) - 1);
+ mask = 1ULL << (fls_long(tbl->it_offset + tbl->it_size) +
+ tbl->it_page_shift - 1);
mask += mask - 1;
return mask;
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 7ed2b1b6643c..09134df01bfd 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -385,6 +385,14 @@ static int __init feat_enable_dscr(struct dt_cpu_feature *f)
{
u64 lpcr;
+ /*
+ * Linux relies on FSCR[DSCR] being clear, so that we can take the
+ * facility unavailable interrupt and track the task's usage of DSCR.
+ * See facility_unavailable_exception().
+ * Clear the bit here so that feat_enable() doesn't set it.
+ */
+ f->fscr_bit_nr = -1;
+
feat_enable(f);
lpcr = mfspr(SPRN_LPCR);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index d2ba7936d0d3..3f3f08d42266 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -365,14 +365,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
pa = pte_pfn(*ptep);
/* On radix we can do hugepage mappings for io, so handle that */
- if (hugepage_shift) {
- pa <<= hugepage_shift;
- pa |= token & ((1ul << hugepage_shift) - 1);
- } else {
- pa <<= PAGE_SHIFT;
- pa |= token & (PAGE_SIZE - 1);
- }
+ if (!hugepage_shift)
+ hugepage_shift = PAGE_SHIFT;
+ pa <<= PAGE_SHIFT;
+ pa |= token & ((1ul << hugepage_shift) - 1);
return pa;
}
@@ -506,7 +503,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
rc = 1;
if (pe->state & EEH_PE_ISOLATED) {
pe->check_count++;
- if (pe->check_count % EEH_MAX_FAILS == 0) {
+ if (pe->check_count == EEH_MAX_FAILS) {
dn = pci_device_to_OF_node(dev);
if (dn)
location = of_get_property(dn, "ibm,loc-code",
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index cdc53fd90597..b313628966ad 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -484,7 +484,7 @@ EXC_COMMON_BEGIN(unrecover_mce)
b 1b
-EXC_REAL(data_access, 0x300, 0x80)
+EXC_REAL_OOL(data_access, 0x300, 0x80)
EXC_VIRT(data_access, 0x4300, 0x80, 0x300)
TRAMP_KVM_SKIP(PACA_EXGEN, 0x300)
@@ -516,13 +516,16 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXSLB)
+ b tramp_data_access_slb
+EXC_REAL_END(data_access_slb, 0x380, 0x80)
+
+TRAMP_REAL_BEGIN(tramp_data_access_slb)
EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
mr r12,r3 /* save r3 */
mfspr r3,SPRN_DAR
mfspr r11,SPRN_SRR1
crset 4*cr6+eq
BRANCH_TO_COMMON(r10, slb_miss_common)
-EXC_REAL_END(data_access_slb, 0x380, 0x80)
EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
SET_SCRATCH0(r13)
@@ -537,7 +540,7 @@ EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
-EXC_REAL(instruction_access, 0x400, 0x80)
+EXC_REAL_OOL(instruction_access, 0x400, 0x80)
EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400)
TRAMP_KVM(PACA_EXGEN, 0x400)
@@ -560,13 +563,16 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXSLB)
+ b tramp_instruction_access_slb
+EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
+
+TRAMP_REAL_BEGIN(tramp_instruction_access_slb)
EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
mr r12,r3 /* save r3 */
mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
mfspr r11,SPRN_SRR1
crclr 4*cr6+eq
BRANCH_TO_COMMON(r10, slb_miss_common)
-EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
SET_SCRATCH0(r13)
@@ -830,13 +836,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80)
-EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900)
+EXC_VIRT_OOL_MASKABLE(decrementer, 0x4900, 0x80, 0x900)
TRAMP_KVM(PACA_EXGEN, 0x900)
EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
-EXC_REAL_HV(hdecrementer, 0x980, 0x80)
-EXC_VIRT_HV(hdecrementer, 0x4980, 0x80, 0x980)
+EXC_REAL_OOL_HV(hdecrementer, 0x980, 0x80)
+EXC_VIRT_OOL_HV(hdecrementer, 0x4980, 0x80, 0x980)
TRAMP_KVM_HV(PACA_EXGEN, 0x980)
EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
@@ -1453,15 +1459,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback)
.endr
blr
-TRAMP_REAL_BEGIN(rfi_flush_fallback)
- SET_SCRATCH0(r13);
- GET_PACA(r13);
- std r1,PACA_EXRFI+EX_R12(r13)
- ld r1,PACAKSAVE(r13)
- std r9,PACA_EXRFI+EX_R9(r13)
- std r10,PACA_EXRFI+EX_R10(r13)
- std r11,PACA_EXRFI+EX_R11(r13)
- mfctr r9
+/* Clobbers r10, r11, ctr */
+.macro L1D_DISPLACEMENT_FLUSH
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
@@ -1472,7 +1471,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
sync
/*
- * The load adresses are at staggered offsets within cachelines,
+ * The load addresses are at staggered offsets within cachelines,
* which suits some pipelines better (on others it should not
* hurt).
*/
@@ -1487,7 +1486,30 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
ld r11,(0x80 + 8)*7(r10)
addi r10,r10,0x80*8
bdnz 1b
+.endm
+
+TRAMP_REAL_BEGIN(entry_flush_fallback)
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ mfctr r9
+ L1D_DISPLACEMENT_FLUSH
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ blr
+TRAMP_REAL_BEGIN(rfi_flush_fallback)
+ SET_SCRATCH0(r13);
+ GET_PACA(r13);
+ std r1,PACA_EXRFI+EX_R12(r13)
+ ld r1,PACAKSAVE(r13)
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ mfctr r9
+ L1D_DISPLACEMENT_FLUSH
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
@@ -1505,32 +1527,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
mfctr r9
- ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
- ld r11,PACA_L1D_FLUSH_SIZE(r13)
- srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
- mtctr r11
- DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
-
- /* order ld/st prior to dcbt stop all streams with flushing */
- sync
-
- /*
- * The load adresses are at staggered offsets within cachelines,
- * which suits some pipelines better (on others it should not
- * hurt).
- */
-1:
- ld r11,(0x80 + 8)*0(r10)
- ld r11,(0x80 + 8)*1(r10)
- ld r11,(0x80 + 8)*2(r10)
- ld r11,(0x80 + 8)*3(r10)
- ld r11,(0x80 + 8)*4(r10)
- ld r11,(0x80 + 8)*5(r10)
- ld r11,(0x80 + 8)*6(r10)
- ld r11,(0x80 + 8)*7(r10)
- addi r10,r10,0x80*8
- bdnz 1b
-
+ L1D_DISPLACEMENT_FLUSH
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
@@ -1539,6 +1536,19 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
GET_SCRATCH0(r13);
hrfid
+USE_TEXT_SECTION()
+
+_GLOBAL(do_uaccess_flush)
+ UACCESS_FLUSH_FIXUP_SECTION
+ nop
+ nop
+ nop
+ blr
+ L1D_DISPLACEMENT_FLUSH
+ blr
+_ASM_NOKPROBE_SYMBOL(do_uaccess_flush)
+EXPORT_SYMBOL(do_uaccess_flush)
+
/*
* Real mode exceptions actually use this too, but alternate
* instruction code patches (which end up in the common .text area)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 2d0d89e2cb9a..95ecfec96b49 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -301,7 +301,7 @@ SystemCall:
/* On the MPC8xx, this is a software emulation interrupt. It occurs
* for all unimplemented and illegal instructions.
*/
- EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD)
+ EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD)
. = 0x1100
/*
@@ -398,11 +398,9 @@ _ENTRY(ITLBMiss_cmp)
#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
rlwimi r10, r11, 1, MI_SPS16K
#endif
-#ifdef CONFIG_SWAP
- rlwinm r11, r10, 32-5, _PAGE_PRESENT
+ rlwinm r11, r10, 32-11, _PAGE_PRESENT
and r11, r11, r10
rlwimi r10, r11, 0, _PAGE_PRESENT
-#endif
li r11, RPN_PATTERN
/* The Linux PTE won't go exactly into the MMU TLB.
* Software indicator bits 20-23 and 28 must be clear.
@@ -528,11 +526,9 @@ _ENTRY(DTLBMiss_jmp)
* r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
* r10 = (r10 & ~PRESENT) | r11;
*/
-#ifdef CONFIG_SWAP
- rlwinm r11, r10, 32-5, _PAGE_PRESENT
+ rlwinm r11, r10, 32-11, _PAGE_PRESENT
and r11, r11, r10
rlwimi r10, r11, 0, _PAGE_PRESENT
-#endif
/* The Linux PTE won't go exactly into the MMU TLB.
* Software indicator bits 22 and 28 must be clear.
* Software indicator bits 24, 25, 26, and 27 must be
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 80b6caaa9b92..87af91937c8a 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1055,7 +1055,7 @@ int iommu_take_ownership(struct iommu_table *tbl)
spin_lock_irqsave(&tbl->large_pool.lock, flags);
for (i = 0; i < tbl->nr_pools; i++)
- spin_lock(&tbl->pools[i].lock);
+ spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);
if (tbl->it_offset == 0)
clear_bit(0, tbl->it_map);
@@ -1084,7 +1084,7 @@ void iommu_release_ownership(struct iommu_table *tbl)
spin_lock_irqsave(&tbl->large_pool.lock, flags);
for (i = 0; i < tbl->nr_pools; i++)
- spin_lock(&tbl->pools[i].lock);
+ spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);
memset(tbl->it_map, 0, sz);
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 07d3f3b40246..b8b62df102f1 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -279,7 +279,8 @@ int kprobe_handler(struct pt_regs *regs)
if (user_mode(regs))
return 0;
- if (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR))
+ if (!IS_ENABLED(CONFIG_BOOKE) &&
+ (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR)))
return 0;
/*
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 9dafd7af39b8..cb4d6cd949fc 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -113,11 +113,12 @@ void machine_kexec(struct kimage *image)
void __init reserve_crashkernel(void)
{
- unsigned long long crash_size, crash_base;
+ unsigned long long crash_size, crash_base, total_mem_sz;
int ret;
+ total_mem_sz = memory_limit ? memory_limit : memblock_phys_mem_size();
/* use common parsing */
- ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+ ret = parse_crashkernel(boot_command_line, total_mem_sz,
&crash_size, &crash_base);
if (ret == 0 && crash_size > 0) {
crashk_res.start = crash_base;
@@ -176,6 +177,7 @@ void __init reserve_crashkernel(void)
/* Crash kernel trumps memory limit */
if (memory_limit && memory_limit <= crashk_res.end) {
memory_limit = crashk_res.end + 1;
+ total_mem_sz = memory_limit;
printk("Adjusted memory limit for crashkernel, now 0x%llx\n",
memory_limit);
}
@@ -184,7 +186,7 @@ void __init reserve_crashkernel(void)
"for crashkernel (System RAM: %ldMB)\n",
(unsigned long)(crash_size >> 20),
(unsigned long)(crashk_res.start >> 20),
- (unsigned long)(memblock_phys_mem_size() >> 20));
+ (unsigned long)(total_mem_sz >> 20));
if (!memblock_is_region_memory(crashk_res.start, crash_size) ||
memblock_reserve(crashk_res.start, crash_size)) {
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 4d8f6291b766..bbf033bda55d 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -695,7 +695,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
/*
* If found, replace it with:
* addis r2, r12, (.TOC.-func)@ha
- * addi r2, r12, (.TOC.-func)@l
+ * addi r2, r2, (.TOC.-func)@l
*/
((uint32_t *)location)[0] = 0x3c4c0000 + PPC_HA(value);
((uint32_t *)location)[1] = 0x38420000 + PPC_LO(value);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index d96b28415090..99409d0420d2 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -265,7 +265,7 @@ static struct feature_property {
};
#if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU)
-static inline void identical_pvr_fixup(unsigned long node)
+static __init void identical_pvr_fixup(unsigned long node)
{
unsigned int pvr;
const char *model = of_get_flat_dt_prop(node, "model", NULL);
@@ -658,6 +658,23 @@ static void __init early_reserve_mem(void)
#endif
}
+#ifdef CONFIG_PPC64
+static void __init save_fscr_to_task(void)
+{
+ /*
+ * Ensure the init_task (pid 0, aka swapper) uses the value of FSCR we
+ * have configured via the device tree features or via __init_FSCR().
+ * That value will then be propagated to pid 1 (init) and all future
+ * processes.
+ */
+ if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
+ init_task.thread.fscr = mfspr(SPRN_FSCR);
+}
+#else
+static inline void save_fscr_to_task(void) {};
+#endif
+
+
void __init early_init_devtree(void *params)
{
phys_addr_t limit;
@@ -743,6 +760,8 @@ void __init early_init_devtree(void *params)
BUG();
}
+ save_fscr_to_task();
+
#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
/* We'll later wait for secondaries to check in; there are
* NCPUS-1 non-boot CPUs :-)
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index a01f83ba739e..55b266d7afe1 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1056,6 +1056,147 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
return NULL;
}
+#ifdef CONFIG_PPC_RTAS_FILTER
+
+/*
+ * The sys_rtas syscall, as originally designed, allows root to pass
+ * arbitrary physical addresses to RTAS calls. A number of RTAS calls
+ * can be abused to write to arbitrary memory and do other things that
+ * are potentially harmful to system integrity, and thus should only
+ * be used inside the kernel and not exposed to userspace.
+ *
+ * All known legitimate users of the sys_rtas syscall will only ever
+ * pass addresses that fall within the RMO buffer, and use a known
+ * subset of RTAS calls.
+ *
+ * Accordingly, we filter RTAS requests to check that the call is
+ * permitted, and that provided pointers fall within the RMO buffer.
+ * The rtas_filters list contains an entry for each permitted call,
+ * with the indexes of the parameters which are expected to contain
+ * addresses and sizes of buffers allocated inside the RMO buffer.
+ */
+struct rtas_filter {
+ const char *name;
+ int token;
+ /* Indexes into the args buffer, -1 if not used */
+ int buf_idx1;
+ int size_idx1;
+ int buf_idx2;
+ int size_idx2;
+
+ int fixed_size;
+};
+
+static struct rtas_filter rtas_filters[] __ro_after_init = {
+ { "ibm,activate-firmware", -1, -1, -1, -1, -1 },
+ { "ibm,configure-connector", -1, 0, -1, 1, -1, 4096 }, /* Special cased */
+ { "display-character", -1, -1, -1, -1, -1 },
+ { "ibm,display-message", -1, 0, -1, -1, -1 },
+ { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 },
+ { "ibm,close-errinjct", -1, -1, -1, -1, -1 },
+ { "ibm,open-errinjct", -1, -1, -1, -1, -1 },
+ { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 },
+ { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 },
+ { "ibm,get-indices", -1, 2, 3, -1, -1 },
+ { "get-power-level", -1, -1, -1, -1, -1 },
+ { "get-sensor-state", -1, -1, -1, -1, -1 },
+ { "ibm,get-system-parameter", -1, 1, 2, -1, -1 },
+ { "get-time-of-day", -1, -1, -1, -1, -1 },
+ { "ibm,get-vpd", -1, 0, -1, 1, 2 },
+ { "ibm,lpar-perftools", -1, 2, 3, -1, -1 },
+ { "ibm,platform-dump", -1, 4, 5, -1, -1 },
+ { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 },
+ { "ibm,scan-log-dump", -1, 0, 1, -1, -1 },
+ { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 },
+ { "ibm,set-eeh-option", -1, -1, -1, -1, -1 },
+ { "set-indicator", -1, -1, -1, -1, -1 },
+ { "set-power-level", -1, -1, -1, -1, -1 },
+ { "set-time-for-power-on", -1, -1, -1, -1, -1 },
+ { "ibm,set-system-parameter", -1, 1, -1, -1, -1 },
+ { "set-time-of-day", -1, -1, -1, -1, -1 },
+ { "ibm,suspend-me", -1, -1, -1, -1, -1 },
+ { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 },
+ { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 },
+ { "ibm,physical-attestation", -1, 0, 1, -1, -1 },
+};
+
+static bool in_rmo_buf(u32 base, u32 end)
+{
+ return base >= rtas_rmo_buf &&
+ base < (rtas_rmo_buf + RTAS_RMOBUF_MAX) &&
+ base <= end &&
+ end >= rtas_rmo_buf &&
+ end < (rtas_rmo_buf + RTAS_RMOBUF_MAX);
+}
+
+static bool block_rtas_call(int token, int nargs,
+ struct rtas_args *args)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) {
+ struct rtas_filter *f = &rtas_filters[i];
+ u32 base, size, end;
+
+ if (token != f->token)
+ continue;
+
+ if (f->buf_idx1 != -1) {
+ base = be32_to_cpu(args->args[f->buf_idx1]);
+ if (f->size_idx1 != -1)
+ size = be32_to_cpu(args->args[f->size_idx1]);
+ else if (f->fixed_size)
+ size = f->fixed_size;
+ else
+ size = 1;
+
+ end = base + size - 1;
+ if (!in_rmo_buf(base, end))
+ goto err;
+ }
+
+ if (f->buf_idx2 != -1) {
+ base = be32_to_cpu(args->args[f->buf_idx2]);
+ if (f->size_idx2 != -1)
+ size = be32_to_cpu(args->args[f->size_idx2]);
+ else if (f->fixed_size)
+ size = f->fixed_size;
+ else
+ size = 1;
+ end = base + size - 1;
+
+ /*
+ * Special case for ibm,configure-connector where the
+ * address can be 0
+ */
+ if (!strcmp(f->name, "ibm,configure-connector") &&
+ base == 0)
+ return false;
+
+ if (!in_rmo_buf(base, end))
+ goto err;
+ }
+
+ return false;
+ }
+
+err:
+ pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n");
+ pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n",
+ token, nargs, current->comm);
+ return true;
+}
+
+#else
+
+static bool block_rtas_call(int token, int nargs,
+ struct rtas_args *args)
+{
+ return false;
+}
+
+#endif /* CONFIG_PPC_RTAS_FILTER */
+
/* We assume to be passed big endian arguments */
asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
{
@@ -1093,6 +1234,9 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
args.rets = &args.args[nargs];
memset(args.rets, 0, nret * sizeof(rtas_arg_t));
+ if (block_rtas_call(token, nargs, &args))
+ return -EINVAL;
+
/* Need to handle ibm,suspend_me call specially */
if (token == ibm_suspend_me_token) {
@@ -1154,6 +1298,9 @@ void __init rtas_initialize(void)
unsigned long rtas_region = RTAS_INSTANTIATE_MAX;
u32 base, size, entry;
int no_base, no_size, no_entry;
+#ifdef CONFIG_PPC_RTAS_FILTER
+ int i;
+#endif
/* Get RTAS dev node and fill up our "rtas" structure with infos
* about it.
@@ -1189,6 +1336,12 @@ void __init rtas_initialize(void)
#ifdef CONFIG_RTAS_ERROR_LOGGING
rtas_last_error_token = rtas_token("rtas-last-error");
#endif
+
+#ifdef CONFIG_PPC_RTAS_FILTER
+ for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) {
+ rtas_filters[i].token = rtas_token(rtas_filters[i].name);
+ }
+#endif
}
int __init early_init_dt_scan_rtas(unsigned long node,
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index a1e336901cc8..a1eec409695e 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -792,7 +792,13 @@ early_initcall(disable_hardlockup_detector);
static enum l1d_flush_type enabled_flush_types;
static void *l1d_flush_fallback_area;
static bool no_rfi_flush;
+static bool no_entry_flush;
+static bool no_uaccess_flush;
bool rfi_flush;
+bool entry_flush;
+bool uaccess_flush;
+DEFINE_STATIC_KEY_FALSE(uaccess_flush_key);
+EXPORT_SYMBOL(uaccess_flush_key);
static int __init handle_no_rfi_flush(char *p)
{
@@ -802,6 +808,22 @@ static int __init handle_no_rfi_flush(char *p)
}
early_param("no_rfi_flush", handle_no_rfi_flush);
+static int __init handle_no_entry_flush(char *p)
+{
+ pr_info("entry-flush: disabled on command line.");
+ no_entry_flush = true;
+ return 0;
+}
+early_param("no_entry_flush", handle_no_entry_flush);
+
+static int __init handle_no_uaccess_flush(char *p)
+{
+ pr_info("uaccess-flush: disabled on command line.");
+ no_uaccess_flush = true;
+ return 0;
+}
+early_param("no_uaccess_flush", handle_no_uaccess_flush);
+
/*
* The RFI flush is not KPTI, but because users will see doco that says to use
* nopti we hijack that option here to also disable the RFI flush.
@@ -833,6 +855,32 @@ void rfi_flush_enable(bool enable)
rfi_flush = enable;
}
+void entry_flush_enable(bool enable)
+{
+ if (enable) {
+ do_entry_flush_fixups(enabled_flush_types);
+ on_each_cpu(do_nothing, NULL, 1);
+ } else {
+ do_entry_flush_fixups(L1D_FLUSH_NONE);
+ }
+
+ entry_flush = enable;
+}
+
+void uaccess_flush_enable(bool enable)
+{
+ if (enable) {
+ do_uaccess_flush_fixups(enabled_flush_types);
+ static_branch_enable(&uaccess_flush_key);
+ on_each_cpu(do_nothing, NULL, 1);
+ } else {
+ static_branch_disable(&uaccess_flush_key);
+ do_uaccess_flush_fixups(L1D_FLUSH_NONE);
+ }
+
+ uaccess_flush = enable;
+}
+
static void __ref init_fallback_flush(void)
{
u64 l1d_size, limit;
@@ -874,10 +922,28 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable)
enabled_flush_types = types;
- if (!no_rfi_flush && !cpu_mitigations_off())
+ if (!cpu_mitigations_off() && !no_rfi_flush)
rfi_flush_enable(enable);
}
+void setup_entry_flush(bool enable)
+{
+ if (cpu_mitigations_off())
+ return;
+
+ if (!no_entry_flush)
+ entry_flush_enable(enable);
+}
+
+void setup_uaccess_flush(bool enable)
+{
+ if (cpu_mitigations_off())
+ return;
+
+ if (!no_uaccess_flush)
+ uaccess_flush_enable(enable);
+}
+
#ifdef CONFIG_DEBUG_FS
static int rfi_flush_set(void *data, u64 val)
{
@@ -905,9 +971,63 @@ static int rfi_flush_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
+static int entry_flush_set(void *data, u64 val)
+{
+ bool enable;
+
+ if (val == 1)
+ enable = true;
+ else if (val == 0)
+ enable = false;
+ else
+ return -EINVAL;
+
+ /* Only do anything if we're changing state */
+ if (enable != entry_flush)
+ entry_flush_enable(enable);
+
+ return 0;
+}
+
+static int entry_flush_get(void *data, u64 *val)
+{
+ *val = entry_flush ? 1 : 0;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n");
+
+static int uaccess_flush_set(void *data, u64 val)
+{
+ bool enable;
+
+ if (val == 1)
+ enable = true;
+ else if (val == 0)
+ enable = false;
+ else
+ return -EINVAL;
+
+ /* Only do anything if we're changing state */
+ if (enable != uaccess_flush)
+ uaccess_flush_enable(enable);
+
+ return 0;
+}
+
+static int uaccess_flush_get(void *data, u64 *val)
+{
+ *val = uaccess_flush ? 1 : 0;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n");
+
static __init int rfi_flush_debugfs_init(void)
{
debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
+ debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush);
+ debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush);
return 0;
}
device_initcall(rfi_flush_debugfs_init);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e0a4c1f82e25..7c7aa7c98ba3 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -985,6 +985,9 @@ void start_secondary(void *unused)
vdso_getcpu_init();
#endif
+ set_numa_node(numa_cpu_lookup_table[cpu]);
+ set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
+
/* Update topology CPU masks */
add_cpu_to_masks(cpu);
@@ -995,9 +998,6 @@ void start_secondary(void *unused)
if (!cpumask_equal(cpu_l2_cache_mask(cpu), cpu_sibling_mask(cpu)))
shared_caches = true;
- set_numa_node(numa_cpu_lookup_table[cpu]);
- set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
-
smp_wmb();
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 4437c70c7c2b..952fbc2d4abc 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -28,29 +28,27 @@
static DEFINE_PER_CPU(struct cpu, cpu_devices);
-/*
- * SMT snooze delay stuff, 64-bit only for now
- */
-
#ifdef CONFIG_PPC64
-/* Time in microseconds we delay before sleeping in the idle loop */
-static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 };
+/*
+ * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle:
+ * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in
+ * 2014:
+ *
+ * "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean
+ * up the kernel code."
+ *
+ * powerpc-utils stopped using it as of 1.3.8. At some point in the future this
+ * code should be removed.
+ */
static ssize_t store_smt_snooze_delay(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
- struct cpu *cpu = container_of(dev, struct cpu, dev);
- ssize_t ret;
- long snooze;
-
- ret = sscanf(buf, "%ld", &snooze);
- if (ret != 1)
- return -EINVAL;
-
- per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
+ pr_warn_once("%s (%d) stored to unsupported smt_snooze_delay, which has no effect.\n",
+ current->comm, current->pid);
return count;
}
@@ -58,9 +56,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct cpu *cpu = container_of(dev, struct cpu, dev);
-
- return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id));
+ pr_warn_once("%s (%d) read from unsupported smt_snooze_delay\n",
+ current->comm, current->pid);
+ return sprintf(buf, "100\n");
}
static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
@@ -68,16 +66,10 @@ static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
static int __init setup_smt_snooze_delay(char *str)
{
- unsigned int cpu;
- long snooze;
-
if (!cpu_has_feature(CPU_FTR_SMT))
return 1;
- snooze = simple_strtol(str, NULL, 10);
- for_each_possible_cpu(cpu)
- per_cpu(smt_snooze_delay, cpu) = snooze;
-
+ pr_warn("smt-snooze-delay command line option has no effect\n");
return 1;
}
__setup("smt-snooze-delay=", setup_smt_snooze_delay);
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index a3374e8a258c..2615cd66dad8 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -38,8 +38,6 @@ static struct tau_temp
struct timer_list tau_timer;
-#undef DEBUG
-
/* TODO: put these in a /proc interface, with some sanity checks, and maybe
* dynamic adjustment to minimize # of interrupts */
/* configurable values for step size and how much to expand the window when
@@ -72,47 +70,33 @@ void set_thresholds(unsigned long cpu)
void TAUupdate(int cpu)
{
- unsigned thrm;
-
-#ifdef DEBUG
- printk("TAUupdate ");
-#endif
+ u32 thrm;
+ u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V;
/* if both thresholds are crossed, the step_sizes cancel out
* and the window winds up getting expanded twice. */
- if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */
- if(thrm & THRM1_TIN){ /* crossed low threshold */
- if (tau[cpu].low >= step_size){
- tau[cpu].low -= step_size;
- tau[cpu].high -= (step_size - window_expand);
- }
- tau[cpu].grew = 1;
-#ifdef DEBUG
- printk("low threshold crossed ");
-#endif
+ thrm = mfspr(SPRN_THRM1);
+ if ((thrm & bits) == bits) {
+ mtspr(SPRN_THRM1, 0);
+
+ if (tau[cpu].low >= step_size) {
+ tau[cpu].low -= step_size;
+ tau[cpu].high -= (step_size - window_expand);
}
+ tau[cpu].grew = 1;
+ pr_debug("%s: low threshold crossed\n", __func__);
}
- if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? */
- if(thrm & THRM1_TIN){ /* crossed high threshold */
- if (tau[cpu].high <= 127-step_size){
- tau[cpu].low += (step_size - window_expand);
- tau[cpu].high += step_size;
- }
- tau[cpu].grew = 1;
-#ifdef DEBUG
- printk("high threshold crossed ");
-#endif
+ thrm = mfspr(SPRN_THRM2);
+ if ((thrm & bits) == bits) {
+ mtspr(SPRN_THRM2, 0);
+
+ if (tau[cpu].high <= 127 - step_size) {
+ tau[cpu].low += (step_size - window_expand);
+ tau[cpu].high += step_size;
}
+ tau[cpu].grew = 1;
+ pr_debug("%s: high threshold crossed\n", __func__);
}
-
-#ifdef DEBUG
- printk("grew = %d\n", tau[cpu].grew);
-#endif
-
-#ifndef CONFIG_TAU_INT /* tau_timeout will do this if not using interrupts */
- set_thresholds(cpu);
-#endif
-
}
#ifdef CONFIG_TAU_INT
@@ -137,18 +121,18 @@ void TAUException(struct pt_regs * regs)
static void tau_timeout(void * info)
{
int cpu;
- unsigned long flags;
int size;
int shrink;
- /* disabling interrupts *should* be okay */
- local_irq_save(flags);
cpu = smp_processor_id();
#ifndef CONFIG_TAU_INT
TAUupdate(cpu);
#endif
+ /* Stop thermal sensor comparisons and interrupts */
+ mtspr(SPRN_THRM3, 0);
+
size = tau[cpu].high - tau[cpu].low;
if (size > min_window && ! tau[cpu].grew) {
/* do an exponential shrink of half the amount currently over size */
@@ -170,22 +154,12 @@ static void tau_timeout(void * info)
set_thresholds(cpu);
- /*
- * Do the enable every time, since otherwise a bunch of (relatively)
- * complex sleep code needs to be added. One mtspr every time
- * tau_timeout is called is probably not a big deal.
- *
- * Enable thermal sensor and set up sample interval timer
- * need 20 us to do the compare.. until a nice 'cpu_speed' function
- * call is implemented, just assume a 500 mhz clock. It doesn't really
- * matter if we take too long for a compare since it's all interrupt
- * driven anyway.
- *
- * use a extra long time.. (60 us @ 500 mhz)
+ /* Restart thermal sensor comparisons and interrupts.
+ * The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet"
+ * recommends that "the maximum value be set in THRM3 under all
+ * conditions."
*/
- mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E);
-
- local_irq_restore(flags);
+ mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E);
}
static void tau_timeout_smp(unsigned long unused)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 3c9457420aee..05c1aabad01c 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -357,11 +357,14 @@ out:
#ifdef CONFIG_PPC_BOOK3S_64
BUG_ON(get_paca()->in_nmi == 0);
if (get_paca()->in_nmi > 1)
- nmi_panic(regs, "Unrecoverable nested System Reset");
+ die("Unrecoverable nested System Reset", regs, SIGABRT);
#endif
/* Must die if the interrupt is not recoverable */
- if (!(regs->msr & MSR_RI))
- nmi_panic(regs, "Unrecoverable System Reset");
+ if (!(regs->msr & MSR_RI)) {
+ /* For the reason explained in die_mce, nmi_exit before die */
+ nmi_exit();
+ die("Unrecoverable System Reset", regs, SIGABRT);
+ }
if (!nested)
nmi_exit();
@@ -701,7 +704,7 @@ void machine_check_exception(struct pt_regs *regs)
/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
- nmi_panic(regs, "Unrecoverable Machine check");
+ die("Unrecoverable Machine check", regs, SIGBUS);
return;
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 22b01a3962f0..3edaee28b638 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -704,7 +704,7 @@ int vdso_getcpu_init(void)
node = cpu_to_node(cpu);
WARN_ON_ONCE(node > 0xffff);
- val = (cpu & 0xfff) | ((node & 0xffff) << 16);
+ val = (cpu & 0xffff) | ((node & 0xffff) << 16);
mtspr(SPRN_SPRG_VDSO_WRITE, val);
get_paca()->sprg_vdso = val;
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index e4da937d6cf9..efb9a6982561 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -141,6 +141,20 @@ SECTIONS
}
. = ALIGN(8);
+ __uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) {
+ __start___uaccess_flush_fixup = .;
+ *(__uaccess_flush_fixup)
+ __stop___uaccess_flush_fixup = .;
+ }
+
+ . = ALIGN(8);
+ __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) {
+ __start___entry_flush_fixup = .;
+ *(__entry_flush_fixup)
+ __stop___entry_flush_fixup = .;
+ }
+
+ . = ALIGN(8);
__stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {
__start___stf_exit_barrier_fixup = .;
*(__stf_exit_barrier_fixup)