diff options
Diffstat (limited to 'arch/powerpc')
30 files changed, 331 insertions, 254 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9fa23eb320ff..cf78ad7ff0b7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -135,7 +135,7 @@ config PPC select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE - select ARCH_HAS_UACCESS_MCSAFE if PPC64 + select ARCH_HAS_COPY_MC if PPC64 select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_KEEP_MEMBLOCK diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 3f9ae3585ab9..80c953414882 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -13,20 +13,19 @@ */ #define MAX_EA_BITS_PER_CONTEXT 46 -#define REGION_SHIFT (MAX_EA_BITS_PER_CONTEXT - 2) /* - * Our page table limit us to 64TB. Hence for the kernel mapping, - * each MAP area is limited to 16 TB. - * The four map areas are: linear mapping, vmap, IO and vmemmap + * Our page table limit us to 64TB. For 64TB physical memory, we only need 64GB + * of vmemmap space. To better support sparse memory layout, we use 61TB + * linear map range, 1TB of vmalloc, 1TB of I/O and 1TB of vmememmap. */ +#define REGION_SHIFT (40) #define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT) /* - * Define the address range of the kernel non-linear virtual area - * 16TB + * Define the address range of the kernel non-linear virtual area (61TB) */ -#define H_KERN_VIRT_START ASM_CONST(0xc000100000000000) +#define H_KERN_VIRT_START ASM_CONST(0xc0003d0000000000) #ifndef __ASSEMBLY__ #define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 414d209f45bb..c711fe890110 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -8,14 +8,13 @@ #ifndef _ASM_POWERPC_LMB_H #define _ASM_POWERPC_LMB_H +#include <linux/sched.h> + struct drmem_lmb { u64 base_addr; u32 drc_index; u32 aa_index; u32 flags; -#ifdef CONFIG_MEMORY_HOTPLUG - int nid; -#endif }; struct drmem_lmb_info { @@ -26,8 +25,22 @@ struct drmem_lmb_info { extern struct drmem_lmb_info *drmem_info; +static inline struct drmem_lmb *drmem_lmb_next(struct drmem_lmb *lmb, + const struct drmem_lmb *start) +{ + /* + * DLPAR code paths can take several milliseconds per element + * when interacting with firmware. Ensure that we don't + * unfairly monopolize the CPU. + */ + if (((++lmb - start) % 16) == 0) + cond_resched(); + + return lmb; +} + #define for_each_drmem_lmb_in_range(lmb, start, end) \ - for ((lmb) = (start); (lmb) < (end); (lmb)++) + for ((lmb) = (start); (lmb) < (end); lmb = drmem_lmb_next(lmb, start)) #define for_each_drmem_lmb(lmb) \ for_each_drmem_lmb_in_range((lmb), \ @@ -104,22 +117,4 @@ static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb) lmb->aa_index = 0xffffffff; } -#ifdef CONFIG_MEMORY_HOTPLUG -static inline void lmb_set_nid(struct drmem_lmb *lmb) -{ - lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr); -} -static inline void lmb_clear_nid(struct drmem_lmb *lmb) -{ - lmb->nid = -1; -} -#else -static inline void lmb_set_nid(struct drmem_lmb *lmb) -{ -} -static inline void lmb_clear_nid(struct drmem_lmb *lmb) -{ -} -#endif - #endif /* _ASM_POWERPC_LMB_H */ diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index cb424799da0d..5a00da670a40 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -40,6 +40,7 @@ struct arch_hw_breakpoint { #else #define HW_BREAKPOINT_SIZE 0x8 #endif +#define HW_BREAKPOINT_SIZE_QUADWORD 0x10 #define DABR_MAX_LEN 8 #define DAWR_MAX_LEN 512 diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 88e6c78100d9..c750afc62887 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -815,7 +815,7 @@ #define THRM1_TIN (1 << 31) #define THRM1_TIV (1 << 30) #define THRM1_THRES(x) ((x&0x7f)<<23) -#define THRM3_SITV(x) ((x&0x3fff)<<1) +#define THRM3_SITV(x) ((x & 0x1fff) << 1) #define THRM1_TID (1<<2) #define THRM1_TIE (1<<1) #define THRM1_V (1<<0) diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index b72692702f35..9bf6dffb4090 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -53,9 +53,7 @@ void *__memmove(void *to, const void *from, __kernel_size_t n); #ifndef CONFIG_KASAN #define __HAVE_ARCH_MEMSET32 #define __HAVE_ARCH_MEMSET64 -#define __HAVE_ARCH_MEMCPY_MCSAFE -extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz); extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t); extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t); extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t); diff --git a/arch/powerpc/include/asm/svm.h b/arch/powerpc/include/asm/svm.h index 85580b30aba4..7546402d796a 100644 --- a/arch/powerpc/include/asm/svm.h +++ b/arch/powerpc/include/asm/svm.h @@ -15,6 +15,8 @@ static inline bool is_secure_guest(void) return mfmsr() & MSR_S; } +void __init svm_swiotlb_init(void); + void dtl_cache_ctor(void *addr); #define get_dtl_cache_ctor() (is_secure_guest() ? dtl_cache_ctor : NULL) @@ -25,6 +27,8 @@ static inline bool is_secure_guest(void) return false; } +static inline void svm_swiotlb_init(void) {} + #define get_dtl_cache_ctor() NULL #endif /* CONFIG_PPC_SVM */ diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index 862985cf5180..cf87bbdcfdcb 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -67,19 +67,6 @@ static inline int mm_is_thread_local(struct mm_struct *mm) return false; return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)); } -static inline void mm_reset_thread_local(struct mm_struct *mm) -{ - WARN_ON(atomic_read(&mm->context.copros) > 0); - /* - * It's possible for mm_access to take a reference on mm_users to - * access the remote mm from another thread, but it's not allowed - * to set mm_cpumask, so mm_users may be > 1 here. - */ - WARN_ON(current->mm != mm); - atomic_set(&mm->context.active_cpus, 1); - cpumask_clear(mm_cpumask(mm)); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); -} #else /* CONFIG_PPC_BOOK3S_64 */ static inline int mm_is_thread_local(struct mm_struct *mm) { diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 64c04ab09112..97506441c15b 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -436,6 +436,32 @@ do { \ extern unsigned long __copy_tofrom_user(void __user *to, const void __user *from, unsigned long size); +#ifdef CONFIG_ARCH_HAS_COPY_MC +unsigned long __must_check +copy_mc_generic(void *to, const void *from, unsigned long size); + +static inline unsigned long __must_check +copy_mc_to_kernel(void *to, const void *from, unsigned long size) +{ + return copy_mc_generic(to, from, size); +} +#define copy_mc_to_kernel copy_mc_to_kernel + +static inline unsigned long __must_check +copy_mc_to_user(void __user *to, const void *from, unsigned long n) +{ + if (likely(check_copy_size(from, n, true))) { + if (access_ok(to, n)) { + allow_write_to_user(to, n); + n = copy_mc_generic((void *)to, from, n); + prevent_write_to_user(to, n); + } + } + + return n; +} +#endif + #ifdef __powerpc64__ static inline unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) @@ -524,20 +550,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) return ret; } -static __always_inline unsigned long __must_check -copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n) -{ - if (likely(check_copy_size(from, n, true))) { - if (access_ok(to, n)) { - allow_write_to_user(to, n); - n = memcpy_mcsafe((void *)to, from, n); - prevent_write_to_user(to, n); - } - } - - return n; -} - unsigned long __arch_clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index c55e67bab271..2190be70c7fd 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -519,9 +519,17 @@ static bool ea_hw_range_overlaps(unsigned long ea, int size, struct arch_hw_breakpoint *info) { unsigned long hw_start_addr, hw_end_addr; + unsigned long align_size = HW_BREAKPOINT_SIZE; - hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); - hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); + /* + * On p10 predecessors, quadword is handle differently then + * other instructions. + */ + if (!cpu_has_feature(CPU_FTR_ARCH_31) && size == 16) + align_size = HW_BREAKPOINT_SIZE_QUADWORD; + + hw_start_addr = ALIGN_DOWN(info->address, align_size); + hw_end_addr = ALIGN(info->address + info->len, align_size); return ((ea < hw_end_addr) && (ea + size > hw_start_addr)); } @@ -635,6 +643,8 @@ static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, if (*type == CACHEOP) { *size = cache_op_size(); *ea &= ~(*size - 1); + } else if (*type == LOAD_VMX || *type == STORE_VMX) { + *ea &= ~(*size - 1); } } diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 05b1cc0e009e..3a22281a8264 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -214,7 +214,7 @@ void replay_soft_interrupts(void) struct pt_regs regs; ppc_save_regs(®s); - regs.softe = IRQS_ALL_DISABLED; + regs.softe = IRQS_ENABLED; again: if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) @@ -368,6 +368,12 @@ notrace void arch_local_irq_restore(unsigned long mask) } } + /* + * Disable preempt here, so that the below preempt_enable will + * perform resched if required (a replayed interrupt may set + * need_resched). + */ + preempt_disable(); irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); @@ -377,6 +383,7 @@ notrace void arch_local_irq_restore(unsigned long mask) trace_hardirqs_on(); irq_soft_mask_set(IRQS_ENABLED); __hard_irq_enable(); + preempt_enable(); } EXPORT_SYMBOL(arch_local_irq_restore); diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c index 697c7e4b5877..8bd8d8de5c40 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -219,6 +219,7 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf brk.address = ALIGN_DOWN(bp_info->addr, HW_BREAKPOINT_SIZE); brk.type = HW_BRK_TYPE_TRANSLATE; brk.len = DABR_MAX_LEN; + brk.hw_len = DABR_MAX_LEN; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) brk.type |= HW_BRK_TYPE_READ; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index e2ab8a111b69..0b4694b8d248 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -13,13 +13,14 @@ */ #include <linux/errno.h> -#include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/param.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/init.h> +#include <linux/delay.h> +#include <linux/workqueue.h> #include <asm/io.h> #include <asm/reg.h> @@ -39,9 +40,7 @@ static struct tau_temp unsigned char grew; } tau[NR_CPUS]; -struct timer_list tau_timer; - -#undef DEBUG +static bool tau_int_enable; /* TODO: put these in a /proc interface, with some sanity checks, and maybe * dynamic adjustment to minimize # of interrupts */ @@ -50,72 +49,49 @@ struct timer_list tau_timer; #define step_size 2 /* step size when temp goes out of range */ #define window_expand 1 /* expand the window by this much */ /* configurable values for shrinking the window */ -#define shrink_timer 2*HZ /* period between shrinking the window */ +#define shrink_timer 2000 /* period between shrinking the window */ #define min_window 2 /* minimum window size, degrees C */ static void set_thresholds(unsigned long cpu) { -#ifdef CONFIG_TAU_INT - /* - * setup THRM1, - * threshold, valid bit, enable interrupts, interrupt when below threshold - */ - mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID); + u32 maybe_tie = tau_int_enable ? THRM1_TIE : 0; - /* setup THRM2, - * threshold, valid bit, enable interrupts, interrupt when above threshold - */ - mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE); -#else - /* same thing but don't enable interrupts */ - mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TID); - mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V); -#endif + /* setup THRM1, threshold, valid bit, interrupt when below threshold */ + mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | maybe_tie | THRM1_TID); + + /* setup THRM2, threshold, valid bit, interrupt when above threshold */ + mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | maybe_tie); } static void TAUupdate(int cpu) { - unsigned thrm; - -#ifdef DEBUG - printk("TAUupdate "); -#endif + u32 thrm; + u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V; /* if both thresholds are crossed, the step_sizes cancel out * and the window winds up getting expanded twice. */ - if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */ - if(thrm & THRM1_TIN){ /* crossed low threshold */ - if (tau[cpu].low >= step_size){ - tau[cpu].low -= step_size; - tau[cpu].high -= (step_size - window_expand); - } - tau[cpu].grew = 1; -#ifdef DEBUG - printk("low threshold crossed "); -#endif + thrm = mfspr(SPRN_THRM1); + if ((thrm & bits) == bits) { + mtspr(SPRN_THRM1, 0); + + if (tau[cpu].low >= step_size) { + tau[cpu].low -= step_size; + tau[cpu].high -= (step_size - window_expand); } + tau[cpu].grew = 1; + pr_debug("%s: low threshold crossed\n", __func__); } - if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? */ - if(thrm & THRM1_TIN){ /* crossed high threshold */ - if (tau[cpu].high <= 127-step_size){ - tau[cpu].low += (step_size - window_expand); - tau[cpu].high += step_size; - } - tau[cpu].grew = 1; -#ifdef DEBUG - printk("high threshold crossed "); -#endif + thrm = mfspr(SPRN_THRM2); + if ((thrm & bits) == bits) { + mtspr(SPRN_THRM2, 0); + + if (tau[cpu].high <= 127 - step_size) { + tau[cpu].low += (step_size - window_expand); + tau[cpu].high += step_size; } + tau[cpu].grew = 1; + pr_debug("%s: high threshold crossed\n", __func__); } - -#ifdef DEBUG - printk("grew = %d\n", tau[cpu].grew); -#endif - -#ifndef CONFIG_TAU_INT /* tau_timeout will do this if not using interrupts */ - set_thresholds(cpu); -#endif - } #ifdef CONFIG_TAU_INT @@ -140,17 +116,16 @@ void TAUException(struct pt_regs * regs) static void tau_timeout(void * info) { int cpu; - unsigned long flags; int size; int shrink; - /* disabling interrupts *should* be okay */ - local_irq_save(flags); cpu = smp_processor_id(); -#ifndef CONFIG_TAU_INT - TAUupdate(cpu); -#endif + if (!tau_int_enable) + TAUupdate(cpu); + + /* Stop thermal sensor comparisons and interrupts */ + mtspr(SPRN_THRM3, 0); size = tau[cpu].high - tau[cpu].low; if (size > min_window && ! tau[cpu].grew) { @@ -173,32 +148,26 @@ static void tau_timeout(void * info) set_thresholds(cpu); - /* - * Do the enable every time, since otherwise a bunch of (relatively) - * complex sleep code needs to be added. One mtspr every time - * tau_timeout is called is probably not a big deal. - * - * Enable thermal sensor and set up sample interval timer - * need 20 us to do the compare.. until a nice 'cpu_speed' function - * call is implemented, just assume a 500 mhz clock. It doesn't really - * matter if we take too long for a compare since it's all interrupt - * driven anyway. - * - * use a extra long time.. (60 us @ 500 mhz) + /* Restart thermal sensor comparisons and interrupts. + * The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet" + * recommends that "the maximum value be set in THRM3 under all + * conditions." */ - mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E); - - local_irq_restore(flags); + mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E); } -static void tau_timeout_smp(struct timer_list *unused) -{ +static struct workqueue_struct *tau_workq; - /* schedule ourselves to be run again */ - mod_timer(&tau_timer, jiffies + shrink_timer) ; +static void tau_work_func(struct work_struct *work) +{ + msleep(shrink_timer); on_each_cpu(tau_timeout, NULL, 0); + /* schedule ourselves to be run again */ + queue_work(tau_workq, work); } +DECLARE_WORK(tau_work, tau_work_func); + /* * setup the TAU * @@ -231,21 +200,19 @@ static int __init TAU_init(void) return 1; } + tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) && + !strcmp(cur_cpu_spec->platform, "ppc750"); - /* first, set up the window shrinking timer */ - timer_setup(&tau_timer, tau_timeout_smp, 0); - tau_timer.expires = jiffies + shrink_timer; - add_timer(&tau_timer); + tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0); + if (!tau_workq) + return -ENOMEM; on_each_cpu(TAU_init_smp, NULL, 0); - printk("Thermal assist unit "); -#ifdef CONFIG_TAU_INT - printk("using interrupts, "); -#else - printk("using timers, "); -#endif - printk("shrink_timer: %d jiffies\n", shrink_timer); + queue_work(tau_workq, &tau_work); + + pr_info("Thermal assist unit using %s, shrink_timer: %d ms\n", + tau_int_enable ? "interrupts" : "workqueue", shrink_timer); tau_initialized = 1; return 0; diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 5e994cda8e40..c254f5f733a8 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ memcpy_power7.o obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ - memcpy_64.o memcpy_mcsafe_64.o + memcpy_64.o copy_mc_64.o obj64-$(CONFIG_SMP) += locks.o obj64-$(CONFIG_ALTIVEC) += vmx-helper.o diff --git a/arch/powerpc/lib/memcpy_mcsafe_64.S b/arch/powerpc/lib/copy_mc_64.S index cb882d9a6d8a..88d46c471493 100644 --- a/arch/powerpc/lib/memcpy_mcsafe_64.S +++ b/arch/powerpc/lib/copy_mc_64.S @@ -50,7 +50,7 @@ err3; stb r0,0(r3) blr -_GLOBAL(memcpy_mcsafe) +_GLOBAL(copy_mc_generic) mr r7,r5 cmpldi r5,16 blt .Lshort_copy @@ -239,4 +239,4 @@ err1; stb r0,0(r3) 15: li r3,0 blr -EXPORT_SYMBOL_GPL(memcpy_mcsafe); +EXPORT_SYMBOL_GPL(copy_mc_generic); diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index b5cc9b23cf02..277a07772e7d 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -644,19 +644,29 @@ static void do_exit_flush_lazy_tlb(void *arg) struct mm_struct *mm = arg; unsigned long pid = mm->context.id; + /* + * A kthread could have done a mmget_not_zero() after the flushing CPU + * checked mm_is_singlethreaded, and be in the process of + * kthread_use_mm when interrupted here. In that case, current->mm will + * be set to mm, because kthread_use_mm() setting ->mm and switching to + * the mm is done with interrupts off. + */ if (current->mm == mm) - return; /* Local CPU */ + goto out_flush; if (current->active_mm == mm) { - /* - * Must be a kernel thread because sender is single-threaded. - */ - BUG_ON(current->mm); + WARN_ON_ONCE(current->mm != NULL); + /* Is a kernel thread and is using mm as the lazy tlb */ mmgrab(&init_mm); - switch_mm(mm, &init_mm, current); current->active_mm = &init_mm; + switch_mm_irqs_off(mm, &init_mm, current); mmdrop(mm); } + + atomic_dec(&mm->context.active_cpus); + cpumask_clear_cpu(smp_processor_id(), mm_cpumask(mm)); + +out_flush: _tlbiel_pid(pid, RIC_FLUSH_ALL); } @@ -671,7 +681,6 @@ static void exit_flush_lazy_tlbs(struct mm_struct *mm) */ smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb, (void *)mm, 1); - mm_reset_thread_local(mm); } void radix__flush_tlb_mm(struct mm_struct *mm) diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 59327cefbc6a..873fcfc7b875 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -362,10 +362,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop) if (!drmem_info->lmbs) return; - for_each_drmem_lmb(lmb) { + for_each_drmem_lmb(lmb) read_drconf_v1_cell(lmb, &prop); - lmb_set_nid(lmb); - } } static void __init init_drmem_v2_lmbs(const __be32 *prop) @@ -410,8 +408,6 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop) lmb->aa_index = dr_cell.aa_index; lmb->flags = dr_cell.flags; - - lmb_set_nid(lmb); } } } diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 019b0c0bbbf3..ca91d04d0a7a 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -121,8 +121,7 @@ void __init kasan_mmu_init(void) { int ret; - if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) || - IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) { ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); if (ret) @@ -133,11 +132,11 @@ void __init kasan_mmu_init(void) void __init kasan_init(void) { struct memblock_region *reg; + int ret; for_each_memblock(memory, reg) { phys_addr_t base = reg->base; phys_addr_t top = min(base + reg->size, total_lowmem); - int ret; if (base >= top) continue; @@ -147,6 +146,13 @@ void __init kasan_init(void) panic("kasan: kasan_init_region() failed"); } + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); + + if (ret) + panic("kasan: kasan_init_shadow_page_tables() failed"); + } + kasan_remap_early_shadow_ro(); clear_page(kasan_early_shadow_page); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index c2c11eb8dcfc..0f21bcb16405 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -50,6 +50,7 @@ #include <asm/swiotlb.h> #include <asm/rtas.h> #include <asm/kasan.h> +#include <asm/svm.h> #include <mm/mmu_decl.h> @@ -290,7 +291,10 @@ void __init mem_init(void) * back to to-down. */ memblock_set_bottom_up(true); - swiotlb_init(0); + if (is_secure_guest()) + svm_swiotlb_init(); + else + swiotlb_init(0); #endif high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h index e608f9db12dd..8965b4463d43 100644 --- a/arch/powerpc/perf/hv-gpci-requests.h +++ b/arch/powerpc/perf/hv-gpci-requests.h @@ -95,7 +95,7 @@ REQUEST(__field(0, 8, partition_id) #define REQUEST_NAME system_performance_capabilities #define REQUEST_NUM 0x40 -#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#define REQUEST_IDX_KIND "starting_index=0xffffffff" #include I(REQUEST_BEGIN) REQUEST(__field(0, 1, perf_collect_privileged) __field(0x1, 1, capability_mask) @@ -223,7 +223,7 @@ REQUEST(__field(0, 2, partition_id) #define REQUEST_NAME system_hypervisor_times #define REQUEST_NUM 0xF0 -#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#define REQUEST_IDX_KIND "starting_index=0xffffffff" #include I(REQUEST_BEGIN) REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors) __count(0x8, 8, time_spent_processing_virtual_processor_timers) @@ -234,7 +234,7 @@ REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors) #define REQUEST_NAME system_tlbie_count_and_time #define REQUEST_NUM 0xF4 -#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#define REQUEST_IDX_KIND "starting_index=0xffffffff" #include I(REQUEST_BEGIN) REQUEST(__count(0, 8, tlbie_instructions_issued) /* diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 4c86da5eb28a..0b5c8f4fbdbf 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -269,6 +269,15 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) mask |= CNST_PMC_MASK(pmc); value |= CNST_PMC_VAL(pmc); + + /* + * PMC5 and PMC6 are used to count cycles and instructions and + * they do not support most of the constraint bits. Add a check + * to exclude PMC5/6 from most of the constraints except for + * EBB/BHRB. + */ + if (pmc >= 5) + goto ebb_bhrb; } if (pmc <= 4) { @@ -335,6 +344,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } } +ebb_bhrb: if (!pmc && ebb) /* EBB events must specify the PMC */ return -1; diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index fb7515b4fa9c..b439b027a42f 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -223,12 +223,11 @@ config TAU temperature within 2-4 degrees Celsius. This option shows the current on-die temperature in /proc/cpuinfo if the cpu supports it. - Unfortunately, on some chip revisions, this sensor is very inaccurate - and in many cases, does not work at all, so don't assume the cpu - temp is actually what /proc/cpuinfo says it is. + Unfortunately, this sensor is very inaccurate when uncalibrated, so + don't assume the cpu temp is actually what /proc/cpuinfo says it is. config TAU_INT - bool "Interrupt driven TAU driver (DANGEROUS)" + bool "Interrupt driven TAU driver (EXPERIMENTAL)" depends on TAU help The TAU supports an interrupt driven mode which causes an interrupt @@ -236,12 +235,7 @@ config TAU_INT to get notified the temp has exceeded a range. With this option off, a timer is used to re-check the temperature periodically. - However, on some cpus it appears that the TAU interrupt hardware - is buggy and can cause a situation which would lead unexplained hard - lockups. - - Unless you are extending the TAU driver, or enjoy kernel/hardware - debugging, leave this option off. + If in doubt, say N here. config TAU_AVERAGE bool "Average high and low temp" diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 543c816fa99e..0e6693bacb7e 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -318,15 +318,14 @@ static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj, return count; } -static struct dump_obj *create_dump_obj(uint32_t id, size_t size, - uint32_t type) +static void create_dump_obj(uint32_t id, size_t size, uint32_t type) { struct dump_obj *dump; int rc; dump = kzalloc(sizeof(*dump), GFP_KERNEL); if (!dump) - return NULL; + return; dump->kobj.kset = dump_kset; @@ -346,21 +345,39 @@ static struct dump_obj *create_dump_obj(uint32_t id, size_t size, rc = kobject_add(&dump->kobj, NULL, "0x%x-0x%x", type, id); if (rc) { kobject_put(&dump->kobj); - return NULL; + return; } + /* + * As soon as the sysfs file for this dump is created/activated there is + * a chance the opal_errd daemon (or any userspace) might read and + * acknowledge the dump before kobject_uevent() is called. If that + * happens then there is a potential race between + * dump_ack_store->kobject_put() and kobject_uevent() which leads to a + * use-after-free of a kernfs object resulting in a kernel crash. + * + * To avoid that, we need to take a reference on behalf of the bin file, + * so that our reference remains valid while we call kobject_uevent(). + * We then drop our reference before exiting the function, leaving the + * bin file to drop the last reference (if it hasn't already). + */ + + /* Take a reference for the bin file */ + kobject_get(&dump->kobj); rc = sysfs_create_bin_file(&dump->kobj, &dump->dump_attr); - if (rc) { + if (rc == 0) { + kobject_uevent(&dump->kobj, KOBJ_ADD); + + pr_info("%s: New platform dump. ID = 0x%x Size %u\n", + __func__, dump->id, dump->size); + } else { + /* Drop reference count taken for bin file */ kobject_put(&dump->kobj); - return NULL; } - pr_info("%s: New platform dump. ID = 0x%x Size %u\n", - __func__, dump->id, dump->size); - - kobject_uevent(&dump->kobj, KOBJ_ADD); - - return dump; + /* Drop our reference */ + kobject_put(&dump->kobj); + return; } static irqreturn_t process_dump(int irq, void *data) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 8b748690dac2..9f236149b402 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -356,25 +356,32 @@ static int dlpar_add_lmb(struct drmem_lmb *); static int dlpar_remove_lmb(struct drmem_lmb *lmb) { + struct memory_block *mem_block; unsigned long block_sz; int rc; if (!lmb_is_removable(lmb)) return -EINVAL; + mem_block = lmb_to_memblock(lmb); + if (mem_block == NULL) + return -EINVAL; + rc = dlpar_offline_lmb(lmb); - if (rc) + if (rc) { + put_device(&mem_block->dev); return rc; + } block_sz = pseries_memory_block_size(); - __remove_memory(lmb->nid, lmb->base_addr, block_sz); + __remove_memory(mem_block->nid, lmb->base_addr, block_sz); + put_device(&mem_block->dev); /* Update memory regions for memory remove */ memblock_remove(lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); - lmb_clear_nid(lmb); lmb->flags &= ~DRCONF_MEM_ASSIGNED; return 0; @@ -631,7 +638,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) static int dlpar_add_lmb(struct drmem_lmb *lmb) { unsigned long block_sz; - int rc; + int nid, rc; if (lmb->flags & DRCONF_MEM_ASSIGNED) return -EINVAL; @@ -642,11 +649,13 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) return rc; } - lmb_set_nid(lmb); block_sz = memory_block_size_bytes(); + /* Find the node id for this address. */ + nid = memory_add_physaddr_to_nid(lmb->base_addr); + /* Add the memory */ - rc = __add_memory(lmb->nid, lmb->base_addr, block_sz); + rc = __add_memory(nid, lmb->base_addr, block_sz); if (rc) { invalidate_lmb_associativity_index(lmb); return rc; @@ -654,9 +663,8 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) rc = dlpar_online_lmb(lmb); if (rc) { - __remove_memory(lmb->nid, lmb->base_addr, block_sz); + __remove_memory(nid, lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); - lmb_clear_nid(lmb); } else { lmb->flags |= DRCONF_MEM_ASSIGNED; } diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 9c569078a09f..6c2c66450dac 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -702,6 +702,9 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) p->bus_desc.of_node = p->pdev->dev.of_node; p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL); + /* Set the dimm command family mask to accept PDSMs */ + set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask); + if (!p->bus_desc.provider_name) return -ENOMEM; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 13c86a292c6d..b2b245b25edb 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -521,18 +521,55 @@ int pSeries_system_reset_exception(struct pt_regs *regs) return 0; /* need to perform reset */ } +static int mce_handle_err_realmode(int disposition, u8 error_type) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (disposition == RTAS_DISP_NOT_RECOVERED) { + switch (error_type) { + case MC_ERROR_TYPE_SLB: + case MC_ERROR_TYPE_ERAT: + /* + * Store the old slb content in paca before flushing. + * Print this when we go to virtual mode. + * There are chances that we may hit MCE again if there + * is a parity error on the SLB entry we trying to read + * for saving. Hence limit the slb saving to single + * level of recursion. + */ + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); + flush_and_reload_slb(); + disposition = RTAS_DISP_FULLY_RECOVERED; + break; + default: + break; + } + } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { + /* Platform corrected itself but could be degraded */ + pr_err("MCE: limited recovery, system may be degraded\n"); + disposition = RTAS_DISP_FULLY_RECOVERED; + } +#endif + return disposition; +} -static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) +static int mce_handle_err_virtmode(struct pt_regs *regs, + struct rtas_error_log *errp, + struct pseries_mc_errorlog *mce_log, + int disposition) { struct mce_error_info mce_err = { 0 }; - unsigned long eaddr = 0, paddr = 0; - struct pseries_errorlog *pseries_log; - struct pseries_mc_errorlog *mce_log; - int disposition = rtas_error_disposition(errp); int initiator = rtas_error_initiator(errp); int severity = rtas_error_severity(errp); + unsigned long eaddr = 0, paddr = 0; u8 error_type, err_sub_type; + if (!mce_log) + goto out; + + error_type = mce_log->error_type; + err_sub_type = rtas_mc_error_sub_type(mce_log); + if (initiator == RTAS_INITIATOR_UNKNOWN) mce_err.initiator = MCE_INITIATOR_UNKNOWN; else if (initiator == RTAS_INITIATOR_CPU) @@ -571,18 +608,7 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; mce_err.error_class = MCE_ECLASS_UNKNOWN; - if (!rtas_error_extended(errp)) - goto out; - - pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); - if (pseries_log == NULL) - goto out; - - mce_log = (struct pseries_mc_errorlog *)pseries_log->data; - error_type = mce_log->error_type; - err_sub_type = rtas_mc_error_sub_type(mce_log); - - switch (mce_log->error_type) { + switch (error_type) { case MC_ERROR_TYPE_UE: mce_err.error_type = MCE_ERROR_TYPE_UE; mce_common_process_ue(regs, &mce_err); @@ -682,37 +708,31 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; break; } +out: + save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED, + &mce_err, regs->nip, eaddr, paddr); + return disposition; +} -#ifdef CONFIG_PPC_BOOK3S_64 - if (disposition == RTAS_DISP_NOT_RECOVERED) { - switch (error_type) { - case MC_ERROR_TYPE_SLB: - case MC_ERROR_TYPE_ERAT: - /* - * Store the old slb content in paca before flushing. - * Print this when we go to virtual mode. - * There are chances that we may hit MCE again if there - * is a parity error on the SLB entry we trying to read - * for saving. Hence limit the slb saving to single - * level of recursion. - */ - if (local_paca->in_mce == 1) - slb_save_contents(local_paca->mce_faulty_slbs); - flush_and_reload_slb(); - disposition = RTAS_DISP_FULLY_RECOVERED; - break; - default: - break; - } - } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { - /* Platform corrected itself but could be degraded */ - printk(KERN_ERR "MCE: limited recovery, system may " - "be degraded\n"); - disposition = RTAS_DISP_FULLY_RECOVERED; - } -#endif +static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) +{ + struct pseries_errorlog *pseries_log; + struct pseries_mc_errorlog *mce_log = NULL; + int disposition = rtas_error_disposition(errp); + u8 error_type; + + if (!rtas_error_extended(errp)) + goto out; + + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); + if (!pseries_log) + goto out; + + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; + error_type = mce_log->error_type; + + disposition = mce_handle_err_realmode(disposition, error_type); -out: /* * Enable translation as we will be accessing per-cpu variables * in save_mce_event() which may fall outside RMO region, also @@ -723,10 +743,10 @@ out: * Note: All the realmode handling like flushing SLB entries for * SLB multihit is done by now. */ +out: mtmsr(mfmsr() | MSR_IR | MSR_DR); - save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED, - &mce_err, regs->nip, eaddr, paddr); - + disposition = mce_handle_err_virtmode(regs, errp, mce_log, + disposition); return disposition; } diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c index bbb97169bf63..6268545947b8 100644 --- a/arch/powerpc/platforms/pseries/rng.c +++ b/arch/powerpc/platforms/pseries/rng.c @@ -36,6 +36,7 @@ static __init int rng_init(void) ppc_md.get_random_seed = pseries_get_random_long; + of_node_put(dn); return 0; } machine_subsys_initcall(pseries, rng_init); diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index 40c0637203d5..81085eb8f225 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -7,6 +7,7 @@ */ #include <linux/mm.h> +#include <linux/memblock.h> #include <asm/machdep.h> #include <asm/svm.h> #include <asm/swiotlb.h> @@ -34,6 +35,31 @@ static int __init init_svm(void) } machine_early_initcall(pseries, init_svm); +/* + * Initialize SWIOTLB. Essentially the same as swiotlb_init(), except that it + * can allocate the buffer anywhere in memory. Since the hypervisor doesn't have + * any addressing limitation, we don't need to allocate it in low addresses. + */ +void __init svm_swiotlb_init(void) +{ + unsigned char *vstart; + unsigned long bytes, io_tlb_nslabs; + + io_tlb_nslabs = (swiotlb_size_or_default() >> IO_TLB_SHIFT); + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + + bytes = io_tlb_nslabs << IO_TLB_SHIFT; + + vstart = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE); + if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false)) + return; + + if (io_tlb_start) + memblock_free_early(io_tlb_start, + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + panic("SVM: Cannot allocate SWIOTLB buffer"); +} + int set_memory_encrypted(unsigned long addr, int numpages) { if (!PAGE_ALIGNED(addr)) diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c index ad8117148ea3..21b9d1bf39ff 100644 --- a/arch/powerpc/sysdev/xics/icp-hv.c +++ b/arch/powerpc/sysdev/xics/icp-hv.c @@ -174,6 +174,7 @@ int icp_hv_init(void) icp_ops = &icp_hv_ops; + of_node_put(np); return 0; } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 7efe4bc3ccf6..ac5862cee142 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -962,6 +962,7 @@ static void insert_cpu_bpts(void) brk.address = dabr[i].address; brk.type = (dabr[i].enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; brk.len = 8; + brk.hw_len = 8; __set_breakpoint(i, &brk); } } |